From 711e26c00e4c7b7cef0420c76a61e6d818e12687 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Sat, 9 Oct 2021 08:59:00 +0000 Subject: [PATCH 001/868] firmware: tegra: Fix error application of sizeof() to pointer Application of sizeof() to pointer yields the number of bytes of the pointer, but it should use the length of buffer in the code. Fixes: 06c2d9a078ab ("firmware: tegra: Reduce stack usage") Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/firmware/tegra/bpmp-debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c index 6d66fe03fb6a..fd89899aeeed 100644 --- a/drivers/firmware/tegra/bpmp-debugfs.c +++ b/drivers/firmware/tegra/bpmp-debugfs.c @@ -77,13 +77,14 @@ static const char *get_filename(struct tegra_bpmp *bpmp, const char *root_path, *filename = NULL; char *root_path_buf; size_t root_len; + size_t root_path_buf_len = 512; - root_path_buf = kzalloc(512, GFP_KERNEL); + root_path_buf = kzalloc(root_path_buf_len, GFP_KERNEL); if (!root_path_buf) goto out; root_path = dentry_path(bpmp->debugfs_mirror, root_path_buf, - sizeof(root_path_buf)); + root_path_buf_len); if (IS_ERR(root_path)) goto out; From 8383226583251858814d5521b542e7bf7dbadc4b Mon Sep 17 00:00:00 2001 From: Wilken Gottwalt Date: Sat, 13 Nov 2021 06:53:52 +0000 Subject: [PATCH 002/868] hwmon: (corsair-psu) fix plain integer used as NULL pointer sparse warnings: (new ones prefixed by >>) >> drivers/hwmon/corsair-psu.c:536:82: sparse: sparse: Using plain integer as NULL pointer Fixes: d115b51e0e56 ("hwmon: add Corsair PSU HID controller driver") Reported-by: kernel test robot Signed-off-by: Wilken Gottwalt Link: https://lore.kernel.org/r/YY9hAL8MZEQYLYPf@monster.localdomain Signed-off-by: Guenter Roeck --- drivers/hwmon/corsair-psu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index 731d5117f9f1..14389fd7afb8 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c @@ -729,7 +729,7 @@ static int corsairpsu_probe(struct hid_device *hdev, const struct hid_device_id corsairpsu_check_cmd_support(priv); priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "corsairpsu", priv, - &corsairpsu_chip_info, 0); + &corsairpsu_chip_info, NULL); if (IS_ERR(priv->hwmon_dev)) { ret = PTR_ERR(priv->hwmon_dev); From dbd3e6eaf3d813939b28e8a66e29d81cdc836445 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 12 Nov 2021 18:14:40 +0100 Subject: [PATCH 003/868] hwmon: (dell-smm) Fix warning on /proc/i8k creation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The removal function is called regardless of whether /proc/i8k was created successfully or not, the later causing a WARN() on module removal. Fix that by only registering the removal function if /proc/i8k was created successfully. Tested on a Inspiron 3505. Fixes: 039ae58503f3 ("hwmon: Allow to compile dell-smm-hwmon driver without /proc/i8k") Signed-off-by: Armin Wolf Acked-by: Pali Rohár Link: https://lore.kernel.org/r/20211112171440.59006-1-W_Armin@gmx.de Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index eaace478f508..5596c211f38d 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -627,10 +627,9 @@ static void __init i8k_init_procfs(struct device *dev) { struct dell_smm_data *data = dev_get_drvdata(dev); - /* Register the proc entry */ - proc_create_data("i8k", 0, NULL, &i8k_proc_ops, data); - - devm_add_action_or_reset(dev, i8k_exit_procfs, NULL); + /* Only register exit function if creation was successful */ + if (proc_create_data("i8k", 0, NULL, &i8k_proc_ops, data)) + devm_add_action_or_reset(dev, i8k_exit_procfs, NULL); } #else From 214f525255069a55b4664842c68bc15b2ee049f0 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Wed, 10 Nov 2021 18:53:38 -0800 Subject: [PATCH 004/868] hwmon: (nct6775) mask out bank number in nct6775_wmi_read_value() The first call to nct6775_asuswmi_read() in nct6775_wmi_read_value() had been passing the full bank+register number instead of just the lower 8 bits. It didn't end up actually causing problems because the second argument of that function is a u8 anyway, but it seems preferable to be explicit about it at the call site (and consistent with the rest of the code). Signed-off-by: Zev Weiss Fixes: 3fbbfc27f955 ("hwmon: (nct6775) Support access via Asus WMI") Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211111025339.27520-1-zev@bewilderbeest.net Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 93dca471972e..57ce8633a725 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -1527,7 +1527,7 @@ static u16 nct6775_wmi_read_value(struct nct6775_data *data, u16 reg) nct6775_wmi_set_bank(data, reg); - err = nct6775_asuswmi_read(data->bank, reg, &tmp); + err = nct6775_asuswmi_read(data->bank, reg & 0xff, &tmp); if (err) return 0; From 51c7b6a0398f54b9120795796a4cff4fc9634f7d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 15 Nov 2021 00:12:07 +0100 Subject: [PATCH 005/868] power: supply: core: Break capacity loop We should not go on looking for more capacity tables after we realize we have looked at the last one in power_supply_find_ocv2cap_table(). Fixes: 3afb50d7125b ("power: supply: core: Add some helpers to use the battery OCV capacity table") Cc: Chunyan Zhang Cc: Baolin Wang Signed-off-by: Linus Walleij Reviewed-by: Baolin Wang Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index fc12a4f407f4..6093754cebd5 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -853,6 +853,10 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, return NULL; for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { + /* Out of capacity tables */ + if (!info->ocv_table[i]) + break; + temp_diff = abs(info->ocv_temp[i] - temp); if (temp_diff < best_temp_diff) { From 80211be1b9dec04cc2805d3d81e2091ecac289a1 Mon Sep 17 00:00:00 2001 From: Yauhen Kharuzhy Date: Sun, 7 Nov 2021 23:20:01 +0300 Subject: [PATCH 006/868] power: bq25890: Enable continuous conversion for ADC at charging Instead of one shot run of ADC at beginning of charging, run continuous conversion to ensure that all charging-related values are monitored properly (input voltage, input current, themperature etc.). Signed-off-by: Yauhen Kharuzhy Reviewed-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq25890_charger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 34ec186a2e9a..b7eac5428083 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -581,12 +581,12 @@ static irqreturn_t __bq25890_handle_irq(struct bq25890_device *bq) if (!new_state.online && bq->state.online) { /* power removed */ /* disable ADC */ - ret = bq25890_field_write(bq, F_CONV_START, 0); + ret = bq25890_field_write(bq, F_CONV_RATE, 0); if (ret < 0) goto error; } else if (new_state.online && !bq->state.online) { /* power inserted */ /* enable ADC, to have control of charge current/voltage */ - ret = bq25890_field_write(bq, F_CONV_START, 1); + ret = bq25890_field_write(bq, F_CONV_RATE, 1); if (ret < 0) goto error; } From 8755e9e6d0e41336879035d2280f7a4a24236543 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 26 Oct 2021 17:48:17 +0200 Subject: [PATCH 007/868] phy: stm32: fix st,slow-hs-slew-rate with st,decrease-hs-slew-rate st,decrease-hs-slew-rate is described in phy-stm32-usbphyc.yaml. Then fix the property name in driver. Fixes: 2f5e9f815a2f ("phy: stm32: add phy tuning support") Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211026154817.198937-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stm32-usbphyc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 7df6a63ad37b..e4f4a9be5132 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -478,7 +478,7 @@ static void stm32_usbphyc_phy_tuning(struct stm32_usbphyc *usbphyc, if (!of_property_read_bool(np, "st,no-lsfs-fb-cap")) usbphyc_phy->tune |= LFSCAPEN; - if (of_property_read_bool(np, "st,slow-hs-slew-rate")) + if (of_property_read_bool(np, "st,decrease-hs-slew-rate")) usbphyc_phy->tune |= HSDRVSLEW; ret = of_property_read_u32(np, "st,tune-hs-dc-level", &val); From 7adaf921b6438b6ba1c983a4ca5622f8173063f0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 14 Nov 2021 19:05:59 -0800 Subject: [PATCH 008/868] phy: ti: report 2 non-kernel-doc comments Do not use "/**" to begin a non-kernel-doc comment. Fixes these build warnings: drivers/phy/ti/phy-am654-serdes.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * PCIe SERDES driver for AM654x SoC drivers/phy/ti/phy-j721e-wiz.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Wrapper driver for SERDES used in J721E Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Kishon Vijay Abraham I Cc: Vinod Koul Cc: linux-phy@lists.infradead.org Link: https://lore.kernel.org/r/20211115030559.13994-1-rdunlap@infradead.org Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-am654-serdes.c | 2 +- drivers/phy/ti/phy-j721e-wiz.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c index 2ff56ce77b30..c1211c4f863c 100644 --- a/drivers/phy/ti/phy-am654-serdes.c +++ b/drivers/phy/ti/phy-am654-serdes.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * PCIe SERDES driver for AM654x SoC * * Copyright (C) 2018 - 2019 Texas Instruments Incorporated - http://www.ti.com/ diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index 126f5b8735cc..b3384c31637a 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/** +/* * Wrapper driver for SERDES used in J721E * * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/ From 644106cdb89844be2496b21175b7c0c2e0fab381 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 5 Nov 2021 08:20:50 -0700 Subject: [PATCH 009/868] power: reset: ltc2952: Fix use of floating point literals A new commit in LLVM causes an error on the use of 'long double' when '-mno-x87' is used, which the kernel does through an alias, '-mno-80387' (see the LLVM commit below for more details around why it does this). drivers/power/reset/ltc2952-poweroff.c:162:28: error: expression requires 'long double' type support, but target 'x86_64-unknown-linux-gnu' does not support it data->wde_interval = 300L * 1E6L; ^ drivers/power/reset/ltc2952-poweroff.c:162:21: error: expression requires 'long double' type support, but target 'x86_64-unknown-linux-gnu' does not support it data->wde_interval = 300L * 1E6L; ^ drivers/power/reset/ltc2952-poweroff.c:163:41: error: expression requires 'long double' type support, but target 'x86_64-unknown-linux-gnu' does not support it data->trigger_delay = ktime_set(2, 500L*1E6L); ^ 3 errors generated. This happens due to the use of a 'long double' literal. The 'E6' part of '1E6L' causes the literal to be a 'double' then the 'L' suffix promotes it to 'long double'. There is no visible reason for floating point values in this driver, as the values are only assigned to integer types. Use NSEC_PER_MSEC, which is the same integer value as '1E6L', to avoid changing functionality but fix the error. Fixes: 6647156c00cc ("power: reset: add LTC2952 poweroff driver") Link: https://github.com/ClangBuiltLinux/linux/issues/1497 Link: https://github.com/llvm/llvm-project/commit/a8083d42b1c346e21623a1d36d1f0cadd7801d83 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Sebastian Reichel --- drivers/power/reset/ltc2952-poweroff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c index fbb344353fe4..65d9528cc989 100644 --- a/drivers/power/reset/ltc2952-poweroff.c +++ b/drivers/power/reset/ltc2952-poweroff.c @@ -159,8 +159,8 @@ static void ltc2952_poweroff_kill(void) static void ltc2952_poweroff_default(struct ltc2952_poweroff *data) { - data->wde_interval = 300L * 1E6L; - data->trigger_delay = ktime_set(2, 500L*1E6L); + data->wde_interval = 300L * NSEC_PER_MSEC; + data->trigger_delay = ktime_set(2, 500L * NSEC_PER_MSEC); hrtimer_init(&data->timer_trigger, CLOCK_MONOTONIC, HRTIMER_MODE_REL); data->timer_trigger.function = ltc2952_poweroff_timer_trigger; From f45b2974cc0ae959a4c503a071e38a56bd64372f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 17 Nov 2021 13:57:08 +0100 Subject: [PATCH 010/868] bpf, x86: Fix "no previous prototype" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The arch_prepare_bpf_dispatcher function does not have a prototype, and yields the following warning when W=1 is enabled for the kernel build. >> arch/x86/net/bpf_jit_comp.c:2188:5: warning: no previous \ prototype for 'arch_prepare_bpf_dispatcher' [-Wmissing-prototypes] 2188 | int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, \ int num_funcs) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ Remove the warning by adding a function declaration to include/linux/bpf.h. Fixes: 75ccbef6369e ("bpf: Introduce BPF dispatcher") Reported-by: kernel test robot Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211117125708.769168-1-bjorn@kernel.org --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e7a163a3146b..84ff6ef49462 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -732,6 +732,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); +int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ From 08d2061ff9c5319a07bf9ca6bbf11fdec68f704a Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Wed, 17 Nov 2021 15:02:22 +0100 Subject: [PATCH 011/868] arm64: dts: allwinner: orangepi-zero-plus: fix PHY mode Orange Pi Zero Plus uses a Realtek RTL8211E RGMII Gigabit PHY, but its currently set to plain RGMII mode meaning that it doesn't introduce delays. With this setup, TX packets are completely lost and changing the mode to RGMII-ID so the PHY will add delays internally fixes the issue. Fixes: a7affb13b271 ("arm64: allwinner: H5: Add Xunlong Orange Pi Zero Plus") Acked-by: Chen-Yu Tsai Tested-by: Ron Goossens Tested-by: Samuel Holland Signed-off-by: Robert Marko Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20211117140222.43692-1-robert.marko@sartura.hr --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts index d13980ed7a79..7ec5ac850a0d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts @@ -69,7 +69,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 69125b4b9440be015783312e1b8753ec96febde0 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 12 Nov 2021 11:27:12 +0000 Subject: [PATCH 012/868] reset: tegra-bpmp: Revert Handle errors in BPMP response Commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP response") fixed an issue in the Tegra BPMP error handling but has exposed an issue in the Tegra194 HDA driver and now resetting the Tegra194 HDA controller is failing. For now revert the commit c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP response") while a fix for the Tegra HDA driver is created. Fixes: c045ceb5a145 ("reset: tegra-bpmp: Handle errors in BPMP response") Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20211112112712.21587-1-jonathanh@nvidia.com Signed-off-by: Philipp Zabel --- drivers/reset/tegra/reset-bpmp.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 4c5bba52b105..24d3395964cc 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,7 +20,6 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; - int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -31,13 +30,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - err = tegra_bpmp_transfer(bpmp, &msg); - if (err) - return err; - if (msg.rx.ret) - return -EINVAL; - - return 0; + return tegra_bpmp_transfer(bpmp, &msg); } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, From 70c9774e180d151abaab358108e3510a8e615215 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 25 Oct 2021 20:41:59 +0800 Subject: [PATCH 013/868] iio: accel: kxcjk-1013: Fix possible memory leak in probe and remove When ACPI type is ACPI_SMO8500, the data->dready_trig will not be set, the memory allocated by iio_triggered_buffer_setup() will not be freed, and cause memory leak as follows: unreferenced object 0xffff888009551400 (size 512): comm "i2c-SMO8500-125", pid 911, jiffies 4294911787 (age 83.852s) hex dump (first 32 bytes): 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 20 e2 e5 c0 ff ff ff ff ........ ....... backtrace: [<0000000041ce75ee>] kmem_cache_alloc_trace+0x16d/0x360 [<000000000aeb17b0>] iio_kfifo_allocate+0x41/0x130 [kfifo_buf] [<000000004b40c1f5>] iio_triggered_buffer_setup_ext+0x2c/0x210 [industrialio_triggered_buffer] [<000000004375b15f>] kxcjk1013_probe+0x10c3/0x1d81 [kxcjk_1013] Fix it by remove data->dready_trig condition in probe and remove. Reported-by: Hulk Robot Fixes: a25691c1f967 ("iio: accel: kxcjk1013: allow using an external trigger") Signed-off-by: Yang Yingliang Cc: Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20211025124159.2700301-1-yangyingliang@huawei.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxcjk-1013.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index a51fdd3c9b5b..24c9387c2968 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1595,8 +1595,7 @@ static int kxcjk1013_probe(struct i2c_client *client, return 0; err_buffer_cleanup: - if (data->dready_trig) - iio_triggered_buffer_cleanup(indio_dev); + iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: if (data->dready_trig) iio_trigger_unregister(data->dready_trig); @@ -1618,8 +1617,8 @@ static int kxcjk1013_remove(struct i2c_client *client) pm_runtime_disable(&client->dev); pm_runtime_set_suspended(&client->dev); + iio_triggered_buffer_cleanup(indio_dev); if (data->dready_trig) { - iio_triggered_buffer_cleanup(indio_dev); iio_trigger_unregister(data->dready_trig); iio_trigger_unregister(data->motion_trig); } From ef9d67fa72c1b149a420587e435a3e888bdbf74f Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 24 Oct 2021 19:12:49 +0200 Subject: [PATCH 014/868] iio: ltr501: Don't return error code in trigger handler IIO trigger handlers need to return one of the irqreturn_t values. Returning an error code is not supported. The ltr501 interrupt handler gets this right for most error paths, but there is one case where it returns the error code. In addition for this particular case the trigger handler does not call `iio_trigger_notify_done()`. Which when not done keeps the triggered disabled forever. Modify the code so that the function returns a valid irqreturn_t value as well as calling `iio_trigger_notify_done()` on all exit paths. Fixes: 2690be905123 ("iio: Add Lite-On ltr501 ambient light / proximity sensor driver") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20211024171251.22896-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/ltr501.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c index 7e51aaac0bf8..b2983b1a9ed1 100644 --- a/drivers/iio/light/ltr501.c +++ b/drivers/iio/light/ltr501.c @@ -1275,7 +1275,7 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p) ret = regmap_bulk_read(data->regmap, LTR501_ALS_DATA1, als_buf, sizeof(als_buf)); if (ret < 0) - return ret; + goto done; if (test_bit(0, indio_dev->active_scan_mask)) scan.channels[j++] = le16_to_cpu(als_buf[1]); if (test_bit(1, indio_dev->active_scan_mask)) From 45febe0d63917ee908198c5be08511c64ee1790a Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 24 Oct 2021 19:12:50 +0200 Subject: [PATCH 015/868] iio: kxsd9: Don't return error code in trigger handler IIO trigger handlers need to return one of the irqreturn_t values. Returning an error code is not supported. The kxsd9 interrupt handler returns an error code if reading the data registers fails. In addition when exiting due to an error the trigger handler does not call `iio_trigger_notify_done()`. Which when not done keeps the triggered disabled forever. Modify the code so that the function returns a valid irqreturn_t value as well as calling `iio_trigger_notify_done()` on all exit paths. Since we can't return the error code make sure to at least log it as part of the error message. Fixes: 0427a106a98a ("iio: accel: kxsd9: Add triggered buffer handling") Signed-off-by: Lars-Peter Clausen Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20211024171251.22896-2-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kxsd9.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 2faf85ca996e..552eba5e8b4f 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -224,14 +224,14 @@ static irqreturn_t kxsd9_trigger_handler(int irq, void *p) hw_values.chan, sizeof(hw_values.chan)); if (ret) { - dev_err(st->dev, - "error reading data\n"); - return ret; + dev_err(st->dev, "error reading data: %d\n", ret); + goto out; } iio_push_to_buffers_with_timestamp(indio_dev, &hw_values, iio_get_time_ns(indio_dev)); +out: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; From 8e1eeca5afa7ba84d885987165dbdc5decf15413 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 24 Oct 2021 19:12:51 +0200 Subject: [PATCH 016/868] iio: stk3310: Don't return error code in interrupt handler Interrupt handlers must return one of the irqreturn_t values. Returning a error code is not supported. The stk3310 event interrupt handler returns an error code when reading the flags register fails. Fix the implementation to always return an irqreturn_t value. Fixes: 3dd477acbdd1 ("iio: light: Add threshold interrupt support for STK3310") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20211024171251.22896-3-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/stk3310.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/stk3310.c b/drivers/iio/light/stk3310.c index 07e91846307c..fc63856ed54d 100644 --- a/drivers/iio/light/stk3310.c +++ b/drivers/iio/light/stk3310.c @@ -546,9 +546,8 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private) mutex_lock(&data->lock); ret = regmap_field_read(data->reg_flag_nf, &dir); if (ret < 0) { - dev_err(&data->client->dev, "register read failed\n"); - mutex_unlock(&data->lock); - return ret; + dev_err(&data->client->dev, "register read failed: %d\n", ret); + goto out; } event = IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 1, IIO_EV_TYPE_THRESH, @@ -560,6 +559,7 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private) ret = regmap_field_write(data->reg_flag_psint, 0); if (ret < 0) dev_err(&data->client->dev, "failed to reset interrupts\n"); +out: mutex_unlock(&data->lock); return IRQ_HANDLED; From cd0082235783f814241a1c9483fb89e405f4f892 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 24 Oct 2021 11:26:59 +0200 Subject: [PATCH 017/868] iio: mma8452: Fix trigger reference couting The mma8452 driver directly assigns a trigger to the struct iio_dev. The IIO core when done using this trigger will call `iio_trigger_put()` to drop the reference count by 1. Without the matching `iio_trigger_get()` in the driver the reference count can reach 0 too early, the trigger gets freed while still in use and a use-after-free occurs. Fix this by getting a reference to the trigger before assigning it to the IIO device. Fixes: ae6d9ce05691 ("iio: mma8452: Add support for interrupt driven triggers.") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20211024092700.6844-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 715b8138fb71..09c7f10fefb6 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1470,7 +1470,7 @@ static int mma8452_trigger_setup(struct iio_dev *indio_dev) if (ret) return ret; - indio_dev->trig = trig; + indio_dev->trig = iio_trigger_get(trig); return 0; } From f711f28e71e965c0d1141c830fa7131b41abbe75 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 22 Oct 2021 14:19:29 +0200 Subject: [PATCH 018/868] iio: adc: stm32: fix a current leak by resetting pcsel before disabling vdda Some I/Os are connected to ADC input channels, when the corresponding bit in PCSEL register are set on STM32H7 and STM32MP15. This is done in the prepare routine of stm32-adc driver. There are constraints here, as PCSEL shouldn't be set when VDDA supply is disabled. Enabling/disabling of VDDA supply in done via stm32-adc-core runtime PM routines (before/after ADC is enabled/disabled). Currently, PCSEL remains set when disabling ADC. Later on, PM runtime can disable the VDDA supply. This creates some conditions on I/Os that can start to leak current. So PCSEL needs to be cleared when disabling the ADC. Fixes: 95e339b6e85d ("iio: adc: stm32: add support for STM32H7") Signed-off-by: Fabrice Gasnier Reviewed-by: Olivier Moysan Link: https://lore.kernel.org/r/1634905169-23762-1-git-send-email-fabrice.gasnier@foss.st.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 6245434f8377..60f2ccf7e342 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1117,6 +1117,7 @@ static void stm32h7_adc_unprepare(struct iio_dev *indio_dev) { struct stm32_adc *adc = iio_priv(indio_dev); + stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0); stm32h7_adc_disable(indio_dev); stm32_adc_int_ch_disable(adc); stm32h7_adc_enter_pwr_down(adc); From 59f92868176f191eefde70d284bdfc1ed76a84bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Mon, 18 Oct 2021 13:37:31 +0200 Subject: [PATCH 019/868] iio: dln2-adc: Fix lockdep complaint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reading the voltage: $ cat /sys/bus/iio/devices/iio\:device0/in_voltage0_raw Lockdep complains: [ 153.910616] ====================================================== [ 153.916918] WARNING: possible circular locking dependency detected [ 153.923221] 5.14.0+ #5 Not tainted [ 153.926692] ------------------------------------------------------ [ 153.932992] cat/717 is trying to acquire lock: [ 153.937525] c2585358 (&indio_dev->mlock){+.+.}-{3:3}, at: iio_device_claim_direct_mode+0x28/0x44 [ 153.946541] but task is already holding lock: [ 153.952487] c2585860 (&dln2->mutex){+.+.}-{3:3}, at: dln2_adc_read_raw+0x94/0x2bc [dln2_adc] [ 153.961152] which lock already depends on the new lock. Fix this by not calling into the iio core underneath the dln2->mutex lock. Fixes: 7c0299e879dd ("iio: adc: Add support for DLN2 ADC") Cc: Jack Andersen Signed-off-by: Noralf Trønnes Link: https://lore.kernel.org/r/20211018113731.25723-1-noralf@tronnes.org Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/dln2-adc.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index 16407664182c..6c67192946aa 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -248,7 +248,6 @@ static int dln2_adc_set_chan_period(struct dln2_adc *dln2, static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel) { int ret, i; - struct iio_dev *indio_dev = platform_get_drvdata(dln2->pdev); u16 conflict; __le16 value; int olen = sizeof(value); @@ -257,13 +256,9 @@ static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel) .chan = channel, }; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret < 0) - return ret; - ret = dln2_adc_set_chan_enabled(dln2, channel, true); if (ret < 0) - goto release_direct; + return ret; ret = dln2_adc_set_port_enabled(dln2, true, &conflict); if (ret < 0) { @@ -300,8 +295,6 @@ disable_port: dln2_adc_set_port_enabled(dln2, false, NULL); disable_chan: dln2_adc_set_chan_enabled(dln2, channel, false); -release_direct: - iio_device_release_direct_mode(indio_dev); return ret; } @@ -337,10 +330,16 @@ static int dln2_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret < 0) + return ret; + mutex_lock(&dln2->mutex); ret = dln2_adc_read(dln2, chan->channel); mutex_unlock(&dln2->mutex); + iio_device_release_direct_mode(indio_dev); + if (ret < 0) return ret; From a827a4984664308f13599a0b26c77018176d0c7c Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 24 Oct 2021 11:27:00 +0200 Subject: [PATCH 020/868] iio: trigger: Fix reference counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In viio_trigger_alloc() device_initialize() is used to set the initial reference count of the trigger to 1. Then another get_device() is called on trigger. This sets the reference count to 2 before the trigger is returned. iio_trigger_free(), which is the matching API to viio_trigger_alloc(), calls put_device() which decreases the reference count by 1. But the second reference count acquired in viio_trigger_alloc() is never dropped. As a result the iio_trigger_release() function is never called and the memory associated with the trigger is never freed. Since there is no reason for the trigger to start its lifetime with two reference counts just remove the extra get_device() in viio_trigger_alloc(). Fixes: 5f9c035cae18 ("staging:iio:triggers. Add a reference get to the core for triggers.") Signed-off-by: Lars-Peter Clausen Acked-by: Nuno Sá Link: https://lore.kernel.org/r/20211024092700.6844-2-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-trigger.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index b23caa2f2aa1..93990ff1dfe3 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -556,7 +556,6 @@ struct iio_trigger *viio_trigger_alloc(struct device *parent, irq_modify_status(trig->subirq_base + i, IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE); } - get_device(&trig->dev); return trig; From 90751fb9f224e0e1555b49a8aa9e68f6537e4cec Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 1 Nov 2021 14:30:43 +0100 Subject: [PATCH 021/868] iio: dln2: Check return value of devm_iio_trigger_register() Registering a trigger can fail and the return value of devm_iio_trigger_register() must be checked. Otherwise undefined behavior can occur when the trigger is used. Fixes: 7c0299e879dd ("iio: adc: Add support for DLN2 ADC") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20211101133043.6974-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/dln2-adc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index 6c67192946aa..97d162a3cba4 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -655,7 +655,11 @@ static int dln2_adc_probe(struct platform_device *pdev) return -ENOMEM; } iio_trigger_set_drvdata(dln2->trig, dln2); - devm_iio_trigger_register(dev, dln2->trig); + ret = devm_iio_trigger_register(dev, dln2->trig); + if (ret) { + dev_err(dev, "failed to register trigger: %d\n", ret); + return ret; + } iio_trigger_set_immutable(indio_dev, dln2->trig); ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL, From 4a3bf703a9dccc29e48390b8cd1bf30c4e599100 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 11 Nov 2021 09:39:54 +0100 Subject: [PATCH 022/868] iio: imx8qxp-adc: fix dependency to the intended ARCH_MXC config Commit 1e23dcaa1a9f ("iio: imx8qxp-adc: Add driver support for NXP IMX8QXP ADC") adds the config IMX8QXP_ADC for this new driver, which depends on the non-existing config ARCH_MXC_ARM64. Hence, ./scripts/checkkconfigsymbols.py warns: ARCH_MXC_ARM64 Referencing files: drivers/iio/adc/Kconfig Probably, the existing config ARCH_MXC is intended to be referred here. So, repair the dependency to refer to that config. Fixes: 1e23dcaa1a9f ("iio: imx8qxp-adc: Add driver support for NXP IMX8QXP ADC") Signed-off-by: Lukas Bulwahn Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20211111083954.6286-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 8bf5b62a73f4..3363af15a43f 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -532,7 +532,7 @@ config IMX7D_ADC config IMX8QXP_ADC tristate "NXP IMX8QXP ADC driver" - depends on ARCH_MXC_ARM64 || COMPILE_TEST + depends on ARCH_MXC || COMPILE_TEST depends on HAS_IOMEM help Say yes here to build support for IMX8QXP ADC. From 67fe29583e72b2103abb661bb58036e3c1f00277 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 1 Nov 2021 15:40:54 +0100 Subject: [PATCH 023/868] iio: itg3200: Call iio_trigger_notify_done() on error IIO trigger handlers must call iio_trigger_notify_done() when done. This must be done even when an error occurred. Otherwise the trigger will be seen as busy indefinitely and the trigger handler will never be called again. The itg3200 driver neglects to call iio_trigger_notify_done() when there is an error reading the gyro data. Fix this by making sure that iio_trigger_notify_done() is included in the error exit path. Fixes: 9dbf091da080 ("iio: gyro: Add itg3200") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20211101144055.13858-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/itg3200_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index 04dd6a7969ea..4cfa0d439560 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -61,9 +61,9 @@ static irqreturn_t itg3200_trigger_handler(int irq, void *p) iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); +error_ret: iio_trigger_notify_done(indio_dev->trig); -error_ret: return IRQ_HANDLED; } From 6661146427cbbce6d1fe3dbb11ff1c487f55799a Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 1 Nov 2021 15:40:55 +0100 Subject: [PATCH 024/868] iio: ad7768-1: Call iio_trigger_notify_done() on error IIO trigger handlers must call iio_trigger_notify_done() when done. This must be done even when an error occurred. Otherwise the trigger will be seen as busy indefinitely and the trigger handler will never be called again. The ad7768-1 driver neglects to call iio_trigger_notify_done() when there is an error reading the converter data. Fix this by making sure that iio_trigger_notify_done() is included in the error exit path. Fixes: a5f8c7da3dbe ("iio: adc: Add AD7768-1 ADC basic support") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20211101144055.13858-2-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7768-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index 2c5c8a3672b2..aa42ba759fa1 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -480,8 +480,8 @@ static irqreturn_t ad7768_trigger_handler(int irq, void *p) iio_push_to_buffers_with_timestamp(indio_dev, &st->data.scan, iio_get_time_ns(indio_dev)); - iio_trigger_notify_done(indio_dev->trig); err_unlock: + iio_trigger_notify_done(indio_dev->trig); mutex_unlock(&st->lock); return IRQ_HANDLED; From 6966df483d7b5b218aeb0e13e7e334a8fc3c1744 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 18 Nov 2021 13:49:51 +0200 Subject: [PATCH 025/868] regulator: Update protection IRQ helper docs The documentation of IRQ notification helper had still references to first RFC implementation which called BUG() while trying to protect the hardware. Behaviour was improved as calling the BUG() was not a proper solution. Current implementation attempts to call poweroff if handling of potentially damaging error notification fails. Update the documentation to reflect the actual behaviour. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/0c9cc4bcf20c3da66fd5a85c97ee4288e5727538.1637233864.git.matti.vaittinen@fi.rohmeurope.com Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bd7a73db2e66..54cf566616ae 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -499,7 +499,8 @@ struct regulator_irq_data { * best to shut-down regulator(s) or reboot the SOC if error * handling is repeatedly failing. If fatal_cnt is given the IRQ * handling is aborted if it fails for fatal_cnt times and die() - * callback (if populated) or BUG() is called to try to prevent + * callback (if populated) is called. If die() is not populated + * poweroff for the system is attempted in order to prevent any * further damage. * @reread_ms: The time which is waited before attempting to re-read status * at the worker if IC reading fails. Immediate re-read is done @@ -516,11 +517,12 @@ struct regulator_irq_data { * @data: Driver private data pointer which will be passed as such to * the renable, map_event and die callbacks in regulator_irq_data. * @die: Protection callback. If IC status reading or recovery actions - * fail fatal_cnt times this callback or BUG() is called. This - * callback should implement a final protection attempt like - * disabling the regulator. If protection succeeded this may - * return 0. If anything else is returned the core assumes final - * protection failed and calls BUG() as a last resort. + * fail fatal_cnt times this callback is called or system is + * powered off. This callback should implement a final protection + * attempt like disabling the regulator. If protection succeeded + * die() may return 0. If anything else is returned the core + * assumes final protection failed and attempts to perform a + * poweroff as a last resort. * @map_event: Driver callback to map IRQ status into regulator devices with * events / errors. NOTE: callback MUST initialize both the * errors and notifs for all rdevs which it signals having From 38207a5e81230d6ffbdd51e5fa5681be5116dcae Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Nov 2021 10:14:17 -0800 Subject: [PATCH 026/868] bpf, sockmap: Attach map progs to psock early for feature probes When a TCP socket is added to a sock map we look at the programs attached to the map to determine what proto op hooks need to be changed. Before the patch in the 'fixes' tag there were only two categories -- the empty set of programs or a TX policy. In any case the base set handled the receive case. After the fix we have an optimized program for receive that closes a small, but possible, race on receive. This program is loaded only when the map the psock is being added to includes a RX policy. Otherwise, the race is not possible so we don't need to handle the race condition. In order for the call to sk_psock_init() to correctly evaluate the above conditions all progs need to be set in the psock before the call. However, in the current code this is not the case. We end up evaluating the requirements on the old prog state. If your psock is attached to multiple maps -- for example a tx map and rx map -- then the second update would pull in the correct maps. But, the other pattern with a single rx enabled map the correct receive hooks are not used. The result is the race fixed by the patch in the fixes tag below may still be seen in this case. To fix we simply set all psock->progs before doing the call into sock_map_init(). With this the init() call gets the full list of programs and chooses the correct proto ops on the first iteration instead of requiring the second update to pull them in. This fixes the race case when only a single map is used. Fixes: c5d2177a72a16 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211119181418.353932-2-john.fastabend@gmail.com --- net/core/sock_map.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index f39ef79ced67..9b528c644fb7 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -282,6 +282,12 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) if (msg_parser) psock_set_prog(&psock->progs.msg_parser, msg_parser); + if (stream_parser) + psock_set_prog(&psock->progs.stream_parser, stream_parser); + if (stream_verdict) + psock_set_prog(&psock->progs.stream_verdict, stream_verdict); + if (skb_verdict) + psock_set_prog(&psock->progs.skb_verdict, skb_verdict); ret = sock_map_init_proto(sk, psock); if (ret < 0) @@ -292,14 +298,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) ret = sk_psock_init_strp(sk, psock); if (ret) goto out_unlock_drop; - psock_set_prog(&psock->progs.stream_verdict, stream_verdict); - psock_set_prog(&psock->progs.stream_parser, stream_parser); sk_psock_start_strp(sk, psock); } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) { - psock_set_prog(&psock->progs.stream_verdict, stream_verdict); sk_psock_start_verdict(sk,psock); } else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) { - psock_set_prog(&psock->progs.skb_verdict, skb_verdict); sk_psock_start_verdict(sk, psock); } write_unlock_bh(&sk->sk_callback_lock); From c0d95d3380ee099d735e08618c0d599e72f6c8b0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 19 Nov 2021 10:14:18 -0800 Subject: [PATCH 027/868] bpf, sockmap: Re-evaluate proto ops when psock is removed from sockmap When a sock is added to a sock map we evaluate what proto op hooks need to be used. However, when the program is removed from the sock map we have not been evaluating if that changes the required program layout. Before the patch listed in the 'fixes' tag this was not causing failures because the base program set handles all cases. Specifically, the case with a stream parser and the case with out a stream parser are both handled. With the fix below we identified a race when running with a proto op that attempts to read skbs off both the stream parser and the skb->receive_queue. Namely, that a race existed where when the stream parser is empty checking the skb->receive_queue from recvmsg at the precies moment when the parser is paused and the receive_queue is not empty could result in skipping the stream parser. This may break a RX policy depending on the parser to run. The fix tag then loads a specific proto ops that resolved this race. But, we missed removing that proto ops recv hook when the sock is removed from the sockmap. The result is the stream parser is stopped so no more skbs will be aggregated there, but the hook and BPF program continues to be attached on the psock. User space will then get an EBUSY when trying to read the socket because the recvmsg() handler is now waiting on a stopped stream parser. To fix we rerun the proto ops init() function which will look at the new set of progs attached to the psock and rest the proto ops hook to the correct handlers. And in the above case where we remove the sock from the sock map the RX prog will no longer be listed so the proto ops is removed. Fixes: c5d2177a72a16 ("bpf, sockmap: Fix race in ingress receive verdict with redirect to self") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211119181418.353932-3-john.fastabend@gmail.com --- net/core/skmsg.c | 5 +++++ net/core/sock_map.c | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 1ae52ac943f6..8eb671c827f9 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1124,6 +1124,8 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock) void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock) { + psock_set_prog(&psock->progs.stream_parser, NULL); + if (!psock->saved_data_ready) return; @@ -1212,6 +1214,9 @@ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock) void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock) { + psock_set_prog(&psock->progs.stream_verdict, NULL); + psock_set_prog(&psock->progs.skb_verdict, NULL); + if (!psock->saved_data_ready) return; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9b528c644fb7..4ca4b11f4e5f 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -167,8 +167,11 @@ static void sock_map_del_link(struct sock *sk, write_lock_bh(&sk->sk_callback_lock); if (strp_stop) sk_psock_stop_strp(sk, psock); - else + if (verdict_stop) sk_psock_stop_verdict(sk, psock); + + if (psock->psock_update_sk_prot) + psock->psock_update_sk_prot(sk, psock, false); write_unlock_bh(&sk->sk_callback_lock); } } From 31c66bfa95c14321e754ade581a65a50fd482841 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 16 Nov 2021 16:09:46 +0530 Subject: [PATCH 028/868] phy: mvebu-cp110-utmi: Fix kernel-doc warns Fix the format and add description for ops to fix the below warnings: drivers/phy/marvell/phy-mvebu-cp110-utmi.c:94: warning: Function parameter or member 'syscon' not described in 'mvebu_cp110_utmi' drivers/phy/marvell/phy-mvebu-cp110-utmi.c:94: warning: Function parameter or member 'ops' not described in 'mvebu_cp110_utmi' Signed-off-by: Vinod Koul Link: https://lore.kernel.org/r/20211116103951.34482-1-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/marvell/phy-mvebu-cp110-utmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/marvell/phy-mvebu-cp110-utmi.c b/drivers/phy/marvell/phy-mvebu-cp110-utmi.c index 08d178a4dc13..aa27c7994610 100644 --- a/drivers/phy/marvell/phy-mvebu-cp110-utmi.c +++ b/drivers/phy/marvell/phy-mvebu-cp110-utmi.c @@ -82,9 +82,9 @@ * struct mvebu_cp110_utmi - PHY driver data * * @regs: PHY registers - * @syscom: Regmap with system controller registers + * @syscon: Regmap with system controller registers * @dev: device driver handle - * @caps: PHY capabilities + * @ops: phy ops */ struct mvebu_cp110_utmi { void __iomem *regs; From e697ffe39a0df3cc0cd977059a9207cb3084ff11 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 16 Nov 2021 16:09:48 +0530 Subject: [PATCH 029/868] phy: qualcomm: qmp: Add missing struct documentation dp-* members were added to qmp_phy_combo_cfg but documentation was missed, so add that. drivers/phy/qualcomm/phy-qcom-qmp.c:2995: warning: Function parameter or member 'dp_aux_cfg' not described in 'qmp_phy' drivers/phy/qualcomm/phy-qcom-qmp.c:2995: warning: Function parameter or member 'dp_opts' not described in 'qmp_phy' drivers/phy/qualcomm/phy-qcom-qmp.c:2995: warning: Function parameter or member 'dp_clks' not described in 'qmp_phy' Signed-off-by: Vinod Koul Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211116103951.34482-3-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 456a59d8c7d0..c96639d5f581 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -2973,6 +2973,9 @@ struct qmp_phy_combo_cfg { * @qmp: QMP phy to which this lane belongs * @lane_rst: lane's reset controller * @mode: current PHY mode + * @dp_aux_cfg: Display port aux config + * @dp_opts: Display port optional config + * @dp_clks: Display port clocks */ struct qmp_phy { struct phy *phy; From 1de7c6ad9a093100682e8d28e8e066d86a339b48 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 16 Nov 2021 16:09:49 +0530 Subject: [PATCH 030/868] phy: qualcomm: usb-hsic: Fix the kernel-doc warn The comment is not kernel-doc one and starts with /**, so fix that. drivers/phy/qualcomm/phy-qcom-usb-hsic.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Signed-off-by: Vinod Koul Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211116103951.34482-4-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-usb-hsic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hsic.c b/drivers/phy/qualcomm/phy-qcom-usb-hsic.c index 04d18d52f700..716a77748ed8 100644 --- a/drivers/phy/qualcomm/phy-qcom-usb-hsic.c +++ b/drivers/phy/qualcomm/phy-qcom-usb-hsic.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * Copyright (C) 2016 Linaro Ltd */ #include From 466b1516e74ffbb268dce83e41ca62bcfc822cb6 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 16 Nov 2021 16:09:51 +0530 Subject: [PATCH 031/868] phy: ti: tusb1210: Fix the kernel-doc warn The comment is not kernel-doc one and starts with /**, so fix that. drivers/phy/ti/phy-tusb1210.c:16: warning: expecting prototype for tusb1210.c(). Prototype was for TUSB1210_VENDOR_SPECIFIC2() instead Signed-off-by: Vinod Koul Reviewed-by: Liam Beguin Link: https://lore.kernel.org/r/20211116103951.34482-6-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-tusb1210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index a63213f5972a..15c1c79e5c29 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only -/** +/* * tusb1210.c - TUSB1210 USB ULPI PHY driver * * Copyright (C) 2015 Intel Corporation From 0e4190d762ef2609111507e1b9553a166436f556 Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Sat, 20 Nov 2021 21:28:56 +0000 Subject: [PATCH 032/868] hwmon: (sht4x) Fix EREMOTEIO errors Per datasheet, SHT4x may need up to 8.2ms for a "high repeatability" measurement to complete. Attempting to read the result too early triggers a NAK which then causes an EREMOTEIO error. This behavior has been confirmed with a logic analyzer while running the I2C bus at only 40kHz. The low frequency precludes any signal-integrity issues, which was also confirmed by the absence of any CRC8 errors. In this configuration, a NAK occurred on any read that followed the measurement command within less than 8.2ms. Signed-off-by: David Mosberger-Tang Link: https://lore.kernel.org/r/20211120212849.2300854-2-davidm@egauge.net Signed-off-by: Guenter Roeck --- drivers/hwmon/sht4x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/sht4x.c b/drivers/hwmon/sht4x.c index 09c2a0b06444..3415d7a0e0fc 100644 --- a/drivers/hwmon/sht4x.c +++ b/drivers/hwmon/sht4x.c @@ -23,7 +23,7 @@ /* * I2C command delays (in microseconds) */ -#define SHT4X_MEAS_DELAY 1000 +#define SHT4X_MEAS_DELAY_HPM 8200 /* see t_MEAS,h in datasheet */ #define SHT4X_DELAY_EXTRA 10000 /* @@ -90,7 +90,7 @@ static int sht4x_read_values(struct sht4x_data *data) if (ret < 0) goto unlock; - usleep_range(SHT4X_MEAS_DELAY, SHT4X_MEAS_DELAY + SHT4X_DELAY_EXTRA); + usleep_range(SHT4X_MEAS_DELAY_HPM, SHT4X_MEAS_DELAY_HPM + SHT4X_DELAY_EXTRA); ret = i2c_master_recv(client, raw_data, SHT4X_RESPONSE_LENGTH); if (ret != SHT4X_RESPONSE_LENGTH) { From f2c2e9ebb2cf476c09e59d073db031fbf7ef4914 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 14:36:11 +0300 Subject: [PATCH 033/868] ARM: dts: imx6qp-prtwd3: update RGMII delays for sja1105 switch In the new behavior, the sja1105 driver expects there to be explicit RGMII delays present on the fixed-link ports, otherwise it will complain that it falls back to legacy behavior, which is to apply RGMII delays incorrectly derived from the phy-mode string. In this case, the legacy behavior of the driver is to apply both RX and TX delays. To preserve that, add explicit 2 nanosecond delays, which are identical with what the driver used to add (a 90 degree phase shift). The delays from the phy-mode are ignored by new kernels (it's still RGMII as long as it's "rgmii*" something), and the explicit {rx,tx}-internal-delay-ps properties are ignored by old kernels, so the change works both ways. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Oleksij Rempel Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qp-prtwd3.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/imx6qp-prtwd3.dts b/arch/arm/boot/dts/imx6qp-prtwd3.dts index 7648e8a02000..cf6571cc4682 100644 --- a/arch/arm/boot/dts/imx6qp-prtwd3.dts +++ b/arch/arm/boot/dts/imx6qp-prtwd3.dts @@ -178,6 +178,8 @@ label = "cpu"; ethernet = <&fec>; phy-mode = "rgmii-id"; + rx-internal-delay-ps = <2000>; + tx-internal-delay-ps = <2000>; fixed-link { speed = <100>; From e691f9282a89e24a8e87cdb91a181c6283ee5124 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 14:36:12 +0300 Subject: [PATCH 034/868] ARM: dts: ls1021a-tsn: update RGMII delays for sja1105 switch In the new behavior, the sja1105 driver expects there to be explicit RGMII delays present on the fixed-link ports, otherwise it will complain that it falls back to legacy behavior, which is to apply RGMII delays incorrectly derived from the phy-mode string. In this case, the legacy behavior of the driver is to not apply delays in any direction (mostly because the SJA1105T can't do that, so this board uses PCB traces). To preserve that but also silence the driver, use explicit delays of 0 ns. The delay information from the phy-mode is ignored by new kernels (it's still RGMII as long as it's "rgmii*" something), and the explicit {rx,tx}-internal-delay-ps properties are ignored by old kernels, so the change works both ways. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Shawn Guo --- arch/arm/boot/dts/ls1021a-tsn.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/ls1021a-tsn.dts b/arch/arm/boot/dts/ls1021a-tsn.dts index ff0ffb22768b..1ea32fff4120 100644 --- a/arch/arm/boot/dts/ls1021a-tsn.dts +++ b/arch/arm/boot/dts/ls1021a-tsn.dts @@ -91,6 +91,8 @@ /* Internal port connected to eth2 */ ethernet = <&enet2>; phy-mode = "rgmii"; + rx-internal-delay-ps = <0>; + tx-internal-delay-ps = <0>; reg = <4>; fixed-link { From 25501d8d3ab3f5dc83799731dfb8ebaf03ca5000 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 20 Oct 2021 14:36:13 +0300 Subject: [PATCH 035/868] arm64: dts: lx2160abluebox3: update RGMII delays for sja1105 switch In the new behavior, the sja1105 driver expects there to be explicit RGMII delays present on the fixed-link ports, otherwise it will complain that it falls back to legacy behavior, which is to apply RGMII delays incorrectly derived from the phy-mode string. In this case, the legacy behavior of the driver is to apply both RX and TX delays. To preserve that, add explicit 2 nanosecond delays, which are identical with what the driver used to add (a 90 degree phase shift). The delays from the phy-mode are ignored by new kernels (it's still RGMII as long as it's "rgmii*" something), and the explicit {rx,tx}-internal-delay-ps properties are ignored by old kernels, so the change works both ways. Signed-off-by: Vladimir Oltean Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-lx2160a-bluebox3.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a-bluebox3.dts b/arch/arm64/boot/dts/freescale/fsl-lx2160a-bluebox3.dts index b21be03da0af..042c486bdda2 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a-bluebox3.dts +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a-bluebox3.dts @@ -386,6 +386,8 @@ reg = <2>; ethernet = <&dpmac17>; phy-mode = "rgmii-id"; + rx-internal-delay-ps = <2000>; + tx-internal-delay-ps = <2000>; fixed-link { speed = <1000>; @@ -529,6 +531,8 @@ reg = <2>; ethernet = <&dpmac18>; phy-mode = "rgmii-id"; + rx-internal-delay-ps = <2000>; + tx-internal-delay-ps = <2000>; fixed-link { speed = <1000>; From fde272e78e004a45c7e4976876277d7e6a5a0ede Mon Sep 17 00:00:00 2001 From: Kister Genesis Jimenez Date: Mon, 15 Nov 2021 11:41:47 +0100 Subject: [PATCH 036/868] iio: gyro: adxrs290: fix data signedness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly sign-extend the rate and temperature data. Fixes: 2c8920fff1457 ("iio: gyro: Add driver support for ADXRS290") Signed-off-by: Kister Genesis Jimenez Signed-off-by: Nuno Sá Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211115104147.18669-1-nuno.sa@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/adxrs290.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/gyro/adxrs290.c b/drivers/iio/gyro/adxrs290.c index 3e0734ddafe3..600e9725da78 100644 --- a/drivers/iio/gyro/adxrs290.c +++ b/drivers/iio/gyro/adxrs290.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -124,7 +125,7 @@ static int adxrs290_get_rate_data(struct iio_dev *indio_dev, const u8 cmd, int * goto err_unlock; } - *val = temp; + *val = sign_extend32(temp, 15); err_unlock: mutex_unlock(&st->lock); @@ -146,7 +147,7 @@ static int adxrs290_get_temp_data(struct iio_dev *indio_dev, int *val) } /* extract lower 12 bits temperature reading */ - *val = temp & 0x0FFF; + *val = sign_extend32(temp, 11); err_unlock: mutex_unlock(&st->lock); From 92beafb76a31bdc02649eb44e93a8e4f4cfcdbe8 Mon Sep 17 00:00:00 2001 From: Evgeny Boger Date: Wed, 17 Nov 2021 00:37:46 +0300 Subject: [PATCH 037/868] iio: adc: axp20x_adc: fix charging current reporting on AXP22x Both the charging and discharging currents on AXP22x are stored as 12-bit integers, in accordance with the datasheet. It's also confirmed by vendor BSP (axp20x_adc.c:axp22_icharge_to_mA). The scale factor of 0.5 is never mentioned in datasheet, nor in the vendor source code. I think it was here to compensate for erroneous addition bit in register width. Tested on custom A40i+AXP221s board with external ammeter as a reference. Fixes: 0e34d5de961d ("iio: adc: add support for X-Powers AXP20X and AXP22X PMICs ADCs") Signed-off-by: Evgeny Boger Acked-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20211116213746.264378-1-boger@wirenboard.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/axp20x_adc.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c index 3e0c0233b431..df99f1365c39 100644 --- a/drivers/iio/adc/axp20x_adc.c +++ b/drivers/iio/adc/axp20x_adc.c @@ -251,19 +251,8 @@ static int axp22x_adc_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val) { struct axp20x_adc_iio *info = iio_priv(indio_dev); - int size; - /* - * N.B.: Unlike the Chinese datasheets tell, the charging current is - * stored on 12 bits, not 13 bits. Only discharging current is on 13 - * bits. - */ - if (chan->type == IIO_CURRENT && chan->channel == AXP22X_BATT_DISCHRG_I) - size = 13; - else - size = 12; - - *val = axp20x_read_variable_width(info->regmap, chan->address, size); + *val = axp20x_read_variable_width(info->regmap, chan->address, 12); if (*val < 0) return *val; @@ -386,9 +375,8 @@ static int axp22x_adc_scale(struct iio_chan_spec const *chan, int *val, return IIO_VAL_INT_PLUS_MICRO; case IIO_CURRENT: - *val = 0; - *val2 = 500000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 1; + return IIO_VAL_INT; case IIO_TEMP: *val = 100; From 652e7df485c6884d552085ae2c73efa6cfea3547 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Thu, 4 Nov 2021 01:24:08 -0700 Subject: [PATCH 038/868] iio: at91-sama5d2: Fix incorrect sign extension Use scan_type when processing raw data which also fixes that the sign extension was from the wrong bit. Use channel definition as root of trust and replace constant when reading elements directly using the raw sysfs attributes. Fixes: 6794e23fa3fe ("iio: adc: at91-sama5d2_adc: add support for oversampling resolution") Signed-off-by: Gwendal Grignou Reviewed-by: Eugen Hristev Cc: Link: https://lore.kernel.org/r/20211104082413.3681212-9-gwendal@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 4c922ef634f8..92a57cf10fba 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1586,7 +1586,8 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev, *val = st->conversion_value; ret = at91_adc_adjust_val_osr(st, val); if (chan->scan_type.sign == 's') - *val = sign_extend32(*val, 11); + *val = sign_extend32(*val, + chan->scan_type.realbits - 1); st->conversion_done = false; } From 423e85e97aaf69e5198bbec6811e3825c8b5019a Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Tue, 16 Nov 2021 10:46:16 +0200 Subject: [PATCH 039/868] ARM: rockchip: Use memcpy_toio instead of memcpy on smp bring-up This fixes a potential kernel panic on memcpy when FORTIFY_SOURCE is enabled. Because memory is iomem use appropriate function for accessing it. Signed-off-by: Ivan T. Ivanov Link: https://lore.kernel.org/r/20211116084616.24811-1-iivanov@suse.de Signed-off-by: Heiko Stuebner --- arch/arm/mach-rockchip/platsmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c index d60856898d97..5ec58d004b7d 100644 --- a/arch/arm/mach-rockchip/platsmp.c +++ b/arch/arm/mach-rockchip/platsmp.c @@ -189,7 +189,7 @@ static int __init rockchip_smp_prepare_sram(struct device_node *node) rockchip_boot_fn = __pa_symbol(secondary_startup); /* copy the trampoline to sram, that runs during startup of the core */ - memcpy(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); + memcpy_toio(sram_base_addr, &rockchip_secondary_trampoline, trampoline_sz); flush_cache_all(); outer_clean_range(0, trampoline_sz); From 6dd0053683804427529ef3523f7872f473440a19 Mon Sep 17 00:00:00 2001 From: Artem Lapkin Date: Mon, 15 Nov 2021 16:33:21 +0800 Subject: [PATCH 040/868] arm64: dts: rockchip: remove mmc-hs400-enhanced-strobe from rk3399-khadas-edge Remove mmc-hs400-enhanced-strobe from the rk3399-khadas-edge dts to improve compatibility with a wider range of eMMC chips. Before (BJTD4R 29.1 GiB): [ 7.001493] mmc2: CQHCI version 5.10 [ 7.027971] mmc2: SDHCI controller on fe330000.mmc [fe330000.mmc] using ADMA ....... [ 7.207086] mmc2: mmc_select_hs400es failed, error -110 [ 7.207129] mmc2: error -110 whilst initialising MMC card [ 7.308893] mmc2: mmc_select_hs400es failed, error -110 [ 7.308921] mmc2: error -110 whilst initialising MMC card [ 7.427524] mmc2: mmc_select_hs400es failed, error -110 [ 7.427546] mmc2: error -110 whilst initialising MMC card [ 7.590993] mmc2: mmc_select_hs400es failed, error -110 [ 7.591012] mmc2: error -110 whilst initialising MMC card After: [ 6.960785] mmc2: CQHCI version 5.10 [ 6.984672] mmc2: SDHCI controller on fe330000.mmc [fe330000.mmc] using ADMA [ 7.175021] mmc2: Command Queue Engine enabled [ 7.175053] mmc2: new HS400 MMC card at address 0001 [ 7.175808] mmcblk2: mmc2:0001 BJTD4R 29.1 GiB [ 7.176033] mmcblk2boot0: mmc2:0001 BJTD4R 4.00 MiB [ 7.176245] mmcblk2boot1: mmc2:0001 BJTD4R 4.00 MiB [ 7.176495] mmcblk2rpmb: mmc2:0001 BJTD4R 4.00 MiB, chardev (242:0) Fixes: c2aacceedc86 ("arm64: dts: rockchip: Add support for Khadas Edge/Edge-V/Captain boards") Signed-off-by: Artem Lapkin Link: https://lore.kernel.org/r/20211115083321.2627461-1-art@khadas.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index d5c7648c841d..f1fcc6b5b402 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -705,7 +705,6 @@ &sdhci { bus-width = <8>; mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; non-removable; status = "okay"; }; From 772fb46109f635dd75db20c86b7eaf48efa46cef Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 2 Nov 2021 18:29:07 +0000 Subject: [PATCH 041/868] arm64: dts: rockchip: fix rk3308-roc-cc vcc-sd supply Correct a typo in the vin-supply property. The input supply is always-on, so this mistake doesn't affect whether the supply is actually enabled correctly. Fixes: 4403e1237be3 ("arm64: dts: rockchip: Add devicetree for board roc-rk3308-cc") Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20211102182908.3409670-2-john@metanate.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index 665b2e69455d..ea6820902ede 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -97,7 +97,7 @@ regulator-max-microvolt = <3300000>; regulator-always-on; regulator-boot-on; - vim-supply = <&vcc_io>; + vin-supply = <&vcc_io>; }; vdd_core: vdd-core { From 2b454a90e2ccdd6e03f88f930036da4df577be76 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 2 Nov 2021 18:29:08 +0000 Subject: [PATCH 042/868] arm64: dts: rockchip: fix rk3399-leez-p710 vcc3v3-lan supply Correct a typo in the vin-supply property. The input supply is always-on, so this mistake doesn't affect whether the supply is actually enabled correctly. Fixes: fc702ed49a86 ("arm64: dts: rockchip: Add dts for Leez RK3399 P710 SBC") Signed-off-by: John Keeping Link: https://lore.kernel.org/r/20211102182908.3409670-3-john@metanate.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts index 7c93f840bc64..e890166e7fd4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts @@ -55,7 +55,7 @@ regulator-boot-on; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - vim-supply = <&vcc3v3_sys>; + vin-supply = <&vcc3v3_sys>; }; vcc3v3_sys: vcc3v3-sys { From 8240e87f16d17a9592c9d67857a3dcdbcb98f10d Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Wed, 27 Oct 2021 16:37:25 +0200 Subject: [PATCH 043/868] arm64: dts: rockchip: fix audio-supply for Rock Pi 4 As stated in the schematics [1] and [2] P5 the APIO5 domain is supplied by RK808-D Buck4, which in our case vcc1v8_codec - i.e. a 1.8 V regulator. Currently only white noise comes from the ES8316's output, which - for whatever reason - came up only after the the correct switch from i2s0_8ch_bus to i2s0_2ch_bus for i2s0's pinctrl was done. Fix this by setting the correct regulator for audio-supply. [1] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/rockpi4_v13_sch_20181112.pdf [2] https://dl.radxa.com/rockpi4/docs/hw/rockpi4/rockpi_4c_v12_sch_20200620.pdf Fixes: 1b5715c602fd ("arm64: dts: rockchip: add ROCK Pi 4 DTS support") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20211027143726.165809-1-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 98136c88fa49..6a434be62819 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -502,7 +502,7 @@ status = "okay"; bt656-supply = <&vcc_3v0>; - audio-supply = <&vcc_3v0>; + audio-supply = <&vcc1v8_codec>; sdmmc-supply = <&vcc_sdio>; gpio1830-supply = <&vcc_3v0>; }; From aef4b9a89a376a9cabe5e744729914e7766c59bb Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Wed, 20 Oct 2021 11:59:23 +0200 Subject: [PATCH 044/868] arm64: dts: rockchip: fix poweroff on helios64 Adding the rockchip,system-power-controller property here will use the rk808 to power off the system. Fixes: 09e006cfb43e ("arm64: dts: rockchip: Add basic support for Kobol's Helios64") Signed-off-by: Florian Klink Tested-by: Dennis Gilmore Link: https://lore.kernel.org/r/20211020095926.735938-2-flokli@flokli.de Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts index 63c7681843da..b6ac00f64613 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-kobol-helios64.dts @@ -276,6 +276,7 @@ clock-output-names = "xin32k", "rk808-clkout2"; pinctrl-names = "default"; pinctrl-0 = <&pmic_int_l>; + rockchip,system-power-controller; vcc1-supply = <&vcc5v0_sys>; vcc2-supply = <&vcc5v0_sys>; vcc3-supply = <&vcc5v0_sys>; From 885633075847f475f26a29249d772cc0da85d8cd Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Mon, 25 Oct 2021 12:16:56 -0600 Subject: [PATCH 045/868] dmaengine: dw-axi-dmac: Fix uninitialized variable in axi_chan_block_xfer_start() Coverity complains of an uninitialized variable: 5. uninit_use_in_call: Using uninitialized value config.dst_per when calling axi_chan_config_write. [show details] 6. uninit_use_in_call: Using uninitialized value config.hs_sel_src when calling axi_chan_config_write. [show details] CID 121164 (#1-3 of 3): Uninitialized scalar variable (UNINIT) 7. uninit_use_in_call: Using uninitialized value config.src_per when calling axi_chan_config_write. [show details] 418 axi_chan_config_write(chan, &config); Fix this by initializing the structure to 0 which should at least be benign in axi_chan_config_write(). Also fix what looks like a cut-n-paste error when initializing config.hs_sel_dst. Fixes: 824351668a413 ("dmaengine: dw-axi-dmac: support DMAX_NUM_CHANNELS > 8") Cc: Eugeniy Paltsev Cc: Vinod Koul Cc: dmaengine@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Tim Gardner Link: https://lore.kernel.org/r/20211025181656.31658-1-tim.gardner@canonical.com Signed-off-by: Vinod Koul --- drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index cd0d745eb071..33baf1591a49 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -373,7 +373,7 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, struct axi_dma_desc *first) { u32 priority = chan->chip->dw->hdata->priority[chan->id]; - struct axi_dma_chan_config config; + struct axi_dma_chan_config config = {}; u32 irq_mask; u8 lms = 0; /* Select AXI0 master for LLI fetching */ @@ -391,7 +391,7 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan, config.tt_fc = DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC; config.prior = priority; config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW; - config.hs_sel_dst = DWAXIDMAC_HS_SEL_HW; + config.hs_sel_src = DWAXIDMAC_HS_SEL_HW; switch (chan->direction) { case DMA_MEM_TO_DEV: dw_axi_dma_set_byte_halfword(chan, true); From 1ffc6f359f7ab114ad0d2bbe6a85cbd848709ab2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 9 Nov 2021 22:09:56 +0100 Subject: [PATCH 046/868] dmaengine: dw-edma: Fix return value check for dma_set_mask_and_coherent() The commit in the Fixes: tag has changed the logic of the code and now it is likely that the probe will return an early success (0), even if not completely executed. This should lead to a crash or similar issue later on when the code accesses to some never allocated resources. Change the '!err' into a 'err' when checking if 'dma_set_mask_and_coherent()' has failed or not. While at it, simplify the code and remove the "can't success code" related to 32 DMA mask. As stated in [1], 'dma_set_mask_and_coherent(DMA_BIT_MASK(64))' can't fail if 'dev->dma_mask' is non-NULL. And if it is NULL, it would fail for the same reason when tried with DMA_BIT_MASK(32). [1]: https://lkml.org/lkml/2021/6/7/398 Fixes: ecb8c88bd31c ("dmaengine: dw-edma-pcie: switch from 'pci_' to 'dma_' API") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/935fbb40ae930c5fe87482a41dcb73abf2257973.1636492127.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-pcie.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c index 198f6cd8ac1b..cee7aa231d7b 100644 --- a/drivers/dma/dw-edma/dw-edma-pcie.c +++ b/drivers/dma/dw-edma/dw-edma-pcie.c @@ -187,17 +187,9 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev, /* DMA configuration */ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) { + if (err) { pci_err(pdev, "DMA mask 64 set failed\n"); return err; - } else { - pci_err(pdev, "DMA mask 64 set failed\n"); - - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - pci_err(pdev, "DMA mask 32 set failed\n"); - return err; - } } /* Data structure allocation */ From fa51b16d05583c7aebbc06330afb50276243d198 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 17 Nov 2021 10:03:51 -0700 Subject: [PATCH 047/868] dmaengine: idxd: fix calling wq quiesce inside spinlock Dan reports that smatch has found idxd_wq_quiesce() is being called inside the idxd->dev_lock. idxd_wq_quiesce() calls wait_for_completion() and therefore it can sleep. Move the call outside of the spinlock as it does not need device lock. Fixes: 5b0c68c473a1 ("dmaengine: idxd: support reporting of halt interrupt") Reported-by: Dan Carpenter Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163716858508.1721911.15051495873516709923.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 17f2f8a31b63..cf2c8bc4f147 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -137,10 +137,10 @@ halt: INIT_WORK(&idxd->work, idxd_device_reinit); queue_work(idxd->wq, &idxd->work); } else { - spin_lock(&idxd->dev_lock); idxd->state = IDXD_DEV_HALTED; idxd_wqs_quiesce(idxd); idxd_wqs_unmap_portal(idxd); + spin_lock(&idxd->dev_lock); idxd_device_clear_state(idxd); dev_err(&idxd->pdev->dev, "idxd halted, need %s.\n", From 017a716e7b0e9d4ac06a4d7779bd04fca009bbc9 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 21 Nov 2021 09:35:37 +0100 Subject: [PATCH 048/868] bus: sunxi-rsb: Fix shutdown Function sunxi_rsb_hw_exit() is sometimes called with pm runtime disabled, so in such cases pm_runtime_resume() will fail with -EACCES. Instead of doing whole dance of enabling pm runtime and thus clock just to disable it again immediately, just check if disabling clock is needed. That way calling pm_runtime_resume() is not needed at all. Fixes: 4a0dbc12e618 ("bus: sunxi-rsb: Implement runtime power management") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20211121083537.612473-1-jernej.skrabec@gmail.com --- drivers/bus/sunxi-rsb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 6f225dddc74f..4566e730ef2b 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -687,11 +687,11 @@ err_clk_disable: static void sunxi_rsb_hw_exit(struct sunxi_rsb *rsb) { - /* Keep the clock and PM reference counts consistent. */ - if (pm_runtime_status_suspended(rsb->dev)) - pm_runtime_resume(rsb->dev); reset_control_assert(rsb->rstc); - clk_disable_unprepare(rsb->clk); + + /* Keep the clock and PM reference counts consistent. */ + if (!pm_runtime_status_suspended(rsb->dev)) + clk_disable_unprepare(rsb->clk); } static int __maybe_unused sunxi_rsb_runtime_suspend(struct device *dev) From 0d1c7e5544581646ea22c42012434e92dfb40a58 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Sat, 20 Nov 2021 11:45:30 +0530 Subject: [PATCH 049/868] phy: qualcomm: ipq806x-usb: Fix kernel-doc style The functions are documented but there were style issues, so fix the style and add missing description for phy_dwc3 drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c:130: drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c:174: drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c:212: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst Reviewed-by: Randy Dunlap Signed-off-by: Vinod Koul Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211120061531.410771-1-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c | 26 +++++++++++---------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c b/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c index bfff0c8c9130..fec1da470d26 100644 --- a/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-ipq806x-usb.c @@ -127,12 +127,13 @@ struct phy_drvdata { }; /** - * Write register and read back masked value to confirm it is written + * usb_phy_write_readback() - Write register and read back masked value to + * confirm it is written * - * @base - QCOM DWC3 PHY base virtual address. - * @offset - register offset. - * @mask - register bitmask specifying what should be updated - * @val - value to write. + * @phy_dwc3: QCOM DWC3 phy context + * @offset: register offset. + * @mask: register bitmask specifying what should be updated + * @val: value to write. */ static inline void usb_phy_write_readback(struct usb_phy *phy_dwc3, u32 offset, @@ -171,11 +172,11 @@ static int wait_for_latch(void __iomem *addr) } /** - * Write SSPHY register + * usb_ss_write_phycreg() - Write SSPHY register * - * @base - QCOM DWC3 PHY base virtual address. - * @addr - SSPHY address to write. - * @val - value to write. + * @phy_dwc3: QCOM DWC3 phy context + * @addr: SSPHY address to write. + * @val: value to write. */ static int usb_ss_write_phycreg(struct usb_phy *phy_dwc3, u32 addr, u32 val) @@ -209,10 +210,11 @@ err_wait: } /** - * Read SSPHY register. + * usb_ss_read_phycreg() - Read SSPHY register. * - * @base - QCOM DWC3 PHY base virtual address. - * @addr - SSPHY address to read. + * @phy_dwc3: QCOM DWC3 phy context + * @addr: SSPHY address to read. + * @val: pointer in which read is store. */ static int usb_ss_read_phycreg(struct usb_phy *phy_dwc3, u32 addr, u32 *val) From 7947113fd07a372de813edddfce6cb0a38ab66e0 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Sat, 20 Nov 2021 11:45:31 +0530 Subject: [PATCH 050/868] phy: ti: omap-usb2: Fix the kernel-doc style The documentation uses incorrect style, so fix that. drivers/phy/ti/phy-omap-usb2.c:102: warning: Function parameter or member 'comparator' not described in 'omap_usb2_set_comparator' While at it, use a single line for function description Signed-off-by: Vinod Koul Reviewed-by: Liam Beguin Link: https://lore.kernel.org/r/20211120061531.410771-2-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-omap-usb2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/phy/ti/phy-omap-usb2.c b/drivers/phy/ti/phy-omap-usb2.c index ebceb1520ce8..3a505fe5715a 100644 --- a/drivers/phy/ti/phy-omap-usb2.c +++ b/drivers/phy/ti/phy-omap-usb2.c @@ -89,9 +89,9 @@ static inline void omap_usb_writel(void __iomem *addr, unsigned int offset, } /** - * omap_usb2_set_comparator - links the comparator present in the system with - * this phy - * @comparator - the companion phy(comparator) for this phy + * omap_usb2_set_comparator() - links the comparator present in the system with this phy + * + * @comparator: the companion phy(comparator) for this phy * * The phy companion driver should call this API passing the phy_companion * filled with set_vbus and start_srp to be used by usb phy. From a1b6c81ba41fe0458c3678e7fa23a25775978108 Mon Sep 17 00:00:00 2001 From: Liam Beguin Date: Tue, 16 Nov 2021 19:38:41 -0500 Subject: [PATCH 051/868] dt-bindings: phy: zynqmp-psgtr: fix USB phy name PHY_TYPE_USB is undefined and was added as PHY_TYPE_USB2 and PHY_TYPE_USB3 in 2fbbc96d1600 (phy: Add PHY header file for DT x Driver defines, 2014-11-04). Fix documentation to avoid misleading users. Signed-off-by: Liam Beguin Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20211117003841.2030813-1-lvb@xiphos.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml b/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml index 04d5654efb38..79906519c652 100644 --- a/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml +++ b/Documentation/devicetree/bindings/phy/xlnx,zynqmp-psgtr.yaml @@ -29,7 +29,7 @@ properties: - PHY_TYPE_PCIE - PHY_TYPE_SATA - PHY_TYPE_SGMII - - PHY_TYPE_USB + - PHY_TYPE_USB3 - description: The PHY instance minimum: 0 maximum: 1 # for DP, SATA or USB From f0ae8685b2858fc1dabf5ea743642abb5f242375 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Nov 2021 10:48:43 +0300 Subject: [PATCH 052/868] phy: HiSilicon: Fix copy and paste bug in error handling This should check ">pmctrl" instead of "->sysctrl". This bug could potentially lead to a crash if we dereference the error pointer. Fixes: 73075011ffff ("phy: HiSilicon: Add driver for Kirin 970 PCIe PHY") Signed-off-by: Dan Carpenter Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20211117074843.GE5237@kili Signed-off-by: Vinod Koul --- drivers/phy/hisilicon/phy-hi3670-pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/hisilicon/phy-hi3670-pcie.c b/drivers/phy/hisilicon/phy-hi3670-pcie.c index c64c6679b1b9..0ac9634b398d 100644 --- a/drivers/phy/hisilicon/phy-hi3670-pcie.c +++ b/drivers/phy/hisilicon/phy-hi3670-pcie.c @@ -757,8 +757,8 @@ static int hi3670_pcie_phy_get_resources(struct hi3670_pcie_phy *phy, return PTR_ERR(phy->sysctrl); phy->pmctrl = syscon_regmap_lookup_by_compatible("hisilicon,hi3670-pmctrl"); - if (IS_ERR(phy->sysctrl)) - return PTR_ERR(phy->sysctrl); + if (IS_ERR(phy->pmctrl)) + return PTR_ERR(phy->pmctrl); /* clocks */ phy->phy_ref_clk = devm_clk_get(dev, "phy_ref"); From c88c5e461939a06ae769a01649d5c6b5a156f883 Mon Sep 17 00:00:00 2001 From: Mathew McBride Date: Mon, 22 Nov 2021 02:55:54 +0000 Subject: [PATCH 053/868] arm64: dts: ten64: remove redundant interrupt declaration for gpio-keys gpio-keys already 'inherits' the interrupts from the controller of the specified GPIO, so having another declaration is redundant. On >=v5.15 this started causing an oops under gpio_keys_probe as the IRQ was already claimed. Signed-off-by: Mathew McBride Fixes: 418962eea358 ("arm64: dts: add device tree for Traverse Ten64 (LS1088A)") Cc: stable@vger.kernel.org Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts index 3063851c2fb9..d3f03dcbb8c3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a-ten64.dts @@ -38,7 +38,6 @@ powerdn { label = "External Power Down"; gpios = <&gpio1 17 GPIO_ACTIVE_LOW>; - interrupts = <&gpio1 17 IRQ_TYPE_EDGE_FALLING>; linux,code = ; }; @@ -46,7 +45,6 @@ admin { label = "ADMIN button"; gpios = <&gpio3 8 GPIO_ACTIVE_HIGH>; - interrupts = <&gpio3 8 IRQ_TYPE_EDGE_RISING>; linux,code = ; }; }; From 2d5446da5acecf9c67db1c9d55ae2c3e5de01f8d Mon Sep 17 00:00:00 2001 From: Guodong Liu Date: Wed, 10 Nov 2021 15:19:00 +0800 Subject: [PATCH 054/868] pinctrl: mediatek: fix global-out-of-bounds issue When eint virtual eint number is greater than gpio number, it maybe produce 'desc[eint_n]' size globle-out-of-bounds issue. Signed-off-by: Guodong Liu Signed-off-by: Zhiyong Tao Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20211110071900.4490-2-zhiyong.tao@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 91553b2fc160..53779822348d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -285,8 +285,12 @@ static int mtk_xt_get_gpio_n(void *data, unsigned long eint_n, desc = (const struct mtk_pin_desc *)hw->soc->pins; *gpio_chip = &hw->chip; - /* Be greedy to guess first gpio_n is equal to eint_n */ - if (desc[eint_n].eint.eint_n == eint_n) + /* + * Be greedy to guess first gpio_n is equal to eint_n. + * Only eint virtual eint number is greater than gpio number. + */ + if (hw->soc->npins > eint_n && + desc[eint_n].eint.eint_n == eint_n) *gpio_n = eint_n; else *gpio_n = mtk_xt_find_eint_num(hw, eint_n); From 94047df12fec0e51e860b5317223f67a3ea4eb07 Mon Sep 17 00:00:00 2001 From: Luiz Sampaio Date: Tue, 9 Nov 2021 19:07:31 -0300 Subject: [PATCH 055/868] auxdisplay: charlcd: fixing coding style issue Removing 'int' from 'unsigned long int' declaration, which is unnecessary. Signed-off-by: Luiz Sampaio Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 304accde365c..cca3b600c0ba 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -37,7 +37,7 @@ struct charlcd_priv { bool must_clear; /* contains the LCD config state */ - unsigned long int flags; + unsigned long flags; /* Current escape sequence and it's length or -1 if outside */ struct { From 4daa9ff89ef27be43c15995412d6aee393a78200 Mon Sep 17 00:00:00 2001 From: Luiz Sampaio Date: Tue, 9 Nov 2021 19:07:32 -0300 Subject: [PATCH 056/868] auxdisplay: charlcd: checking for pointer reference before dereferencing Check if the pointer lcd->ops->init_display exists before dereferencing it. If a driver called charlcd_init() without defining the ops, this would return segmentation fault, as happened to me when implementing a charlcd driver. Checking the pointer before dereferencing protects from segmentation fault. Signed-off-by: Luiz Sampaio Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index cca3b600c0ba..6d309e4971b6 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -578,6 +578,9 @@ static int charlcd_init(struct charlcd *lcd) * Since charlcd_init_display() needs to write data, we have to * enable mark the LCD initialized just before. */ + if (WARN_ON(!lcd->ops->init_display)) + return -EINVAL; + ret = lcd->ops->init_display(lcd); if (ret) return ret; From 6331b8765cd0634a4e4cdcc1a6f1a74196616b94 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 16 Jun 2021 15:46:44 +0800 Subject: [PATCH 057/868] riscv: dts: unleashed: Add gpio card detect to mmc-spi-slot Per HiFive Unleashed schematics, the card detect signal of the micro SD card is connected to gpio pin #11, which should be reflected in the DT via the property, as described in Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt. [1] https://sifive.cdn.prismic.io/sifive/c52a8e32-05ce-4aaf-95c8-7bf8453f8698_hifive-unleashed-a00-schematics-1.pdf Signed-off-by: Bin Meng Fixes: d573b5558abb ("riscv: dts: add initial board data for the SiFive HiFive Unmatched") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index ba304d4c455c..ced0d4e47938 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -76,6 +76,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; + gpios = <&gpio 11 GPIO_ACTIVE_LOW>; }; }; From 298d03c2d7f1b5daacb6d4f4053fd3d677d67087 Mon Sep 17 00:00:00 2001 From: Bin Meng Date: Wed, 16 Jun 2021 15:46:45 +0800 Subject: [PATCH 058/868] riscv: dts: unmatched: Add gpio card detect to mmc-spi-slot Per HiFive Unmatched schematics, the card detect signal of the micro SD card is connected to gpio pin #15, which should be reflected in the DT via the property, as described in Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt. [1] https://sifive.cdn.prismic.io/sifive/6a06d6c0-6e66-49b5-8e9e-e68ce76f4192_hifive-unmatched-schematics-v3.pdf Signed-off-by: Bin Meng Fixes: d573b5558abb ("riscv: dts: add initial board data for the SiFive HiFive Unmatched") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 4f66919215f6..3c796d64cf51 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -2,6 +2,7 @@ /* Copyright (c) 2020 SiFive, Inc */ #include "fu740-c000.dtsi" +#include #include /* Clock frequency (in Hz) of the PCB crystal for rtcclk */ @@ -223,6 +224,7 @@ spi-max-frequency = <20000000>; voltage-ranges = <3300 3300>; disable-wp; + gpios = <&gpio 15 GPIO_ACTIVE_LOW>; }; }; From 1b8d2789dad0005fd5e7d35dab26a8e1203fb6da Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 24 Nov 2021 12:07:39 -0500 Subject: [PATCH 059/868] dm btree remove: fix use after free in rebalance_children() Move dm_tm_unlock() after dm_tm_dec(). Cc: stable@vger.kernel.org Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/persistent-data/dm-btree-remove.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 70532335c7c7..cb670f16e98e 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -423,9 +423,9 @@ static int rebalance_children(struct shadow_spine *s, memcpy(n, dm_block_data(child), dm_bm_block_size(dm_tm_get_bm(info->tm))); - dm_tm_unlock(info->tm, child); dm_tm_dec(info->tm, dm_block_location(child)); + dm_tm_unlock(info->tm, child); return 0; } From 12dc48f545fd349ef2cadcc4d816706951b87998 Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Wed, 24 Nov 2021 16:51:01 +0100 Subject: [PATCH 060/868] ASoC: dt-bindings: wlf,wm8962: add missing interrupt property Both, hardware and drivers does support interrupts. Fix warnings as: arch/arm/boot/dts/tegra30-microsoft-surface-rt-efi.dt.yaml: audio-codec@1a: 'interrupt-parent', 'interrupts' do not match any of the regexes: 'pinctrl-[0-9]+' From schema: /home/runner/work/linux/linux/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml Fixes: cd51b942f344 ("ASoC: dt-bindings: wlf,wm8962: Convert to json-schema") Signed-off-by: David Heidelberg Acked-by: Charles Keepax Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211124155101.59694-1-david@ixit.cz Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/wlf,wm8962.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml b/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml index 0e6249d7c133..5e172e9462b9 100644 --- a/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml +++ b/Documentation/devicetree/bindings/sound/wlf,wm8962.yaml @@ -19,6 +19,9 @@ properties: clocks: maxItems: 1 + interrupts: + maxItems: 1 + "#sound-dai-cells": const: 0 From b68f8a13e3b4bd2f956250cd428fee344f4d60a3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Nov 2021 22:04:19 +0100 Subject: [PATCH 061/868] platform/x86: thinkpad_acpi: Restore missing hotkey_tablet_mode and hotkey_radio_sw sysfs-attr Commit c99ca78d67a6 ("platform/x86: thinkpad_acpi: Switch to common use of attributes") removed the conditional adding of the hotkey_tablet_mode and hotkey_radio_sw sysfs-attributes, replacing this with a hotkey_attr_is_visible() callback which hides them when the feature is not present. But this commit forgot to add these 2 attributes to the default hotkey_attributes[] set, so they would now never get added at all. Add the 2 attributes to the default hotkey_attributes[] set so that they are available on systems with these features once more. Fixes: c99ca78d67a6 ("platform/x86: thinkpad_acpi: Switch to common use of attributes") Cc: Len Baker Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211123210424.266607-2-hdegoede@redhat.com --- drivers/platform/x86/thinkpad_acpi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b3ac9c3f3b7c..17d581e21e7f 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3015,6 +3015,8 @@ static struct attribute *hotkey_attributes[] = { &dev_attr_hotkey_all_mask.attr, &dev_attr_hotkey_adaptive_all_mask.attr, &dev_attr_hotkey_recommended_mask.attr, + &dev_attr_hotkey_tablet_mode.attr, + &dev_attr_hotkey_radio_sw.attr, #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL &dev_attr_hotkey_source_mask.attr, &dev_attr_hotkey_poll_freq.attr, From be892e95361fb72365a4f81475a34c5d43e36708 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 23 Nov 2021 22:05:24 +0100 Subject: [PATCH 062/868] platform/x86: thinkpad_acpi: Add lid_logo_dot to the list of safe LEDs There have been various bugs / forum threads about allowing control of the LED in the ThinkPad logo on the lid of various models. This seems to be something which users want to control and there really is no reason to require setting CONFIG_THINKPAD_ACPI_UNSAFE_LEDS for this. The lid-logo-dot is LED number 10, so change the name of the 10th led from unknown_led2 to lid_logo_dot and add it to the TPACPI_SAFE_LEDS mask. Link: https://www.reddit.com/r/thinkpad/comments/7n8eyu/thinkpad_led_control_under_gnulinux/ BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1943318 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211123210524.266705-2-hdegoede@redhat.com --- drivers/platform/x86/thinkpad_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 17d581e21e7f..bb1abb947e1e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -5728,11 +5728,11 @@ static const char * const tpacpi_led_names[TPACPI_LED_NUMLEDS] = { "tpacpi::standby", "tpacpi::dock_status1", "tpacpi::dock_status2", - "tpacpi::unknown_led2", + "tpacpi::lid_logo_dot", "tpacpi::unknown_led3", "tpacpi::thinkvantage", }; -#define TPACPI_SAFE_LEDS 0x1081U +#define TPACPI_SAFE_LEDS 0x1481U static inline bool tpacpi_is_led_restricted(const unsigned int led) { From 48d5e836ebc09cb766ab5e11945fa7e89120a3f6 Mon Sep 17 00:00:00 2001 From: Matan Ziv-Av Date: Tue, 23 Nov 2021 22:14:55 +0200 Subject: [PATCH 063/868] platform/x86: lg-laptop: Recognize more models LG uses 5 instead of 0 in the third digit (second digit after 2019) of the year string to indicate newer models in the same year. Handle this case as well. Signed-off-by: Matan Ziv-Av Link: https://lore.kernel.org/r/c752b3b2-9718-bd9a-732d-e165aa8a1fca@svgalib.org Signed-off-by: Hans de Goede --- drivers/platform/x86/lg-laptop.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/platform/x86/lg-laptop.c b/drivers/platform/x86/lg-laptop.c index ae9293024c77..a91847a551a7 100644 --- a/drivers/platform/x86/lg-laptop.c +++ b/drivers/platform/x86/lg-laptop.c @@ -657,6 +657,18 @@ static int acpi_add(struct acpi_device *device) if (product && strlen(product) > 4) switch (product[4]) { case '5': + if (strlen(product) > 5) + switch (product[5]) { + case 'N': + year = 2021; + break; + case '0': + year = 2016; + break; + default: + year = 2022; + } + break; case '6': year = 2016; break; From bbb9429a210ee79c2e4a0d1b6c41818975585ca9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 24 Nov 2021 18:51:25 +0100 Subject: [PATCH 064/868] platform/x86: touchscreen_dmi: Add TrekStor SurfTab duo W1 touchscreen info The TrekStor SurfTab duo W1 (ST10432-10b) has a Goodix touchscreen which has its x-axis mirrored. Add a quirk to fix this. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211124175125.250329-1-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index fa8812039b82..17dd54d4b783 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -905,6 +905,16 @@ static const struct ts_dmi_data trekstor_primetab_t13b_data = { .properties = trekstor_primetab_t13b_props, }; +static const struct property_entry trekstor_surftab_duo_w1_props[] = { + PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"), + { } +}; + +static const struct ts_dmi_data trekstor_surftab_duo_w1_data = { + .acpi_name = "GDIX1001:00", + .properties = trekstor_surftab_duo_w1_props, +}; + static const struct property_entry trekstor_surftab_twin_10_1_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 20), PROPERTY_ENTRY_U32("touchscreen-min-y", 0), @@ -1502,6 +1512,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Primetab T13B"), }, }, + { + /* TrekStor SurfTab duo W1 10.1 ST10432-10b */ + .driver_data = (void *)&trekstor_surftab_duo_w1_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TrekStor"), + DMI_MATCH(DMI_PRODUCT_NAME, "SurfTab duo W1 10.1 (VT4)"), + }, + }, { /* TrekStor SurfTab twin 10.1 ST10432-8 */ .driver_data = (void *)&trekstor_surftab_twin_10_1_data, From ce20eff57361e72878a772ef08b5239d3ae102b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 25 Nov 2021 14:00:56 +0100 Subject: [PATCH 065/868] irqchip/armada-370-xp: Fix return value of armada_370_xp_msi_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IRQ domain alloc function should return zero on success. Non-zero value indicates failure. Signed-off-by: Pali Rohár Fixes: fcc392d501bd ("irqchip/armada-370-xp: Use the generic MSI infrastructure") Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211125130057.26705-1-pali@kernel.org --- drivers/irqchip/irq-armada-370-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 80906bfec845..41ad745cf343 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -250,7 +250,7 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq, NULL, NULL); } - return hwirq; + return 0; } static void armada_370_xp_msi_free(struct irq_domain *domain, From d0a553502efd545c1ce3fd08fc4d423f8e4ac3d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 25 Nov 2021 14:00:57 +0100 Subject: [PATCH 066/868] irqchip/armada-370-xp: Fix support for Multi-MSI interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit irq-armada-370-xp driver already sets MSI_FLAG_MULTI_PCI_MSI flag into msi_domain_info structure. But allocated interrupt numbers for Multi-MSI needs to be properly aligned otherwise devices send MSI interrupt with wrong number. Fix this issue by using function bitmap_find_free_region() instead of bitmap_find_next_zero_area() to allocate aligned interrupt numbers. Signed-off-by: Pali Rohár Fixes: a71b9412c90c ("irqchip/armada-370-xp: Allow allocation of multiple MSIs") Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211125130057.26705-2-pali@kernel.org --- drivers/irqchip/irq-armada-370-xp.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 41ad745cf343..5b8d571c041d 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -232,17 +232,13 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq, int hwirq, i; mutex_lock(&msi_used_lock); - - hwirq = bitmap_find_next_zero_area(msi_used, PCI_MSI_DOORBELL_NR, - 0, nr_irqs, 0); - if (hwirq >= PCI_MSI_DOORBELL_NR) { - mutex_unlock(&msi_used_lock); - return -ENOSPC; - } - - bitmap_set(msi_used, hwirq, nr_irqs); + hwirq = bitmap_find_free_region(msi_used, PCI_MSI_DOORBELL_NR, + order_base_2(nr_irqs)); mutex_unlock(&msi_used_lock); + if (hwirq < 0) + return -ENOSPC; + for (i = 0; i < nr_irqs; i++) { irq_domain_set_info(domain, virq + i, hwirq + i, &armada_370_xp_msi_bottom_irq_chip, @@ -259,7 +255,7 @@ static void armada_370_xp_msi_free(struct irq_domain *domain, struct irq_data *d = irq_domain_get_irq_data(domain, virq); mutex_lock(&msi_used_lock); - bitmap_clear(msi_used, d->hwirq, nr_irqs); + bitmap_release_region(msi_used, d->hwirq, order_base_2(nr_irqs)); mutex_unlock(&msi_used_lock); } From 8958389681b929fcc7301e7dc5f0da12e4a256a0 Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Wed, 24 Nov 2021 17:43:48 +0800 Subject: [PATCH 067/868] irqchip/aspeed-scu: Replace update_bits with write_bits. The interrupt status bits are cleared by writing 1, we should force a write to clear the interrupt without checking if the value has changed. Fixes: 04f605906ff0 ("irqchip: Add Aspeed SCU interrupt controller") Signed-off-by: Billy Tsai Reviewed-by: Joel Stanley Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211124094348.11621-1-billy_tsai@aspeedtech.com Cc: stable@vger.kernel.org --- drivers/irqchip/irq-aspeed-scu-ic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c index f3c6855a4cef..18b77c3e6db4 100644 --- a/drivers/irqchip/irq-aspeed-scu-ic.c +++ b/drivers/irqchip/irq-aspeed-scu-ic.c @@ -76,8 +76,8 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc) generic_handle_domain_irq(scu_ic->irq_domain, bit - scu_ic->irq_shift); - regmap_update_bits(scu_ic->scu, scu_ic->reg, mask, - BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT)); + regmap_write_bits(scu_ic->scu, scu_ic->reg, mask, + BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT)); } chained_irq_exit(chip, desc); From 357a9c4b79f4c8bbceb77c64ea09d8da3a6a870d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 22 Nov 2021 16:54:07 +0100 Subject: [PATCH 068/868] irqchip/mips-gic: Use bitfield helpers Use the FIELD_GET() helper, instead of open-coding the same operation. Signed-off-by: Geert Uytterhoeven Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/all/74f9d126961a90d3e311b92a54870eaac5b3ae57.1637593297.git.geert+renesas@glider.be --- drivers/irqchip/irq-mips-gic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index d02b05a067d9..ff89b36267dd 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "irq-mips-gic: " fmt +#include #include #include #include @@ -735,8 +736,7 @@ static int __init gic_of_init(struct device_node *node, mips_gic_base = ioremap(gic_base, gic_len); gicconfig = read_gic_config(); - gic_shared_intrs = gicconfig & GIC_CONFIG_NUMINTERRUPTS; - gic_shared_intrs >>= __ffs(GIC_CONFIG_NUMINTERRUPTS); + gic_shared_intrs = FIELD_GET(GIC_CONFIG_NUMINTERRUPTS, gicconfig); gic_shared_intrs = (gic_shared_intrs + 1) * 8; if (cpu_has_veic) { From 84b01721e8042cdd1e8ffeb648844a09cd4213e0 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 21 Nov 2021 23:22:39 +0300 Subject: [PATCH 069/868] RDMA: Fix use-after-free in rxe_queue_cleanup On error handling path in rxe_qp_from_init() qp->sq.queue is freed and then rxe_create_qp() will drop last reference to this object. qp clean up function will try to free this queue one time and it causes UAF bug. Fix it by zeroing queue pointer after freeing queue in rxe_qp_from_init(). Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/20211121202239.3129-1-paskripkin@gmail.com Reported-by: syzbot+aab53008a5adf26abe91@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 975321812c87..54b8711321c1 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -359,6 +359,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, err2: rxe_queue_cleanup(qp->sq.queue); + qp->sq.queue = NULL; err1: qp->pd = NULL; qp->rcq = NULL; From f0ae4afe3d35e67db042c58a52909e06262b740f Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Mon, 22 Nov 2021 13:41:51 +0200 Subject: [PATCH 070/868] RDMA/mlx5: Fix releasing unallocated memory in dereg MR flow For the case of IB_MR_TYPE_DM the mr does doesn't have a umem, even though it is a user MR. This causes function mlx5_free_priv_descs() to think that it is a kernel MR, leading to wrongly accessing mr->descs that will get wrong values in the union which leads to attempt to release resources that were not allocated in the first place. For example: DMA-API: mlx5_core 0000:08:00.1: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=0 bytes] WARNING: CPU: 8 PID: 1021 at kernel/dma/debug.c:961 check_unmap+0x54f/0x8b0 RIP: 0010:check_unmap+0x54f/0x8b0 Call Trace: debug_dma_unmap_page+0x57/0x60 mlx5_free_priv_descs+0x57/0x70 [mlx5_ib] mlx5_ib_dereg_mr+0x1fb/0x3d0 [mlx5_ib] ib_dereg_mr_user+0x60/0x140 [ib_core] uverbs_destroy_uobject+0x59/0x210 [ib_uverbs] uobj_destroy+0x3f/0x80 [ib_uverbs] ib_uverbs_cmd_verbs+0x435/0xd10 [ib_uverbs] ? uverbs_finalize_object+0x50/0x50 [ib_uverbs] ? lock_acquire+0xc4/0x2e0 ? lock_acquired+0x12/0x380 ? lock_acquire+0xc4/0x2e0 ? lock_acquire+0xc4/0x2e0 ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs] ? lock_release+0x28a/0x400 ib_uverbs_ioctl+0xc0/0x140 [ib_uverbs] ? ib_uverbs_ioctl+0x7c/0x140 [ib_uverbs] __x64_sys_ioctl+0x7f/0xb0 do_syscall_64+0x38/0x90 Fix it by reorganizing the dereg flow and mlx5_ib_mr structure: - Move the ib_umem field into the user MRs structure in the union as it's applicable only there. - Function mlx5_ib_dereg_mr() will now call mlx5_free_priv_descs() only in case there isn't udata, which indicates that this isn't a user MR. Fixes: f18ec4223117 ("RDMA/mlx5: Use a union inside mlx5_ib_mr") Link: https://lore.kernel.org/r/66bb1dd253c1fd7ceaa9fc411061eefa457b86fb.1637581144.git.leonro@nvidia.com Signed-off-by: Alaa Hleihel Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +++--- drivers/infiniband/hw/mlx5/mr.c | 26 ++++++++++++-------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index e636e954f6bf..4a7a56ed740b 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -664,7 +664,6 @@ struct mlx5_ib_mr { /* User MR data */ struct mlx5_cache_ent *cache_ent; - struct ib_umem *umem; /* This is zero'd when the MR is allocated */ union { @@ -676,7 +675,7 @@ struct mlx5_ib_mr { struct list_head list; }; - /* Used only by kernel MRs (umem == NULL) */ + /* Used only by kernel MRs */ struct { void *descs; void *descs_alloc; @@ -697,8 +696,9 @@ struct mlx5_ib_mr { int data_length; }; - /* Used only by User MRs (umem != NULL) */ + /* Used only by User MRs */ struct { + struct ib_umem *umem; unsigned int page_shift; /* Current access_flags */ int access_flags; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 157d862fb864..63e2129f1142 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1904,19 +1904,18 @@ err: return ret; } -static void -mlx5_free_priv_descs(struct mlx5_ib_mr *mr) +static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { - if (!mr->umem && mr->descs) { - struct ib_device *device = mr->ibmr.device; - int size = mr->max_descs * mr->desc_size; - struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); + int size = mr->max_descs * mr->desc_size; - dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, - DMA_TO_DEVICE); - kfree(mr->descs_alloc); - mr->descs = NULL; - } + if (!mr->descs) + return; + + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); + kfree(mr->descs_alloc); + mr->descs = NULL; } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -1992,7 +1991,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) if (mr->cache_ent) { mlx5_mr_cache_free(dev, mr); } else { - mlx5_free_priv_descs(mr); + if (!udata) + mlx5_free_priv_descs(mr); kfree(mr); } return 0; @@ -2079,7 +2079,6 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, if (err) goto err_free_in; - mr->umem = NULL; kfree(in); return mr; @@ -2206,7 +2205,6 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, } mr->ibmr.device = pd->device; - mr->umem = NULL; switch (mr_type) { case IB_MR_TYPE_MEM_REG: From c4a6f9cd10bd8f84c601516b96b24e3eb6dcc86a Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 22 Nov 2021 16:22:19 -0500 Subject: [PATCH 071/868] Remove Doug Ledford from MAINTAINERS Moving on to other things Link: https://lore.kernel.org/r/12fe41e3d0a515e4fcf5c9e62ac88c39e09c1639.1637616139.git.dledford@redhat.com Signed-off-by: Doug Ledford Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5250298d2817..4d4a7011ba6c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9318,7 +9318,6 @@ S: Maintained F: drivers/iio/pressure/dps310.c INFINIBAND SUBSYSTEM -M: Doug Ledford M: Jason Gunthorpe L: linux-rdma@vger.kernel.org S: Supported From 52414e27d6b568120b087d1fbafbb4482b0ccaab Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Tue, 23 Nov 2021 16:48:09 +0800 Subject: [PATCH 072/868] RDMA/hns: Do not halt commands during reset until later is_reset is used to indicate whether the hardware starts to reset. When hns_roce_hw_v2_reset_notify_down() is called, the hardware has not yet started to reset. If is_reset is set at this time, all mailbox operations of resource destroy actions will be intercepted by driver. When the driver cleans up resources, but the hardware is still accessed, the following errors will appear: arm-smmu-v3 arm-smmu-v3.2.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000350100000010 arm-smmu-v3 arm-smmu-v3.2.auto: 0x000002088000003f arm-smmu-v3 arm-smmu-v3.2.auto: 0x00000000a50e0800 arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000000000000000 arm-smmu-v3 arm-smmu-v3.2.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000350100000010 arm-smmu-v3 arm-smmu-v3.2.auto: 0x000002088000043e arm-smmu-v3 arm-smmu-v3.2.auto: 0x00000000a50a0800 arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000000000000000 arm-smmu-v3 arm-smmu-v3.2.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000350100000010 arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000020880000436 arm-smmu-v3 arm-smmu-v3.2.auto: 0x00000000a50a0880 arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000000000000000 arm-smmu-v3 arm-smmu-v3.2.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000350100000010 arm-smmu-v3 arm-smmu-v3.2.auto: 0x000002088000043a arm-smmu-v3 arm-smmu-v3.2.auto: 0x00000000a50e0840 hns3 0000:35:00.0: INT status: CMDQ(0x0) HW errors(0x0) other(0x0) arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000000000000000 hns3 0000:35:00.0: received unknown or unhandled event of vector0 arm-smmu-v3 arm-smmu-v3.2.auto: event 0x10 received: arm-smmu-v3 arm-smmu-v3.2.auto: 0x0000350100000010 {34}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 7 is_reset will be set correctly in check_aedev_reset_status(), so the setting in hns_roce_hw_v2_reset_notify_down() should be deleted. Fixes: 726be12f5ca0 ("RDMA/hns: Set reset flag when hw resetting") Link: https://lore.kernel.org/r/20211123084809.37318-1-liangwenpeng@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 9bfbaddd1763..ae14329c619c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6387,10 +6387,8 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) if (!hr_dev) return 0; - hr_dev->is_reset = true; hr_dev->active = false; hr_dev->dis_db = true; - hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN; return 0; From b0969f83890bf8b47f5c8bd42539599b2b52fdeb Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Tue, 23 Nov 2021 22:24:02 +0800 Subject: [PATCH 073/868] RDMA/hns: Do not destroy QP resources in the hw resetting phase When hns_roce_v2_destroy_qp() is called, the brief calling process of the driver is as follows: ...... hns_roce_v2_destroy_qp hns_roce_v2_qp_modify hns_roce_cmd_mbox hns_roce_qp_destroy If hns_roce_cmd_mbox() detects that the hardware is being reset during the execution of the hns_roce_cmd_mbox(), the driver will not be able to get the return value from the hardware (the firmware cannot respond to the driver's mailbox during the hardware reset phase). The driver needs to wait for the hardware reset to complete before continuing to execute hns_roce_qp_destroy(), otherwise it may happen that the driver releases the resources but the hardware is still accessing. In order to fix this problem, HNS RoCE needs to add a piece of code to wait for the hardware reset to complete. The original interface get_hw_reset_stat() is the instantaneous state of the hardware reset, which cannot accurately reflect whether the hardware reset is completed, so it needs to be replaced with the ae_dev_reset_cnt interface. The sign that the hardware reset is complete is that the return value of the ae_dev_reset_cnt interface is greater than the original value reset_cnt recorded by the driver. Fixes: 6a04aed6afae ("RDMA/hns: Fix the chip hanging caused by sending mailbox&CMQ during reset") Link: https://lore.kernel.org/r/20211123142402.26936-1-liangwenpeng@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ae14329c619c..bbfa1332dedc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1050,9 +1051,14 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, unsigned long instance_stage, unsigned long reset_stage) { +#define HW_RESET_TIMEOUT_US 1000000 +#define HW_RESET_SLEEP_US 1000 + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long val; + int ret; /* When hardware reset is detected, we should stop sending mailbox&cmq& * doorbell to hardware. If now in .init_instance() function, we should @@ -1064,7 +1070,11 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, * again. */ hr_dev->dis_db = true; - if (!ops->get_hw_reset_stat(handle)) + + ret = read_poll_timeout(ops->ae_dev_reset_cnt, val, + val > hr_dev->reset_cnt, HW_RESET_SLEEP_US, + HW_RESET_TIMEOUT_US, false, handle); + if (!ret) hr_dev->is_reset = true; if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT || From 8979ead988d20887e563b77b16792594f76aa07a Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 22 Nov 2021 23:24:38 +0100 Subject: [PATCH 074/868] arm64: dts: apple: change ethernet0 device type to ethernet Fixes make dtbs_check errors for t8103-j274.dts due to missing pci properties. Fixes: e1bebf978151 ("arm64: dts: apple: j274: Expose PCI node for the Ethernet MAC address") Reviewed-by: Mark Kettenis Signed-off-by: Janne Grunau Tested-by: Hector Martin Signed-off-by: Hector Martin --- arch/arm64/boot/dts/apple/t8103-j274.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/apple/t8103-j274.dts b/arch/arm64/boot/dts/apple/t8103-j274.dts index 33a80f9501dc..02c36301e985 100644 --- a/arch/arm64/boot/dts/apple/t8103-j274.dts +++ b/arch/arm64/boot/dts/apple/t8103-j274.dts @@ -60,7 +60,7 @@ &port02 { bus-range = <3 3>; - ethernet0: pci@0,0 { + ethernet0: ethernet@0,0 { reg = <0x30000 0x0 0x0 0x0 0x0>; /* To be filled by the loader */ local-mac-address = [00 10 18 00 00 00]; From 48c06708e63e71b4395e4159797366aa03be10ff Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 18 Nov 2021 12:58:24 +0100 Subject: [PATCH 075/868] mac80211: fix TCP performance on mesh interface sta is NULL for mesh point (resolved later), so sk pacing parameters were not applied. Signed-off-by: Maxime Bizon Link: https://lore.kernel.org/r/66f51659416ac35d6b11a313bd3ffe8b8a43dd55.camel@freebox.fr Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 278945e3e08a..51ec5f9bc2e0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4191,11 +4191,11 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, ieee80211_aggr_check(sdata, sta, skb); + sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); + if (sta) { struct ieee80211_fast_tx *fast_tx; - sk_pacing_shift_update(skb->sk, sdata->local->hw.tx_sk_pacing_shift); - fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && From d5e568c3a4ec2ddd23e7dc5ad5b0c64e4f22981a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 22 Nov 2021 12:47:40 +0100 Subject: [PATCH 076/868] mac80211: track only QoS data frames for admission control For admission control, obviously all of that only works for QoS data frames, otherwise we cannot even access the QoS field in the header. Syzbot reported (see below) an uninitialized value here due to a status of a non-QoS nullfunc packet, which isn't even long enough to contain the QoS header. Fix this to only do anything for QoS data packets. Reported-by: syzbot+614e82b88a1a4973e534@syzkaller.appspotmail.com Fixes: 02219b3abca5 ("mac80211: add WMM admission control support") Link: https://lore.kernel.org/r/20211122124737.dad29e65902a.Ieb04587afacb27c14e0de93ec1bfbefb238cc2a0@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 54ab0e1ef6ca..37f7d975f3da 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2452,11 +2452,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, u16 tx_time) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 tid = ieee80211_get_tid(hdr); - int ac = ieee80211_ac_from_tid(tid); - struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; + u16 tid; + int ac; + struct ieee80211_sta_tx_tspec *tx_tspec; unsigned long now = jiffies; + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + tid = ieee80211_get_tid(hdr); + ac = ieee80211_ac_from_tid(tid); + tx_tspec = &ifmgd->tx_tspec[ac]; + if (likely(!tx_tspec->admitted_time)) return; From 18688c80ad8a8dd50523dc9276e929932cac86d4 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Nov 2021 21:43:23 +0100 Subject: [PATCH 077/868] mac80211: fix rate control for retransmitted frames Since retransmission clears info->control, rate control needs to be called again, otherwise the driver might crash due to invalid rates. Cc: stable@vger.kernel.org # 5.14+ Reported-by: Aaro Koskinen Reported-by: Robert W Fixes: 03c3911d2d67 ("mac80211: call ieee80211_tx_h_rate_ctrl() when dequeue") Signed-off-by: Felix Fietkau Tested-by: Aaro Koskinen Link: https://lore.kernel.org/r/20211122204323.9787-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 51ec5f9bc2e0..86a54df3aabd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1822,15 +1822,15 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_CONTINUE; + if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_rate_ctrl); + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { __skb_queue_tail(&tx->skbs, tx->skb); tx->skb = NULL; goto txh_done; } - if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) - CALL_TXH(ieee80211_tx_h_rate_ctrl); - CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); From 73111efacd3c6d9e644acca1d132566932be8af0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 24 Nov 2021 10:40:24 +0100 Subject: [PATCH 078/868] mac80211: fix regression in SSN handling of addba tx Some drivers that do their own sequence number allocation (e.g. ath9k) rely on being able to modify params->ssn on starting tx ampdu sessions. This was broken by a change that modified it to use sta->tid_seq[tid] instead. Cc: stable@vger.kernel.org Fixes: 31d8bb4e07f8 ("mac80211: agg-tx: refactor sending addba") Reported-by: Eneas U de Queiroz Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211124094024.43222-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 4 ++-- net/mac80211/sta_info.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 430a58587538..c1558dd2d244 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -480,8 +480,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, - sta->tid_seq[tid] >> 4, + tid_tx->dialog_token, tid_tx->ssn, buf_size, tid_tx->timeout); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); @@ -523,6 +522,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); + tid_tx->ssn = params.ssn; if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) { return; } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ba2796782008..e7443fc4669c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -199,6 +199,7 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u16 buf_size; + u16 ssn; u16 failed_bar_ssn; bool bar_pending; From 942bd1070c3a39d1302fc5db73d60c86e3033c81 Mon Sep 17 00:00:00 2001 From: Xing Song Date: Tue, 23 Nov 2021 11:31:23 +0800 Subject: [PATCH 079/868] mac80211: set up the fwd_skb->dev for mesh forwarding Mesh forwarding requires that the fwd_skb->dev is set up for TX handling, otherwise the following warning will be generated, so set it up for the pending frames. [ 72.835674 ] WARNING: CPU: 0 PID: 1193 at __skb_flow_dissect+0x284/0x1298 [ 72.842379 ] Modules linked in: ksmbd pppoe ppp_async l2tp_ppp ... [ 72.962020 ] CPU: 0 PID: 1193 Comm: kworker/u5:1 Tainted: P S 5.4.137 #0 [ 72.969938 ] Hardware name: MT7622_MT7531 RFB (DT) [ 72.974659 ] Workqueue: napi_workq napi_workfn [ 72.979025 ] pstate: 60000005 (nZCv daif -PAN -UAO) [ 72.983822 ] pc : __skb_flow_dissect+0x284/0x1298 [ 72.988444 ] lr : __skb_flow_dissect+0x54/0x1298 [ 72.992977 ] sp : ffffffc010c738c0 [ 72.996293 ] x29: ffffffc010c738c0 x28: 0000000000000000 [ 73.001615 ] x27: 000000000000ffc2 x26: ffffff800c2eb818 [ 73.006937 ] x25: ffffffc010a987c8 x24: 00000000000000ce [ 73.012259 ] x23: ffffffc010c73a28 x22: ffffffc010a99c60 [ 73.017581 ] x21: 000000000000ffc2 x20: ffffff80094da800 [ 73.022903 ] x19: 0000000000000000 x18: 0000000000000014 [ 73.028226 ] x17: 00000000084d16af x16: 00000000d1fc0bab [ 73.033548 ] x15: 00000000715f6034 x14: 000000009dbdd301 [ 73.038870 ] x13: 00000000ea4dcbc3 x12: 0000000000000040 [ 73.044192 ] x11: 000000000eb00ff0 x10: 0000000000000000 [ 73.049513 ] x9 : 000000000eb00073 x8 : 0000000000000088 [ 73.054834 ] x7 : 0000000000000000 x6 : 0000000000000001 [ 73.060155 ] x5 : 0000000000000000 x4 : 0000000000000000 [ 73.065476 ] x3 : ffffffc010a98000 x2 : 0000000000000000 [ 73.070797 ] x1 : 0000000000000000 x0 : 0000000000000000 [ 73.076120 ] Call trace: [ 73.078572 ] __skb_flow_dissect+0x284/0x1298 [ 73.082846 ] __skb_get_hash+0x7c/0x228 [ 73.086629 ] ieee80211_txq_may_transmit+0x7fc/0x17b8 [mac80211] [ 73.092564 ] ieee80211_tx_prepare_skb+0x20c/0x268 [mac80211] [ 73.098238 ] ieee80211_tx_pending+0x144/0x330 [mac80211] [ 73.103560 ] tasklet_action_common.isra.16+0xb4/0x158 [ 73.108618 ] tasklet_action+0x2c/0x38 [ 73.112286 ] __do_softirq+0x168/0x3b0 [ 73.115954 ] do_softirq.part.15+0x88/0x98 [ 73.119969 ] __local_bh_enable_ip+0xb0/0xb8 [ 73.124156 ] napi_workfn+0x58/0x90 [ 73.127565 ] process_one_work+0x20c/0x478 [ 73.131579 ] worker_thread+0x50/0x4f0 [ 73.135249 ] kthread+0x124/0x128 [ 73.138484 ] ret_from_fork+0x10/0x1c Signed-off-by: Xing Song Tested-By: Frank Wunderlich Link: https://lore.kernel.org/r/20211123033123.2684-1-xing.song@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9541a4c30aca..0544563ede52 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2944,6 +2944,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (!fwd_skb) goto out; + fwd_skb->dev = sdata->dev; fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data; fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY); info = IEEE80211_SKB_CB(fwd_skb); From 8f9dcc29566626f683843ccac6113a12208315ca Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Sat, 2 Oct 2021 08:53:29 -0600 Subject: [PATCH 080/868] mac80211: fix a memory leak where sta_info is not freed The following is from a system that went OOM due to a memory leak: wlan0: Allocated STA 74:83:c2:64:0b:87 wlan0: Allocated STA 74:83:c2:64:0b:87 wlan0: IBSS finish 74:83:c2:64:0b:87 (---from ieee80211_ibss_add_sta) wlan0: Adding new IBSS station 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 3 wlan0: Inserted STA 74:83:c2:64:0b:87 wlan0: IBSS finish 74:83:c2:64:0b:87 (---from ieee80211_ibss_work) wlan0: Adding new IBSS station 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 3 . . wlan0: expiring inactive not authorized STA 74:83:c2:64:0b:87 wlan0: moving STA 74:83:c2:64:0b:87 to state 2 wlan0: moving STA 74:83:c2:64:0b:87 to state 1 wlan0: Removed STA 74:83:c2:64:0b:87 wlan0: Destroyed STA 74:83:c2:64:0b:87 The ieee80211_ibss_finish_sta() is called twice on the same STA from 2 different locations. On the second attempt, the allocated STA is not destroyed creating a kernel memory leak. This is happening because sta_info_insert_finish() does not call sta_info_free() the second time when the STA already exists (returns -EEXIST). Note that the caller sta_info_insert_rcu() assumes STA is destroyed upon errors. Same fix is applied to -ENOMEM. Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20211002145329.3125293-1-anzaki@gmail.com [change the error path label to use the existing code] Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 51b49f0d3ad4..840ad1a860fa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -644,13 +644,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; - goto out_err; + goto out_cleanup; } sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); if (!sinfo) { err = -ENOMEM; - goto out_err; + goto out_cleanup; } local->num_sta++; @@ -706,8 +706,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); + out_cleanup: cleanup_single_sta(sta); - out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); rcu_read_lock(); From 70408f755f589f67957b9ec6852e6b01f858d0a2 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Tue, 23 Nov 2021 19:37:34 +0530 Subject: [PATCH 081/868] ASoC: tegra: Balance runtime PM count After successful application of volume/mute settings via mixer control put calls, the control returns without balancing the runtime PM count. This makes device to be always runtime active. Fix this by allowing control to reach pm_runtime_put() call. Fixes: e539891f9687 ("ASoC: tegra: Add Tegra210 based MVC driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1637676459-31191-2-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_mvc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_mvc.c b/sound/soc/tegra/tegra210_mvc.c index 85b155887ec2..436bb151fed1 100644 --- a/sound/soc/tegra/tegra210_mvc.c +++ b/sound/soc/tegra/tegra210_mvc.c @@ -164,7 +164,7 @@ static int tegra210_mvc_put_mute(struct snd_kcontrol *kcontrol, if (err < 0) goto end; - return 1; + err = 1; end: pm_runtime_put(cmpnt->dev); @@ -236,7 +236,7 @@ static int tegra210_mvc_put_vol(struct snd_kcontrol *kcontrol, TEGRA210_MVC_VOLUME_SWITCH_MASK, TEGRA210_MVC_VOLUME_SWITCH_TRIGGER); - return 1; + err = 1; end: pm_runtime_put(cmpnt->dev); From af120d07bbb0721708b10204beed66ed2cb0cb62 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Tue, 23 Nov 2021 19:37:35 +0530 Subject: [PATCH 082/868] ASoC: tegra: Use normal system sleep for SFC The driver currently subscribes for a late system sleep call. The initcall_debug log shows that suspend call for SFC device happens after the parent device (AHUB). This seems to cause suspend failure on Jetson TX2 platform. Also there is no use of having late system sleep specifically for SFC device. Fix the order by using normal system sleep. Fixes: b2f74ec53a6c ("ASoC: tegra: Add Tegra210 based SFC driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1637676459-31191-3-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_sfc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_sfc.c b/sound/soc/tegra/tegra210_sfc.c index 7a2227ed3df6..368f077e7bee 100644 --- a/sound/soc/tegra/tegra210_sfc.c +++ b/sound/soc/tegra/tegra210_sfc.c @@ -3594,8 +3594,8 @@ static int tegra210_sfc_platform_remove(struct platform_device *pdev) static const struct dev_pm_ops tegra210_sfc_pm_ops = { SET_RUNTIME_PM_OPS(tegra210_sfc_runtime_suspend, tegra210_sfc_runtime_resume, NULL) - SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver tegra210_sfc_driver = { From c83d263a89f30d1c0274827c475f3583cf8e477f Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Tue, 23 Nov 2021 19:37:36 +0530 Subject: [PATCH 083/868] ASoC: tegra: Use normal system sleep for MVC The driver currently subscribes for a late system sleep call. The initcall_debug log shows that suspend call for MVC device happens after the parent device (AHUB). This seems to cause suspend failure on Jetson TX2 platform. Also there is no use of having late system sleep specifically for MVC device. Fix the order by using normal system sleep. Fixes: e539891f9687 ("ASoC: tegra: Add Tegra210 based MVC driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1637676459-31191-4-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_mvc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_mvc.c b/sound/soc/tegra/tegra210_mvc.c index 436bb151fed1..acf59328dcb6 100644 --- a/sound/soc/tegra/tegra210_mvc.c +++ b/sound/soc/tegra/tegra210_mvc.c @@ -639,8 +639,8 @@ static int tegra210_mvc_platform_remove(struct platform_device *pdev) static const struct dev_pm_ops tegra210_mvc_pm_ops = { SET_RUNTIME_PM_OPS(tegra210_mvc_runtime_suspend, tegra210_mvc_runtime_resume, NULL) - SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver tegra210_mvc_driver = { From b78400e41653b3a752a4cd17d2fcbd4a96bb4bc2 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Tue, 23 Nov 2021 19:37:37 +0530 Subject: [PATCH 084/868] ASoC: tegra: Use normal system sleep for Mixer The driver currently subscribes for a late system sleep call. The initcall_debug log shows that suspend call for Mixer device happens after the parent device (AHUB). This seems to cause suspend failure on Jetson TX2 platform. Also there is no use of having late system sleep specifically for Mixer device. Fix the order by using normal system sleep. Fixes: 05bb3d5ec64a ("ASoC: tegra: Add Tegra210 based Mixer driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1637676459-31191-5-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_mixer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_mixer.c b/sound/soc/tegra/tegra210_mixer.c index 51d375573cfa..16e679a95658 100644 --- a/sound/soc/tegra/tegra210_mixer.c +++ b/sound/soc/tegra/tegra210_mixer.c @@ -666,8 +666,8 @@ static int tegra210_mixer_platform_remove(struct platform_device *pdev) static const struct dev_pm_ops tegra210_mixer_pm_ops = { SET_RUNTIME_PM_OPS(tegra210_mixer_runtime_suspend, tegra210_mixer_runtime_resume, NULL) - SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver tegra210_mixer_driver = { From 638c31d542a576714a52bb6a9a7dedff98e32a1d Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Tue, 23 Nov 2021 19:37:38 +0530 Subject: [PATCH 085/868] ASoC: tegra: Use normal system sleep for AMX The driver currently subscribes for a late system sleep call. The initcall_debug log shows that suspend call for AMX device happens after the parent device (AHUB). This seems to cause suspend failure on Jetson TX2 platform. Also there is no use of having late system sleep specifically for AMX device. Fix the order by using normal system sleep. Fixes: 77f7df346c45 ("ASoC: tegra: Add Tegra210 based AMX driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1637676459-31191-6-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_amx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_amx.c b/sound/soc/tegra/tegra210_amx.c index 689576302ede..d064cc67fea6 100644 --- a/sound/soc/tegra/tegra210_amx.c +++ b/sound/soc/tegra/tegra210_amx.c @@ -583,8 +583,8 @@ static int tegra210_amx_platform_remove(struct platform_device *pdev) static const struct dev_pm_ops tegra210_amx_pm_ops = { SET_RUNTIME_PM_OPS(tegra210_amx_runtime_suspend, tegra210_amx_runtime_resume, NULL) - SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver tegra210_amx_driver = { From cf36de4fc5ce5502ce5070a793addd9d49df4113 Mon Sep 17 00:00:00 2001 From: Sameer Pujar Date: Tue, 23 Nov 2021 19:37:39 +0530 Subject: [PATCH 086/868] ASoC: tegra: Use normal system sleep for ADX The driver currently subscribes for a late system sleep call. The initcall_debug log shows that suspend call for ADX device happens after the parent device (AHUB). This seems to cause suspend failure on Jetson TX2 platform. Also there is no use of having late system sleep specifically for ADX device. Fix the order by using normal system sleep. Fixes: a99ab6f395a9 ("ASoC: tegra: Add Tegra210 based ADX driver") Cc: stable@vger.kernel.org Signed-off-by: Sameer Pujar Link: https://lore.kernel.org/r/1637676459-31191-7-git-send-email-spujar@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra210_adx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra210_adx.c b/sound/soc/tegra/tegra210_adx.c index 933c4503fe50..3785cade2d9a 100644 --- a/sound/soc/tegra/tegra210_adx.c +++ b/sound/soc/tegra/tegra210_adx.c @@ -514,8 +514,8 @@ static int tegra210_adx_platform_remove(struct platform_device *pdev) static const struct dev_pm_ops tegra210_adx_pm_ops = { SET_RUNTIME_PM_OPS(tegra210_adx_runtime_suspend, tegra210_adx_runtime_resume, NULL) - SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, - pm_runtime_force_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) }; static struct platform_driver tegra210_adx_driver = { From 4999d703c0e66f9f196b6edc0b8fdeca8846b8b6 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 17 Nov 2021 17:04:52 -0800 Subject: [PATCH 087/868] ASoC: rt5682: Fix crash due to out of scope stack vars Move the declaration of temporary arrays to somewhere that won't go out of scope before the devm_clk_hw_register() call, lest we be at the whim of the compiler for whether those stack variables get overwritten. Fixes a crash seen with gcc version 11.2.1 20210728 (Red Hat 11.2.1-1) Fixes: edbd24ea1e5c ("ASoC: rt5682: Drop usage of __clk_get_name()") Signed-off-by: Rob Clark Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20211118010453.843286-1-robdclark@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 04cb747c2b12..5224123d0d3b 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -2858,6 +2858,8 @@ int rt5682_register_dai_clks(struct rt5682_priv *rt5682) for (i = 0; i < RT5682_DAI_NUM_CLKS; ++i) { struct clk_init_data init = { }; + struct clk_parent_data parent_data; + const struct clk_hw *parent; dai_clk_hw = &rt5682->dai_clks_hw[i]; @@ -2865,17 +2867,17 @@ int rt5682_register_dai_clks(struct rt5682_priv *rt5682) case RT5682_DAI_WCLK_IDX: /* Make MCLK the parent of WCLK */ if (rt5682->mclk) { - init.parent_data = &(struct clk_parent_data){ + parent_data = (struct clk_parent_data){ .fw_name = "mclk", }; + init.parent_data = &parent_data; init.num_parents = 1; } break; case RT5682_DAI_BCLK_IDX: /* Make WCLK the parent of BCLK */ - init.parent_hws = &(const struct clk_hw *){ - &rt5682->dai_clks_hw[RT5682_DAI_WCLK_IDX] - }; + parent = &rt5682->dai_clks_hw[RT5682_DAI_WCLK_IDX]; + init.parent_hws = &parent; init.num_parents = 1; break; default: From 750dc2f622192c08664a15413bc9746d9cbc4361 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 17 Nov 2021 17:04:53 -0800 Subject: [PATCH 088/868] ASoC: rt5682s: Fix crash due to out of scope stack vars Move the declaration of temporary arrays to somewhere that won't go out of scope before the devm_clk_hw_register() call, lest we be at the whim of the compiler for whether those stack variables get overwritten. Fixes a crash seen with gcc version 11.2.1 20210728 (Red Hat 11.2.1-1) Fixes: bdd229ab26be ("ASoC: rt5682s: Add driver for ALC5682I-VS codec") Signed-off-by: Rob Clark Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20211118010453.843286-2-robdclark@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682s.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt5682s.c b/sound/soc/codecs/rt5682s.c index 470957fcad6b..d49a4f68566d 100644 --- a/sound/soc/codecs/rt5682s.c +++ b/sound/soc/codecs/rt5682s.c @@ -2693,6 +2693,8 @@ static int rt5682s_register_dai_clks(struct snd_soc_component *component) for (i = 0; i < RT5682S_DAI_NUM_CLKS; ++i) { struct clk_init_data init = { }; + struct clk_parent_data parent_data; + const struct clk_hw *parent; dai_clk_hw = &rt5682s->dai_clks_hw[i]; @@ -2700,17 +2702,17 @@ static int rt5682s_register_dai_clks(struct snd_soc_component *component) case RT5682S_DAI_WCLK_IDX: /* Make MCLK the parent of WCLK */ if (rt5682s->mclk) { - init.parent_data = &(struct clk_parent_data){ + parent_data = (struct clk_parent_data){ .fw_name = "mclk", }; + init.parent_data = &parent_data; init.num_parents = 1; } break; case RT5682S_DAI_BCLK_IDX: /* Make WCLK the parent of BCLK */ - init.parent_hws = &(const struct clk_hw *){ - &rt5682s->dai_clks_hw[RT5682S_DAI_WCLK_IDX] - }; + parent = &rt5682s->dai_clks_hw[RT5682S_DAI_WCLK_IDX]; + init.parent_hws = &parent; init.num_parents = 1; break; default: From 784b470728f5ae44f245338e4660144d46dc0876 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Mon, 22 Nov 2021 15:38:09 +0100 Subject: [PATCH 089/868] iio: adc: stm32: fix null pointer on defer_probe error dev_err_probe() calls __device_set_deferred_probe_reason() on -EPROBE_DEFER error. If device pointer to driver core private structure is not initialized, an null pointer error occurs. This pointer is set on iio_device_register() call for iio device. dev_err_probe() must be called with the device which is probing. Replace iio device by its parent device. Fixes: 0e346b2cfa85 ("iio: adc: stm32-adc: add vrefint calibration support") Signed-off-by: Olivier Moysan Reviewed-by: Ahmad Fatoum Link: https://lore.kernel.org/r/20211122143809.2332-1-olivier.moysan@foss.st.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 60f2ccf7e342..8cd258cb2682 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -1987,7 +1987,7 @@ static int stm32_adc_populate_int_ch(struct iio_dev *indio_dev, const char *ch_n /* Get calibration data for vrefint channel */ ret = nvmem_cell_read_u16(&indio_dev->dev, "vrefint", &vrefint); if (ret && ret != -ENOENT) { - return dev_err_probe(&indio_dev->dev, ret, + return dev_err_probe(indio_dev->dev.parent, ret, "nvmem access error\n"); } if (ret == -ENOENT) From 1dc2f2b81a6a9895da59f3915760f6c0c3074492 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 25 Nov 2021 18:33:16 -0800 Subject: [PATCH 090/868] hv: utils: add PTP_1588_CLOCK to Kconfig to fix build The hyperv utilities use PTP clock interfaces and should depend a a kconfig symbol such that they will be built as a loadable module or builtin so that linker errors do not happen. Prevents these build errors: ld: drivers/hv/hv_util.o: in function `hv_timesync_deinit': hv_util.c:(.text+0x37d): undefined reference to `ptp_clock_unregister' ld: drivers/hv/hv_util.o: in function `hv_timesync_init': hv_util.c:(.text+0x738): undefined reference to `ptp_clock_register' Fixes: 3716a49a81ba ("hv_utils: implement Hyper-V PTP source") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Arnd Bergmann Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Wei Liu Cc: Dexuan Cui Cc: linux-hyperv@vger.kernel.org Cc: Greg Kroah-Hartman Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20211126023316.25184-1-rdunlap@infradead.org Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index dd12af20e467..0747a8f1fcee 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -19,6 +19,7 @@ config HYPERV_TIMER config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" depends on HYPERV && CONNECTOR && NLS + depends on PTP_1588_CLOCK_OPTIONAL help Select this option to enable the Hyper-V Utilities. From 3dc709e518b47386e6af937eaec37bb36539edfd Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 26 Nov 2021 12:11:53 +0800 Subject: [PATCH 091/868] powerpc/85xx: Fix oops when CONFIG_FSL_PMC=n When CONFIG_FSL_PMC is set to n, no value is assigned to cpu_up_prepare in the mpc85xx_pm_ops structure. As a result, oops is triggered in smp_85xx_start_cpu(). smp: Bringing up secondary CPUs ... kernel tried to execute user page (0) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch (NULL pointer?) Faulting instruction address: 0x00000000 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [00000000] 0x0 LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568 Call Trace: [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable) [c1051de8] [c0011460] __cpu_up+0xc0/0x228 [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224 [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8 [c1051eb8] [c07e67bc] smp_init+0x30/0x78 [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8 [c1051f18] [c00032d8] kernel_init+0x14/0x124 [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c Fixes: c45361abb918 ("powerpc/85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n") Reported-by: Martin Kennedy Signed-off-by: Xiaoming Ni Tested-by: Martin Kennedy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211126041153.16926-1-nixiaoming@huawei.com --- arch/powerpc/platforms/85xx/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 83f4a6389a28..d7081e9af65c 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu) local_irq_save(flags); hard_irq_disable(); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(cpu); /* if cpu is not spinning, reset it */ @@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr) booting_thread_hwid = cpu_thread_in_core(nr); primary = cpu_first_thread_sibling(nr); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(nr); /* From 9222ba68c3f4065f6364b99cc641b6b019ef2d42 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Nov 2021 23:21:41 -0800 Subject: [PATCH 092/868] Input: i8042 - add deferred probe support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've got a bug report about the non-working keyboard on ASUS ZenBook UX425UA. It seems that the PS/2 device isn't ready immediately at boot but takes some seconds to get ready. Until now, the only workaround is to defer the probe, but it's available only when the driver is a module. However, many distros, including openSUSE as in the original report, build the PS/2 input drivers into kernel, hence it won't work easily. This patch adds the support for the deferred probe for i8042 stuff as a workaround of the problem above. When the deferred probe mode is enabled and the device couldn't be probed, it'll be repeated with the standard deferred probe mechanism. The deferred probe mode is enabled either via the new option i8042.probe_defer or via the quirk table entry. As of this patch, the quirk table contains only ASUS ZenBook UX425UA. The deferred probe part is based on Fabio's initial work. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190256 Signed-off-by: Takashi Iwai Tested-by: Samuel Čavoj Link: https://lore.kernel.org/r/20211117063757.11380-1-tiwai@suse.de Signed-off-by: Dmitry Torokhov --- .../admin-guide/kernel-parameters.txt | 2 + drivers/input/serio/i8042-x86ia64io.h | 14 +++++ drivers/input/serio/i8042.c | 54 ++++++++++++------- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb89dbdedc46..ddfa50578a3e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1627,6 +1627,8 @@ architectures force reset to be always executed i8042.unlock [HW] Unlock (ignore) the keylock i8042.kbdreset [HW] Reset device connected to KBD port + i8042.probe_defer + [HW] Allow deferred probing upon i8042 probe errors i810= [HW,DRM] diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index aedd05541044..1acc7c844929 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -995,6 +995,17 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { { } }; +static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { + { + /* ASUS ZenBook UX425UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), + }, + }, + { } +}; + #endif /* CONFIG_X86 */ #ifdef CONFIG_PNP @@ -1315,6 +1326,9 @@ static int __init i8042_platform_init(void) if (dmi_check_system(i8042_dmi_kbdreset_table)) i8042_kbdreset = true; + if (dmi_check_system(i8042_dmi_probe_defer_table)) + i8042_probe_defer = true; + /* * A20 was already enabled during early kernel init. But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 0b9f1d0a8f8b..3fc0a89cc785 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -45,6 +45,10 @@ static bool i8042_unlock; module_param_named(unlock, i8042_unlock, bool, 0); MODULE_PARM_DESC(unlock, "Ignore keyboard lock."); +static bool i8042_probe_defer; +module_param_named(probe_defer, i8042_probe_defer, bool, 0); +MODULE_PARM_DESC(probe_defer, "Allow deferred probing."); + enum i8042_controller_reset_mode { I8042_RESET_NEVER, I8042_RESET_ALWAYS, @@ -711,7 +715,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version) * LCS/Telegraphics. */ -static int __init i8042_check_mux(void) +static int i8042_check_mux(void) { unsigned char mux_version; @@ -740,10 +744,10 @@ static int __init i8042_check_mux(void) /* * The following is used to test AUX IRQ delivery. */ -static struct completion i8042_aux_irq_delivered __initdata; -static bool i8042_irq_being_tested __initdata; +static struct completion i8042_aux_irq_delivered; +static bool i8042_irq_being_tested; -static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) +static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id) { unsigned long flags; unsigned char str, data; @@ -770,7 +774,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) * verifies success by readinng CTR. Used when testing for presence of AUX * port. */ -static int __init i8042_toggle_aux(bool on) +static int i8042_toggle_aux(bool on) { unsigned char param; int i; @@ -798,7 +802,7 @@ static int __init i8042_toggle_aux(bool on) * the presence of an AUX interface. */ -static int __init i8042_check_aux(void) +static int i8042_check_aux(void) { int retval = -1; bool irq_registered = false; @@ -1005,7 +1009,7 @@ static int i8042_controller_init(void) if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) { pr_err("Can't read CTR while initializing i8042\n"); - return -EIO; + return i8042_probe_defer ? -EPROBE_DEFER : -EIO; } } while (n < 2 || ctr[0] != ctr[1]); @@ -1320,7 +1324,7 @@ static void i8042_shutdown(struct platform_device *dev) i8042_controller_reset(false); } -static int __init i8042_create_kbd_port(void) +static int i8042_create_kbd_port(void) { struct serio *serio; struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO]; @@ -1349,7 +1353,7 @@ static int __init i8042_create_kbd_port(void) return 0; } -static int __init i8042_create_aux_port(int idx) +static int i8042_create_aux_port(int idx) { struct serio *serio; int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx; @@ -1386,13 +1390,13 @@ static int __init i8042_create_aux_port(int idx) return 0; } -static void __init i8042_free_kbd_port(void) +static void i8042_free_kbd_port(void) { kfree(i8042_ports[I8042_KBD_PORT_NO].serio); i8042_ports[I8042_KBD_PORT_NO].serio = NULL; } -static void __init i8042_free_aux_ports(void) +static void i8042_free_aux_ports(void) { int i; @@ -1402,7 +1406,7 @@ static void __init i8042_free_aux_ports(void) } } -static void __init i8042_register_ports(void) +static void i8042_register_ports(void) { int i; @@ -1443,7 +1447,7 @@ static void i8042_free_irqs(void) i8042_aux_irq_registered = i8042_kbd_irq_registered = false; } -static int __init i8042_setup_aux(void) +static int i8042_setup_aux(void) { int (*aux_enable)(void); int error; @@ -1485,7 +1489,7 @@ static int __init i8042_setup_aux(void) return error; } -static int __init i8042_setup_kbd(void) +static int i8042_setup_kbd(void) { int error; @@ -1535,7 +1539,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb, return 0; } -static int __init i8042_probe(struct platform_device *dev) +static int i8042_probe(struct platform_device *dev) { int error; @@ -1600,6 +1604,7 @@ static struct platform_driver i8042_driver = { .pm = &i8042_pm_ops, #endif }, + .probe = i8042_probe, .remove = i8042_remove, .shutdown = i8042_shutdown, }; @@ -1610,7 +1615,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = { static int __init i8042_init(void) { - struct platform_device *pdev; int err; dbg_init(); @@ -1626,17 +1630,29 @@ static int __init i8042_init(void) /* Set this before creating the dev to allow i8042_command to work right away */ i8042_present = true; - pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); - if (IS_ERR(pdev)) { - err = PTR_ERR(pdev); + err = platform_driver_register(&i8042_driver); + if (err) goto err_platform_exit; + + i8042_platform_device = platform_device_alloc("i8042", -1); + if (!i8042_platform_device) { + err = -ENOMEM; + goto err_unregister_driver; } + err = platform_device_add(i8042_platform_device); + if (err) + goto err_free_device; + bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = i8042_panic_blink; return 0; +err_free_device: + platform_device_put(i8042_platform_device); +err_unregister_driver: + platform_driver_unregister(&i8042_driver); err_platform_exit: i8042_platform_exit(); return err; From e1f5e848209a1b51ccae50721b27684c6f9d978f Mon Sep 17 00:00:00 2001 From: Jeff LaBundy Date: Sun, 28 Nov 2021 23:41:42 -0800 Subject: [PATCH 093/868] Input: iqs626a - prohibit inlining of channel parsing functions Some automated builds report a stack frame size in excess of 2 kB for iqs626_probe(); the culprit appears to be the call to iqs626_parse_prop(). To solve this problem, specify noinline_for_stack for all of the iqs626_parse_*() helper functions which are called inside a for loop within iqs626_parse_prop(). As a result, a build with '-Wframe-larger-than' as low as 512 is free of any such warnings. Reported-by: kernel test robot Signed-off-by: Jeff LaBundy Link: https://lore.kernel.org/r/20211129004104.453930-1-jeff@labundy.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/iqs626a.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/input/misc/iqs626a.c b/drivers/input/misc/iqs626a.c index d57e996732cf..23b5dd9552dc 100644 --- a/drivers/input/misc/iqs626a.c +++ b/drivers/input/misc/iqs626a.c @@ -456,9 +456,10 @@ struct iqs626_private { unsigned int suspend_mode; }; -static int iqs626_parse_events(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_events(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; @@ -604,9 +605,10 @@ static int iqs626_parse_events(struct iqs626_private *iqs626, return 0; } -static int iqs626_parse_ati_target(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_ati_target(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; @@ -885,9 +887,10 @@ static int iqs626_parse_trackpad(struct iqs626_private *iqs626, return 0; } -static int iqs626_parse_channel(struct iqs626_private *iqs626, - const struct fwnode_handle *ch_node, - enum iqs626_ch_id ch_id) +static noinline_for_stack int +iqs626_parse_channel(struct iqs626_private *iqs626, + const struct fwnode_handle *ch_node, + enum iqs626_ch_id ch_id) { struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg; struct i2c_client *client = iqs626->client; From 1d72d9f960ccf1052a0630a68c3d358791dbdaaa Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 29 Nov 2021 00:08:13 -0800 Subject: [PATCH 094/868] Input: elantech - fix stack out of bound access in elantech_change_report_id() The array param[] in elantech_change_report_id() must be at least 3 bytes, because elantech_read_reg_params() is calling ps2_command() with PSMOUSE_CMD_GETINFO, that is going to access 3 bytes from param[], but it's defined in the stack as an array of 2 bytes, therefore we have a potential stack out-of-bounds access here, also confirmed by KASAN: [ 6.512374] BUG: KASAN: stack-out-of-bounds in __ps2_command+0x372/0x7e0 [ 6.512397] Read of size 1 at addr ffff8881024d77c2 by task kworker/2:1/118 [ 6.512416] CPU: 2 PID: 118 Comm: kworker/2:1 Not tainted 5.13.0-22-generic #22+arighi20211110 [ 6.512428] Hardware name: LENOVO 20T8000QGE/20T8000QGE, BIOS R1AET32W (1.08 ) 08/14/2020 [ 6.512436] Workqueue: events_long serio_handle_event [ 6.512453] Call Trace: [ 6.512462] show_stack+0x52/0x58 [ 6.512474] dump_stack+0xa1/0xd3 [ 6.512487] print_address_description.constprop.0+0x1d/0x140 [ 6.512502] ? __ps2_command+0x372/0x7e0 [ 6.512516] __kasan_report.cold+0x7d/0x112 [ 6.512527] ? _raw_write_lock_irq+0x20/0xd0 [ 6.512539] ? __ps2_command+0x372/0x7e0 [ 6.512552] kasan_report+0x3c/0x50 [ 6.512564] __asan_load1+0x6a/0x70 [ 6.512575] __ps2_command+0x372/0x7e0 [ 6.512589] ? ps2_drain+0x240/0x240 [ 6.512601] ? dev_printk_emit+0xa2/0xd3 [ 6.512612] ? dev_vprintk_emit+0xc5/0xc5 [ 6.512621] ? __kasan_check_write+0x14/0x20 [ 6.512634] ? mutex_lock+0x8f/0xe0 [ 6.512643] ? __mutex_lock_slowpath+0x20/0x20 [ 6.512655] ps2_command+0x52/0x90 [ 6.512670] elantech_ps2_command+0x4f/0xc0 [psmouse] [ 6.512734] elantech_change_report_id+0x1e6/0x256 [psmouse] [ 6.512799] ? elantech_report_trackpoint.constprop.0.cold+0xd/0xd [psmouse] [ 6.512863] ? ps2_command+0x7f/0x90 [ 6.512877] elantech_query_info.cold+0x6bd/0x9ed [psmouse] [ 6.512943] ? elantech_setup_ps2+0x460/0x460 [psmouse] [ 6.513005] ? psmouse_reset+0x69/0xb0 [psmouse] [ 6.513064] ? psmouse_attr_set_helper+0x2a0/0x2a0 [psmouse] [ 6.513122] ? phys_pmd_init+0x30e/0x521 [ 6.513137] elantech_init+0x8a/0x200 [psmouse] [ 6.513200] ? elantech_init_ps2+0xf0/0xf0 [psmouse] [ 6.513249] ? elantech_query_info+0x440/0x440 [psmouse] [ 6.513296] ? synaptics_send_cmd+0x60/0x60 [psmouse] [ 6.513342] ? elantech_query_info+0x440/0x440 [psmouse] [ 6.513388] ? psmouse_try_protocol+0x11e/0x170 [psmouse] [ 6.513432] psmouse_extensions+0x65d/0x6e0 [psmouse] [ 6.513476] ? psmouse_try_protocol+0x170/0x170 [psmouse] [ 6.513519] ? mutex_unlock+0x22/0x40 [ 6.513526] ? ps2_command+0x7f/0x90 [ 6.513536] ? psmouse_probe+0xa3/0xf0 [psmouse] [ 6.513580] psmouse_switch_protocol+0x27d/0x2e0 [psmouse] [ 6.513624] psmouse_connect+0x272/0x530 [psmouse] [ 6.513669] serio_driver_probe+0x55/0x70 [ 6.513679] really_probe+0x190/0x720 [ 6.513689] driver_probe_device+0x160/0x1f0 [ 6.513697] device_driver_attach+0x119/0x130 [ 6.513705] ? device_driver_attach+0x130/0x130 [ 6.513713] __driver_attach+0xe7/0x1a0 [ 6.513720] ? device_driver_attach+0x130/0x130 [ 6.513728] bus_for_each_dev+0xfb/0x150 [ 6.513738] ? subsys_dev_iter_exit+0x10/0x10 [ 6.513748] ? _raw_write_unlock_bh+0x30/0x30 [ 6.513757] driver_attach+0x2d/0x40 [ 6.513764] serio_handle_event+0x199/0x3d0 [ 6.513775] process_one_work+0x471/0x740 [ 6.513785] worker_thread+0x2d2/0x790 [ 6.513794] ? process_one_work+0x740/0x740 [ 6.513802] kthread+0x1b4/0x1e0 [ 6.513809] ? set_kthread_struct+0x80/0x80 [ 6.513816] ret_from_fork+0x22/0x30 [ 6.513832] The buggy address belongs to the page: [ 6.513838] page:00000000bc35e189 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1024d7 [ 6.513847] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff) [ 6.513860] raw: 0017ffffc0000000 dead000000000100 dead000000000122 0000000000000000 [ 6.513867] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 6.513872] page dumped because: kasan: bad access detected [ 6.513879] addr ffff8881024d77c2 is located in stack of task kworker/2:1/118 at offset 34 in frame: [ 6.513887] elantech_change_report_id+0x0/0x256 [psmouse] [ 6.513941] this frame has 1 object: [ 6.513947] [32, 34) 'param' [ 6.513956] Memory state around the buggy address: [ 6.513962] ffff8881024d7680: f2 f2 f2 f2 f2 00 00 f3 f3 00 00 00 00 00 00 00 [ 6.513969] ffff8881024d7700: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.513976] >ffff8881024d7780: 00 00 00 00 f1 f1 f1 f1 02 f3 f3 f3 00 00 00 00 [ 6.513982] ^ [ 6.513988] ffff8881024d7800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.513995] ffff8881024d7880: 00 f1 f1 f1 f1 03 f2 03 f2 03 f3 f3 f3 00 00 00 [ 6.514000] ================================================================== Define param[] in elantech_change_report_id() as an array of 3 bytes to prevent the out-of-bounds access in the stack. Fixes: e4c9062717fe ("Input: elantech - fix protocol errors for some trackpoints in SMBus mode") BugLink: https://bugs.launchpad.net/bugs/1945590 Signed-off-by: Andrea Righi Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/20211116095559.24395-1-andrea.righi@canonical.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 956d9cd34796..ece97f8c6a3e 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1588,7 +1588,13 @@ static const struct dmi_system_id no_hw_res_dmi_table[] = { */ static int elantech_change_report_id(struct psmouse *psmouse) { - unsigned char param[2] = { 0x10, 0x03 }; + /* + * NOTE: the code is expecting to receive param[] as an array of 3 + * items (see __ps2_command()), even if in this case only 2 are + * actually needed. Make sure the array size is 3 to avoid potential + * stack out-of-bound accesses. + */ + unsigned char param[3] = { 0x10, 0x03 }; if (elantech_write_reg_params(psmouse, 0x7, param) || elantech_read_reg_params(psmouse, 0x7, param) || From af9d3a2984dc3501acd612c657127517a1beaf9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 09:19:49 +0100 Subject: [PATCH 095/868] mac80211: add docs for ssn in struct tid_ampdu_tx As pointed out by Stephen, add the missing docs. Reported-by: Stephen Rothwell Link: https://lore.kernel.org/r/20211129091948.1327ec82beab.Iecc5975406a3028d35c65ff8d2dec31a693888d3@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e7443fc4669c..379fd367197f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -176,6 +176,7 @@ struct sta_info; * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent * @amsdu: support A-MSDU withing A-MDPU + * @ssn: starting sequence number of the session * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. From 1eda919126b420fee6b8d546f7f728fbbd4b8f11 Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Sat, 27 Nov 2021 11:28:53 +0100 Subject: [PATCH 096/868] nl80211: reset regdom when reloading regdb Reload the regdom when the regulatory db is reloaded. Otherwise, the user had to change the regulatoy domain to a different one and then reset it to the correct one to have a new regulatory db take effect after a reload. Signed-off-by: Finn Behrens Link: https://lore.kernel.org/r/YaIIZfxHgqc/UTA7@gimli.kloenk.dev [edit commit message] Signed-off-by: Johannes Berg --- include/net/regulatory.h | 1 + net/wireless/reg.c | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47f06f6f5a67..0cf9335431e0 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -83,6 +83,7 @@ struct regulatory_request { enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; + bool reload; enum environment_cap country_ie_env; struct list_head list; }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index df87c7f3a049..61f1bf1bc4a7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -133,6 +133,7 @@ static u32 reg_is_indoor_portid; static void restore_regulatory_settings(bool reset_user, bool cached); static void print_regdomain(const struct ieee80211_regdomain *rd); +static void reg_process_hint(struct regulatory_request *reg_request); static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { @@ -1098,6 +1099,8 @@ int reg_reload_regdb(void) const struct firmware *fw; void *db; int err; + const struct ieee80211_regdomain *current_regdomain; + struct regulatory_request *request; err = request_firmware(&fw, "regulatory.db", ®_pdev->dev); if (err) @@ -1118,8 +1121,27 @@ int reg_reload_regdb(void) if (!IS_ERR_OR_NULL(regdb)) kfree(regdb); regdb = db; - rtnl_unlock(); + /* reset regulatory domain */ + current_regdomain = get_cfg80211_regdom(); + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + request->wiphy_idx = WIPHY_IDX_INVALID; + request->alpha2[0] = current_regdomain->alpha2[0]; + request->alpha2[1] = current_regdomain->alpha2[1]; + request->initiator = NL80211_USER_REG_HINT_USER; + request->user_reg_hint_type = NL80211_USER_REG_HINT_USER; + request->reload = true; + + reg_process_hint(request); + +out_unlock: + rtnl_unlock(); out: release_firmware(fw); return err; @@ -2690,7 +2712,8 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) + (treatment == REG_REQ_ALREADY_SET && + !user_request->reload)) return REG_REQ_IGNORE; user_request->intersect = treatment == REG_REQ_INTERSECT; From 9d7482771fac8d8e38e763263f2ca0ca12dd22c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Nov 2021 17:54:04 +0300 Subject: [PATCH 097/868] tee: amdtee: fix an IS_ERR() vs NULL bug The __get_free_pages() function does not return error pointers it returns NULL so fix this condition to avoid a NULL dereference. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Signed-off-by: Dan Carpenter Acked-by: Rijo Thomas Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index da6b88e80dc0..297dc62bca29 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -203,9 +203,8 @@ static int copy_ta_binary(struct tee_context *ctx, void *ptr, void **ta, *ta_size = roundup(fw->size, PAGE_SIZE); *ta = (void *)__get_free_pages(GFP_KERNEL, get_order(*ta_size)); - if (IS_ERR(*ta)) { - pr_err("%s: get_free_pages failed 0x%llx\n", __func__, - (u64)*ta); + if (!*ta) { + pr_err("%s: get_free_pages failed\n", __func__); rc = -ENOMEM; goto rel_fw; } From 9003fbe0f3674b972f56fa7e6bf3ac9dbfc4d0ec Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 28 Nov 2021 22:07:48 +0100 Subject: [PATCH 098/868] HID: quirks: Add quirk for the Microsoft Surface 3 type-cover Add a HID_QUIRK_NO_INIT_REPORTS quirk for the Microsoft Surface 3 (non pro) type-cover. Trying to init the reports seems to confuse the type-cover and causes 2 issues: 1. Despite hid-multitouch sending the command to switch the touchpad to multitouch mode, it keeps sending events on the mouse emulation interface. 2. The touchpad completely stops sending events after a reboot. Adding the HID_QUIRK_NO_INIT_REPORTS quirk fixes both issues. Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 96a455921c67..aeb907b57ab3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -886,6 +886,7 @@ #define USB_DEVICE_ID_MS_TOUCH_COVER_2 0x07a7 #define USB_DEVICE_ID_MS_TYPE_COVER_2 0x07a9 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da +#define USB_DEVICE_ID_MS_SURFACE3_COVER 0x07de #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 06b7908c874c..ee7e504e7279 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -124,6 +124,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, From 07fb78a78de4e67b5d6d5407aeee1250a327a698 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 26 Nov 2021 16:43:42 +0100 Subject: [PATCH 099/868] spi: spi-rockchip: Add rk3568-spi compatible This adds a compatible string for the SPI controller found on the RK3566 and RK3568 SoCs. Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20211126154344.724316-2-frattaroli.nicolas@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 7f987e79337c..52a78a2e362e 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -33,6 +33,7 @@ properties: - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi + - rockchip,rk3568-spi - rockchip,rv1126-spi - const: rockchip,rk3066-spi From db6169b5bac1c75ed37cfdaedc7dfb1618f3f362 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 28 Nov 2021 21:35:01 +0800 Subject: [PATCH 100/868] RDMA/rtrs: Call {get,put}_cpu_ptr to silence a debug kernel warning With preemption enabled (CONFIG_DEBUG_PREEMPT=y), the following appeared when rnbd client tries to map remote block device. BUG: using smp_processor_id() in preemptible [00000000] code: bash/1733 caller is debug_smp_processor_id+0x17/0x20 CPU: 0 PID: 1733 Comm: bash Not tainted 5.16.0-rc1 #5 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x78 dump_stack+0x10/0x12 check_preemption_disabled+0xe4/0xf0 debug_smp_processor_id+0x17/0x20 rtrs_clt_update_all_stats+0x3b/0x70 [rtrs_client] rtrs_clt_read_req+0xc3/0x380 [rtrs_client] ? rtrs_clt_init_req+0xe3/0x120 [rtrs_client] rtrs_clt_request+0x1a7/0x320 [rtrs_client] ? 0xffffffffc0ab1000 send_usr_msg+0xbf/0x160 [rnbd_client] ? rnbd_clt_put_sess+0x60/0x60 [rnbd_client] ? send_usr_msg+0x160/0x160 [rnbd_client] ? sg_alloc_table+0x27/0xb0 ? sg_zero_buffer+0xd0/0xd0 send_msg_sess_info+0xe9/0x180 [rnbd_client] ? rnbd_clt_put_sess+0x60/0x60 [rnbd_client] ? blk_mq_alloc_tag_set+0x2ef/0x370 rnbd_clt_map_device+0xba8/0xcd0 [rnbd_client] ? send_msg_open+0x200/0x200 [rnbd_client] rnbd_clt_map_device_store+0x3e5/0x620 [rnbd_client To supress the calltrace, let's call get_cpu_ptr/put_cpu_ptr pair in rtrs_clt_update_rdma_stats to disable preemption when accessing per-cpu variable. While at it, let's make the similar change in rtrs_clt_update_wc_stats. And for rtrs_clt_inc_failover_cnt, though it was only called inside rcu section, but it still can be preempted in case CONFIG_PREEMPT_RCU is enabled, so change it to {get,put}_cpu_ptr pair either. Link: https://lore.kernel.org/r/20211128133501.38710-1-guoqing.jiang@linux.dev Signed-off-by: Guoqing Jiang Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c index f7e459fe68be..76e4352fe3f6 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c @@ -19,7 +19,7 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con) int cpu; cpu = raw_smp_processor_id(); - s = this_cpu_ptr(stats->pcpu_stats); + s = get_cpu_ptr(stats->pcpu_stats); if (con->cpu != cpu) { s->cpu_migr.to++; @@ -27,14 +27,16 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con) s = per_cpu_ptr(stats->pcpu_stats, con->cpu); atomic_inc(&s->cpu_migr.from); } + put_cpu_ptr(stats->pcpu_stats); } void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats) { struct rtrs_clt_stats_pcpu *s; - s = this_cpu_ptr(stats->pcpu_stats); + s = get_cpu_ptr(stats->pcpu_stats); s->rdma.failover_cnt++; + put_cpu_ptr(stats->pcpu_stats); } int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf) @@ -169,9 +171,10 @@ static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats, { struct rtrs_clt_stats_pcpu *s; - s = this_cpu_ptr(stats->pcpu_stats); + s = get_cpu_ptr(stats->pcpu_stats); s->rdma.dir[d].cnt++; s->rdma.dir[d].size_total += size; + put_cpu_ptr(stats->pcpu_stats); } void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir) From d69dab7de208748ddf79143b39d98db55eee9b4a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 27 Nov 2021 10:14:49 +0100 Subject: [PATCH 101/868] docs: conf.py: fix support for Readthedocs v 1.0.0 As described at: https://stackoverflow.com/questions/23211695/modifying-content-width-of-the-sphinx-theme-read-the-docs since Sphinx 1.8, the standard way to setup a custom theme is to use html_css_files. While using html_context is OK with RTD 0.5.2, it doesn't work with 1.0.0, causing the theme to not load, producing a very weird html. Tested with: - Sphinx 1.7.9 + sphinx-rtd-theme 0.5.2 - Sphinx 2.4.4 + sphinx-rtd-theme 0.5.2 - Sphinx 2.4.4 + sphinx-rtd-theme 1.0.0 - Sphinx 4.3.0 + sphinx-rtd-theme 1.0.0 Reported-by: Hans Verkuil Tested-by: Hans Verkuil Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Tested-by: Akira Yokosawa Link: https://lore.kernel.org/r/80009f0d17ea0840d81e7e16fff6e7677919fdfc.1638004294.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/conf.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index 17f7cee56987..76e5eb5cb62b 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -249,11 +249,16 @@ except ImportError: html_static_path = ['sphinx-static'] -html_context = { - 'css_files': [ - '_static/theme_overrides.css', - ], -} +html_css_files = [ + 'theme_overrides.css', +] + +if major <= 1 and minor < 8: + html_context = { + 'css_files': [ + '_static/theme_overrides.css', + ], + } # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied From 5c81691bb6461a474ac9d6ad5737c12e8f558a8b Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Fri, 26 Nov 2021 13:11:17 +0900 Subject: [PATCH 102/868] docs: admin-guide/blockdev: Remove digraph of node-states While node-states-8.dot has two digraphs, the dot(1) command can not properly handle multiple graphs in a DOT file and the kernel-doc page at https://www.kernel.org/doc/html/latest/admin-guide/blockdev/drbd/figures.html fails to render the graphs. It turned out that the digraph of node_states can be removed. Quote from Joel's reflection: On reflection, the digraph node_states can be removed entirely. It is too basic to contain any useful information. In addition it references "ioctl_set_state". The ioctl configuration interface for DRBD has long been removed. In fact, it was never in the upstream version of DRBD. Remove node_states and rename the DOT file peer_states-8.dot. Suggested-by: Joel Colledge Acked-by: Joel Colledge Signed-off-by: Akira Yokosawa Cc: Philipp Reisner Cc: Lars Ellenberg Link: https://lore.kernel.org/r/7df04f45-8746-e666-1a9d-a998f1ab1f91@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/blockdev/drbd/figures.rst | 4 ++-- .../blockdev/drbd/{node-states-8.dot => peer-states-8.dot} | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) rename Documentation/admin-guide/blockdev/drbd/{node-states-8.dot => peer-states-8.dot} (71%) diff --git a/Documentation/admin-guide/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst index bd9a4901fe46..9f73253ea353 100644 --- a/Documentation/admin-guide/blockdev/drbd/figures.rst +++ b/Documentation/admin-guide/blockdev/drbd/figures.rst @@ -25,6 +25,6 @@ Sub graphs of DRBD's state transitions :alt: disk-states-8.dot :align: center -.. kernel-figure:: node-states-8.dot - :alt: node-states-8.dot +.. kernel-figure:: peer-states-8.dot + :alt: peer-states-8.dot :align: center diff --git a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot similarity index 71% rename from Documentation/admin-guide/blockdev/drbd/node-states-8.dot rename to Documentation/admin-guide/blockdev/drbd/peer-states-8.dot index bfa54e1f8016..6dc3954954d6 100644 --- a/Documentation/admin-guide/blockdev/drbd/node-states-8.dot +++ b/Documentation/admin-guide/blockdev/drbd/peer-states-8.dot @@ -1,8 +1,3 @@ -digraph node_states { - Secondary -> Primary [ label = "ioctl_set_state()" ] - Primary -> Secondary [ label = "ioctl_set_state()" ] -} - digraph peer_states { Secondary -> Primary [ label = "recv state packet" ] Primary -> Secondary [ label = "recv state packet" ] From aa9b5e0df226edbf1879cb8f17a409cc3fd89c9d Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Fri, 19 Nov 2021 21:07:58 +0100 Subject: [PATCH 103/868] Documentation/process: fix self reference Instead link to the device tree document with the same name. Signed-off-by: Erik Ekman Link: https://lore.kernel.org/r/20211119200758.642474-1-erik@kryo.se Signed-off-by: Jonathan Corbet --- Documentation/process/submitting-patches.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index da085d63af9b..6b3aaed66fba 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -14,7 +14,8 @@ works, see Documentation/process/development-process.rst. Also, read Documentation/process/submit-checklist.rst for a list of items to check before submitting code. If you are submitting a driver, also read Documentation/process/submitting-drivers.rst; for device -tree binding patches, read Documentation/process/submitting-patches.rst. +tree binding patches, read +Documentation/devicetree/bindings/submitting-patches.rst. This documentation assumes that you're using ``git`` to prepare your patches. If you're unfamiliar with ``git``, you would be well-advised to learn how to From 333b11e541feeb79e7cce31dd5b280ceded388e4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Nov 2021 15:56:03 -0300 Subject: [PATCH 104/868] Documentation: Add minimum pahole version A report was made in https://github.com/acmel/dwarves/issues/26 about pahole not being listed in the process/changes.rst file as being needed for building the kernel, address that. Link: https://github.com/acmel/dwarves/issues/26 Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Cc: Alexei Starovoitov Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/YZPQ6+u2wTHRfR+W@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/YZfzQ0DvHD5o26Bt@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/process/changes.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index b398b8576417..cf908d79666e 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -35,6 +35,7 @@ GNU make 3.81 make --version binutils 2.23 ld -v flex 2.5.35 flex --version bison 2.0 bison --version +pahole 1.16 pahole --version util-linux 2.10o fdformat --version kmod 13 depmod -V e2fsprogs 1.41.4 e2fsck -V @@ -108,6 +109,16 @@ Bison Since Linux 4.16, the build system generates parsers during build. This requires bison 2.0 or later. +pahole: +------- + +Since Linux 5.2, if CONFIG_DEBUG_INFO_BTF is selected, the build system +generates BTF (BPF Type Format) from DWARF in vmlinux, a bit later from kernel +modules as well. This requires pahole v1.16 or later. + +It is found in the 'dwarves' or 'pahole' distro packages or from +https://fedorapeople.org/~acme/dwarves/. + Perl ---- From 53689f7f91a2ab0079422e1d1b6e096cf68d58f4 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Thu, 25 Nov 2021 09:48:59 +0100 Subject: [PATCH 105/868] ASoC: rockchip: i2s_tdm: Dup static DAI template Previously, the DAI template was used directly, which lead to fun bugs such as "why is my channels_max changing?" when one instantiated more than one i2s_tdm IP block in a device tree. This change makes it so that we instead duplicate the template struct, and then use that. Fixes: 081068fd6414 ("ASoC: rockchip: add support for i2s-tdm controller") Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20211125084900.417102-1-frattaroli.nicolas@gmail.com Signed-off-by: Mark Brown --- sound/soc/rockchip/rockchip_i2s_tdm.c | 52 ++++++++++++++++----------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 17b9b287853a..5f9cb5c4c7f0 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -95,6 +95,7 @@ struct rk_i2s_tdm_dev { spinlock_t lock; /* xfer lock */ bool has_playback; bool has_capture; + struct snd_soc_dai_driver *dai; }; static int to_ch_num(unsigned int val) @@ -1310,19 +1311,14 @@ static const struct of_device_id rockchip_i2s_tdm_match[] = { {}, }; -static struct snd_soc_dai_driver i2s_tdm_dai = { +static const struct snd_soc_dai_driver i2s_tdm_dai = { .probe = rockchip_i2s_tdm_dai_probe, - .playback = { - .stream_name = "Playback", - }, - .capture = { - .stream_name = "Capture", - }, .ops = &rockchip_i2s_tdm_dai_ops, }; -static void rockchip_i2s_tdm_init_dai(struct rk_i2s_tdm_dev *i2s_tdm) +static int rockchip_i2s_tdm_init_dai(struct rk_i2s_tdm_dev *i2s_tdm) { + struct snd_soc_dai_driver *dai; struct property *dma_names; const char *dma_name; u64 formats = (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE | @@ -1337,19 +1333,33 @@ static void rockchip_i2s_tdm_init_dai(struct rk_i2s_tdm_dev *i2s_tdm) i2s_tdm->has_capture = true; } + dai = devm_kmemdup(i2s_tdm->dev, &i2s_tdm_dai, + sizeof(*dai), GFP_KERNEL); + if (!dai) + return -ENOMEM; + if (i2s_tdm->has_playback) { - i2s_tdm_dai.playback.channels_min = 2; - i2s_tdm_dai.playback.channels_max = 8; - i2s_tdm_dai.playback.rates = SNDRV_PCM_RATE_8000_192000; - i2s_tdm_dai.playback.formats = formats; + dai->playback.stream_name = "Playback"; + dai->playback.channels_min = 2; + dai->playback.channels_max = 8; + dai->playback.rates = SNDRV_PCM_RATE_8000_192000; + dai->playback.formats = formats; } if (i2s_tdm->has_capture) { - i2s_tdm_dai.capture.channels_min = 2; - i2s_tdm_dai.capture.channels_max = 8; - i2s_tdm_dai.capture.rates = SNDRV_PCM_RATE_8000_192000; - i2s_tdm_dai.capture.formats = formats; + dai->capture.stream_name = "Capture"; + dai->capture.channels_min = 2; + dai->capture.channels_max = 8; + dai->capture.rates = SNDRV_PCM_RATE_8000_192000; + dai->capture.formats = formats; } + + if (i2s_tdm->clk_trcm != TRCM_TXRX) + dai->symmetric_rate = 1; + + i2s_tdm->dai = dai; + + return 0; } static int rockchip_i2s_tdm_path_check(struct rk_i2s_tdm_dev *i2s_tdm, @@ -1541,8 +1551,6 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) spin_lock_init(&i2s_tdm->lock); i2s_tdm->soc_data = (struct rk_i2s_soc_data *)of_id->data; - rockchip_i2s_tdm_init_dai(i2s_tdm); - i2s_tdm->frame_width = 64; i2s_tdm->clk_trcm = TRCM_TXRX; @@ -1555,8 +1563,10 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) } i2s_tdm->clk_trcm = TRCM_RX; } - if (i2s_tdm->clk_trcm != TRCM_TXRX) - i2s_tdm_dai.symmetric_rate = 1; + + ret = rockchip_i2s_tdm_init_dai(i2s_tdm); + if (ret) + return ret; i2s_tdm->grf = syscon_regmap_lookup_by_phandle(node, "rockchip,grf"); if (IS_ERR(i2s_tdm->grf)) @@ -1678,7 +1688,7 @@ static int rockchip_i2s_tdm_probe(struct platform_device *pdev) ret = devm_snd_soc_register_component(&pdev->dev, &rockchip_i2s_tdm_component, - &i2s_tdm_dai, 1); + i2s_tdm->dai, 1); if (ret) { dev_err(&pdev->dev, "Could not register DAI\n"); From d5c137f41352e8dd864522c417b45d8d1aebca68 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Nov 2021 15:56:33 +0300 Subject: [PATCH 106/868] ASoC: amd: fix uninitialized variable in snd_acp6x_probe() The "index" is potentially used without being initialized on the error path. Fixes: fc329c1de498 ("ASoC: amd: add platform devices for acp6x pdm driver and dmic driver") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211130125633.GA24941@kili Signed-off-by: Mark Brown --- sound/soc/amd/yc/pci-acp6x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/amd/yc/pci-acp6x.c b/sound/soc/amd/yc/pci-acp6x.c index 957eeb6fb8e3..7e9a9a9d8ddd 100644 --- a/sound/soc/amd/yc/pci-acp6x.c +++ b/sound/soc/amd/yc/pci-acp6x.c @@ -146,10 +146,11 @@ static int snd_acp6x_probe(struct pci_dev *pci, { struct acp6x_dev_data *adata; struct platform_device_info pdevinfo[ACP6x_DEVS]; - int ret, index; + int index = 0; int val = 0x00; u32 addr; unsigned int irqflags; + int ret; irqflags = IRQF_SHARED; /* Yellow Carp device check */ From 046aede2f847676f93a2ea4f48b77909c51dba40 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 30 Nov 2021 11:06:06 +0200 Subject: [PATCH 107/868] ASoC: SOF: Intel: Retry codec probing if it fails On the latest Lenovo Thinkstation laptops, we often experience the speaker failure after rebooting, check the dmesg, we could see: sof-audio-pci-intel-tgl 0000:00:1f.3: codec #0 probe error, ret: -5 The analogue codec on the machine is ALC287, then we designed a testcase to reboot and check the codec probing result repeatedly, we found the analogue codec probing always failed at least once within several minutes to several hours (roughly 1 reboot per min). This issue happens on all laptops of this Thinkstation model, but with legacy HDA driver, we couldn't reproduce this issue on those laptops. And so far, this issue is not reproduced on machines which don't belong to this model. We tried to make the hda_dsp_ctrl_init_chip() same as hda_intel_init_chip() which is the controller init routine in the legacy HDA driver, but it didn't help. We found when issue happens, the resp is -1, and if we let driver re-run send_cmd() and get_response(), it will get the correct response 10ec0287, then driver continues the rest work, finally boot to the desktop and all audio function work well. Here adding codec probing retries to 3 times, it could fix the issue on this Thinkstation model, and it doesn't bring impact to other machines. Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Signed-off-by: Hui Wang Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20211130090606.529348-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-codec.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 6744318de612..13cd96e6724a 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -22,6 +22,7 @@ #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC) #define IDISP_VID_INTEL 0x80860000 +#define CODEC_PROBE_RETRIES 3 /* load the legacy HDA codec driver */ static int request_codec_module(struct hda_codec *codec) @@ -121,12 +122,15 @@ static int hda_codec_probe(struct snd_sof_dev *sdev, int address, u32 hda_cmd = (address << 28) | (AC_NODE_ROOT << 20) | (AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID; u32 resp = -1; - int ret; + int ret, retry = 0; + + do { + mutex_lock(&hbus->core.cmd_mutex); + snd_hdac_bus_send_cmd(&hbus->core, hda_cmd); + snd_hdac_bus_get_response(&hbus->core, address, &resp); + mutex_unlock(&hbus->core.cmd_mutex); + } while (resp == -1 && retry++ < CODEC_PROBE_RETRIES); - mutex_lock(&hbus->core.cmd_mutex); - snd_hdac_bus_send_cmd(&hbus->core, hda_cmd); - snd_hdac_bus_get_response(&hbus->core, address, &resp); - mutex_unlock(&hbus->core.cmd_mutex); if (resp == -1) return -EIO; dev_dbg(sdev->dev, "HDA codec #%d probed OK: response: %x\n", From 6a631c0432dcccbcf45839016a07c015e335e9ae Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 27 Nov 2021 17:31:59 +0100 Subject: [PATCH 108/868] Documentation/locking/locktypes: Update migrate_disable() bits. The initial implementation of migrate_disable() for mainline was a wrapper around preempt_disable(). RT kernels substituted this with a real migrate disable implementation. Later on mainline gained true migrate disable support, but the documentation was not updated. Update the documentation, remove the claims about migrate_disable() mapping to preempt_disable() on non-PREEMPT_RT kernels. Fixes: 74d862b682f51 ("sched: Make migrate_disable/enable() independent of RT") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211127163200.10466-2-bigeasy@linutronix.de --- Documentation/locking/locktypes.rst | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index ddada4a53749..4fd7b70fcde1 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -439,11 +439,9 @@ preemption. The following substitution works on both kernels:: spin_lock(&p->lock); p->count += this_cpu_read(var2); -On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable() -which makes the above code fully equivalent. On a PREEMPT_RT kernel migrate_disable() ensures that the task is pinned on the current CPU which in turn guarantees that the per-CPU access to var1 and var2 are staying on -the same CPU. +the same CPU while the task remains preemptible. The migrate_disable() substitution is not valid for the following scenario:: @@ -456,9 +454,8 @@ scenario:: p = this_cpu_ptr(&var1); p->val = func2(); -While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because -here migrate_disable() does not protect against reentrancy from a -preempting task. A correct substitution for this case is:: +This breaks because migrate_disable() does not protect against reentrancy from +a preempting task. A correct substitution for this case is:: func() { From 79364031c5b4365ca28ac0fa00acfab5bf465be1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 27 Nov 2021 17:32:00 +0100 Subject: [PATCH 109/868] bpf: Make sure bpf_disable_instrumentation() is safe vs preemption. The initial implementation of migrate_disable() for mainline was a wrapper around preempt_disable(). RT kernels substituted this with a real migrate disable implementation. Later on mainline gained true migrate disable support, but neither documentation nor affected code were updated. Remove stale comments claiming that migrate_disable() is PREEMPT_RT only. Don't use __this_cpu_inc() in the !PREEMPT_RT path because preemption is not disabled and the RMW operation can be preempted. Fixes: 74d862b682f51 ("sched: Make migrate_disable/enable() independent of RT") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211127163200.10466-3-bigeasy@linutronix.de --- include/linux/bpf.h | 16 ++-------------- include/linux/filter.h | 3 --- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 84ff6ef49462..755f38e893be 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1353,28 +1353,16 @@ extern struct mutex bpf_stats_enabled_mutex; * kprobes, tracepoints) to prevent deadlocks on map operations as any of * these events can happen inside a region which holds a map bucket lock * and can deadlock on it. - * - * Use the preemption safe inc/dec variants on RT because migrate disable - * is preemptible on RT and preemption in the middle of the RMW operation - * might lead to inconsistent state. Use the raw variants for non RT - * kernels as migrate_disable() maps to preempt_disable() so the slightly - * more expensive save operation can be avoided. */ static inline void bpf_disable_instrumentation(void) { migrate_disable(); - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_inc(bpf_prog_active); - else - __this_cpu_inc(bpf_prog_active); + this_cpu_inc(bpf_prog_active); } static inline void bpf_enable_instrumentation(void) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_dec(bpf_prog_active); - else - __this_cpu_dec(bpf_prog_active); + this_cpu_dec(bpf_prog_active); migrate_enable(); } diff --git a/include/linux/filter.h b/include/linux/filter.h index 24b7ed2677af..534f678ca50f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -640,9 +640,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void * This uses migrate_disable/enable() explicitly to document that the * invocation of a BPF program does not require reentrancy protection * against a BPF program which is invoked from a preempting task. - * - * For non RT enabled kernels migrate_disable/enable() maps to - * preempt_disable/enable(), i.e. it disables also preemption. */ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog, const void *ctx) From a2ca752055edd39be38b887e264d3de7ca2bc1bb Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Tue, 30 Nov 2021 17:22:12 +0800 Subject: [PATCH 110/868] hwmon: (pwm-fan) Ensure the fan going on in .probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit 86585c61972f ("hwmon: (pwm-fan) stop using legacy PWM functions and some cleanups") pwm_apply_state() was called unconditionally in pwm_fan_probe(). In this commit this direct call was replaced by a call to __set_pwm(ct, MAX_PWM) which however is a noop if ctx->pwm_value already matches the value to set. After probe the fan is supposed to run at full speed, and the internal driver state suggests it does, but this isn't asserted and depending on bootloader and pwm low-level driver, the fan might just be off. So drop setting pwm_value to MAX_PWM to ensure the check in __set_pwm doesn't make it exit early and the fan goes on as intended. Cc: stable@vger.kernel.org Fixes: 86585c61972f ("hwmon: (pwm-fan) stop using legacy PWM functions and some cleanups") Signed-off-by: Billy Tsai Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20211130092212.17783-1-billy_tsai@aspeedtech.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pwm-fan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 17518b4cab1b..f12b9a28a232 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -336,8 +336,6 @@ static int pwm_fan_probe(struct platform_device *pdev) return ret; } - ctx->pwm_value = MAX_PWM; - pwm_init_state(ctx->pwm, &ctx->pwm_state); /* From 7dba402807a85fa3723f4a27504813caf81cc9d7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Nov 2021 14:23:09 +0100 Subject: [PATCH 111/868] mmc: renesas_sdhi: initialize variable properly when tuning 'cmd_error' is not necessarily initialized on some error paths in mmc_send_tuning(). Initialize it. Fixes: 2c9017d0b5d3 ("mmc: renesas_sdhi: abort tuning when timeout detected") Reported-by: Dan Carpenter Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211130132309.18246-1-wsa+renesas@sang-engineering.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index a4407f391f66..f5b2684ad805 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -673,7 +673,7 @@ static int renesas_sdhi_execute_tuning(struct mmc_host *mmc, u32 opcode) /* Issue CMD19 twice for each tap */ for (i = 0; i < 2 * priv->tap_num; i++) { - int cmd_error; + int cmd_error = 0; /* Set sampling clock position */ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_TAPSET, i % priv->tap_num); From 49201b90af818654c5506a0decc18e111eadcb66 Mon Sep 17 00:00:00 2001 From: Fabrizio Bertocci Date: Mon, 29 Nov 2021 23:15:40 -0500 Subject: [PATCH 112/868] platform/x86: amd-pmc: Fix s2idle failures on certain AMD laptops On some AMD hardware laptops, the system fails communicating with the PMC when entering s2idle and the machine is battery powered. Hardware description: HP Pavilion Aero Laptop 13-be0097nr CPU: AMD Ryzen 7 5800U with Radeon Graphics GPU: 03:00.0 VGA compatible controller [0300]: Advanced Micro Devices, Inc. [AMD/ATI] Device [1002:1638] (rev c1) Detailed description of the problem (and investigation) here: https://gitlab.freedesktop.org/drm/amd/-/issues/1799 Patch is a single line: reduce the polling delay in half, from 100uSec to 50uSec when waiting for a change in state from the PMC after a write command operation. After changing the delay, I did not see a single failure on this machine (I have this fix for now more than one week and s2idle worked every single time on battery power). Cc: stable@vger.kernel.org Acked-by: Shyam Sundar S K Signed-off-by: Fabrizio Bertocci Link: https://lore.kernel.org/r/CADtzkx7TdfbwtaVEXUdD6YXPey52E-nZVQNs+Z41DTx7gqMqtw@mail.gmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index b7e50ed050a8..841c44cd64c2 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -76,7 +76,7 @@ #define AMD_CPU_ID_CZN AMD_CPU_ID_RN #define AMD_CPU_ID_YC 0x14B5 -#define PMC_MSG_DELAY_MIN_US 100 +#define PMC_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 #define SOC_SUBSYSTEM_IP_MAX 12 From 099f83aa2d06680d5987e43fed1afeda14b5037e Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Tue, 30 Nov 2021 17:08:24 +0100 Subject: [PATCH 113/868] mips, bpf: Fix reference to non-existing Kconfig symbol The Kconfig symbol for R10000 ll/sc errata workaround in the MIPS JIT was misspelled, causing the workaround to not take effect when enabled. Fixes: 72570224bb8f ("mips, bpf: Add JIT workarounds for CPU errata") Reported-by: Lukas Bulwahn Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211130160824.3781635-1-johan.almbladh@anyfinetworks.com --- arch/mips/net/bpf_jit_comp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/net/bpf_jit_comp.h b/arch/mips/net/bpf_jit_comp.h index 6f3a7b07294b..a37fe20818eb 100644 --- a/arch/mips/net/bpf_jit_comp.h +++ b/arch/mips/net/bpf_jit_comp.h @@ -98,7 +98,7 @@ do { \ #define emit(...) __emit(__VA_ARGS__) /* Workaround for R10000 ll/sc errata */ -#ifdef CONFIG_WAR_R10000 +#ifdef CONFIG_WAR_R10000_LLSC #define LLSC_beqz beqzl #else #define LLSC_beqz beqz From b43c2793f5e9910862e8fe07846b74e45b104501 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 26 Nov 2021 13:04:03 +0100 Subject: [PATCH 114/868] netfilter: nfnetlink_queue: silence bogus compiler warning net/netfilter/nfnetlink_queue.c:601:36: warning: variable 'ctinfo' is uninitialized when used here [-Wuninitialized] if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) ctinfo is only uninitialized if ct == NULL. Init it to 0 to silence this. Reported-by: kernel test robot Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 4acc4b8e9fe5..5837e8efc9c2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -387,7 +387,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *indev; struct net_device *outdev; struct nf_conn *ct = NULL; - enum ip_conntrack_info ctinfo; + enum ip_conntrack_info ctinfo = 0; struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; From c2584017f757da3c84a743b607d6cfc763ebcc2b Mon Sep 17 00:00:00 2001 From: Vyacheslav Bocharov Date: Thu, 25 Nov 2021 16:02:47 +0300 Subject: [PATCH 115/868] arm64: meson: fix dts for JetHub D1 Fix misplace of cpu_cooling_maps for JetHub D1, move it to right place. Fixes: 8e279fb29039 ("arm64: dts: meson-axg: add support for JetHub D1") Signed-off-by: Vyacheslav Bocharov Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20211125130246.1086627-1-adeep@lexina.in --- .../amlogic/meson-axg-jethome-jethub-j100.dts | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts b/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts index 52ebe371df26..561eec21b4de 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts +++ b/arch/arm64/boot/dts/amlogic/meson-axg-jethome-jethub-j100.dts @@ -134,23 +134,23 @@ type = "critical"; }; }; - }; - cpu_cooling_maps: cooling-maps { - map0 { - trip = <&cpu_passive>; - cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; - }; + cpu_cooling_maps: cooling-maps { + map0 { + trip = <&cpu_passive>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; - map1 { - trip = <&cpu_hot>; - cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, - <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + map1 { + trip = <&cpu_hot>; + cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; }; }; }; From 5ad77b1272fce36604779efe6e2036c500e6fe7a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 9 Jun 2021 22:20:09 +0200 Subject: [PATCH 116/868] arm64: meson: remove COMMON_CLK This reverts commit aea7a80ad5effd48f44a7a08c3903168be038a43. Selecting COMMON_CLK is not necessary, it is already selected by CONFIG_ARM64 Reported-by: Geert Uytterhoeven Signed-off-by: Jerome Brunet Reviewed-by: Kevin Hilman Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210609202009.1424879-1-jbrunet@baylibre.com --- arch/arm64/Kconfig.platforms | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 1aa8b7073218..54e3910e8b9b 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -161,7 +161,6 @@ config ARCH_MEDIATEK config ARCH_MESON bool "Amlogic Platforms" - select COMMON_CLK help This enables support for the arm64 based Amlogic SoCs such as the s905, S905X/D, S912, A113X/D or S905X/D2 From 4356fd6041877ef0c91a2f6051b59bb1a8961ca2 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 22 Nov 2021 23:24:39 +0100 Subject: [PATCH 117/868] dt-bindings: i2c: apple,i2c: allow multiple compatibles The intention was to have a SoC-specific and base compatible string to allow forward compatibility and SoC specific quirks, Fixes: df7c4a8c1b47 ("dt-bindings: i2c: Add Apple I2C controller bindings") Signed-off-by: Janne Grunau Cc: Mark Kettenis Reviewed-by: Sven Peter Reviewed-by: Mark Kettenis Tested-by: Hector Martin Acked-by: Wolfram Sang Acked-by: Rob Herring Signed-off-by: Hector Martin --- Documentation/devicetree/bindings/i2c/apple,i2c.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml index 22fc8483256f..82b953181a52 100644 --- a/Documentation/devicetree/bindings/i2c/apple,i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/apple,i2c.yaml @@ -20,9 +20,9 @@ allOf: properties: compatible: - enum: - - apple,t8103-i2c - - apple,i2c + items: + - const: apple,t8103-i2c + - const: apple,i2c reg: maxItems: 1 @@ -51,7 +51,7 @@ unevaluatedProperties: false examples: - | i2c@35010000 { - compatible = "apple,t8103-i2c"; + compatible = "apple,t8103-i2c", "apple,i2c"; reg = <0x35010000 0x4000>; interrupt-parent = <&aic>; interrupts = <0 627 4>; From 0668639eaf14813a39a8d3e0e6597d568581d4ea Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Mon, 22 Nov 2021 23:24:40 +0100 Subject: [PATCH 118/868] arm64: dts: apple: add #interrupt-cells property to pinctrl nodes Required for devices trying to use pinctrl devices as interrupt controller. Fixes: 0a8282b83119 ("arm64: apple: Add pinctrl nodes") Signed-off-by: Janne Grunau Cc: Mark Kettenis Reviewed-by: Sven Peter Reviewed-by: Mark Kettenis Tested-by: Hector Martin Signed-off-by: Hector Martin --- arch/arm64/boot/dts/apple/t8103.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index fc8b2bb06ffe..c320c8baeb41 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -143,6 +143,7 @@ apple,npins = <212>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , @@ -169,6 +170,7 @@ apple,npins = <42>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , @@ -189,6 +191,7 @@ apple,npins = <23>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , @@ -209,6 +212,7 @@ apple,npins = <16>; interrupt-controller; + #interrupt-cells = <2>; interrupt-parent = <&aic>; interrupts = , , From 4739d88ad8e1900f809f8a5c98f3c1b65bf76220 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 30 Nov 2021 16:31:10 +0000 Subject: [PATCH 119/868] ASoC: qdsp6: q6routing: Fix return value from msm_routing_put_audio_mixer msm_routing_put_audio_mixer() can return incorrect value in various scenarios. scenario 1: amixer cset iface=MIXER,name='SLIMBUS_0_RX Audio Mixer MultiMedia1' 1 amixer cset iface=MIXER,name='SLIMBUS_0_RX Audio Mixer MultiMedia1' 0 return value is 0 instead of 1 eventhough value was changed scenario 2: amixer cset iface=MIXER,name='SLIMBUS_0_RX Audio Mixer MultiMedia1' 1 amixer cset iface=MIXER,name='SLIMBUS_0_RX Audio Mixer MultiMedia1' 1 return value is 1 instead of 0 eventhough the value was not changed scenario 3: amixer cset iface=MIXER,name='SLIMBUS_0_RX Audio Mixer MultiMedia1' 0 return value is 1 instead of 0 eventhough the value was not changed Fix this by adding checks, so that change notifications are sent correctly. Fixes: e3a33673e845 ("ASoC: qdsp6: q6routing: Add q6routing driver") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20211130163110.5628-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6routing.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c index cd74681e811e..928fd23e2c27 100644 --- a/sound/soc/qcom/qdsp6/q6routing.c +++ b/sound/soc/qcom/qdsp6/q6routing.c @@ -498,14 +498,16 @@ static int msm_routing_put_audio_mixer(struct snd_kcontrol *kcontrol, struct session_data *session = &data->sessions[session_id]; if (ucontrol->value.integer.value[0]) { + if (session->port_id == be_id) + return 0; + session->port_id = be_id; snd_soc_dapm_mixer_update_power(dapm, kcontrol, 1, update); } else { - if (session->port_id == be_id) { - session->port_id = -1; + if (session->port_id == -1 || session->port_id != be_id) return 0; - } + session->port_id = -1; snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update); } From 23ba28616d3063bd4c4953598ed5e439ca891101 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 30 Nov 2021 16:05:04 +0000 Subject: [PATCH 120/868] ASoC: codecs: wcd934x: handle channel mappping list correctly Currently each channel is added as list to dai channel list, however there is danger of adding same channel to multiple dai channel list which endups corrupting the other list where its already added. This patch ensures that the channel is actually free before adding to the dai channel list and also ensures that the channel is on the list before deleting it. This check was missing previously, and we did not hit this issue as we were testing very simple usecases with sequence of amixer commands. Fixes: a70d9245759a ("ASoC: wcd934x: add capture dapm widgets") Fixes: dd9eb19b5673 ("ASoC: wcd934x: add playback dapm widgets") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20211130160507.22180-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd934x.c | 119 +++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 31 deletions(-) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 4f568abd59e2..eb4e2f2a24ae 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -3326,6 +3326,31 @@ static int slim_rx_mux_get(struct snd_kcontrol *kc, return 0; } +static int slim_rx_mux_to_dai_id(int mux) +{ + int aif_id; + + switch (mux) { + case 1: + aif_id = AIF1_PB; + break; + case 2: + aif_id = AIF2_PB; + break; + case 3: + aif_id = AIF3_PB; + break; + case 4: + aif_id = AIF4_PB; + break; + default: + aif_id = -1; + break; + } + + return aif_id; +} + static int slim_rx_mux_put(struct snd_kcontrol *kc, struct snd_ctl_elem_value *ucontrol) { @@ -3333,43 +3358,59 @@ static int slim_rx_mux_put(struct snd_kcontrol *kc, struct wcd934x_codec *wcd = dev_get_drvdata(w->dapm->dev); struct soc_enum *e = (struct soc_enum *)kc->private_value; struct snd_soc_dapm_update *update = NULL; + struct wcd934x_slim_ch *ch, *c; u32 port_id = w->shift; + bool found = false; + int mux_idx; + int prev_mux_idx = wcd->rx_port_value[port_id]; + int aif_id; - if (wcd->rx_port_value[port_id] == ucontrol->value.enumerated.item[0]) + mux_idx = ucontrol->value.enumerated.item[0]; + + if (mux_idx == prev_mux_idx) return 0; - wcd->rx_port_value[port_id] = ucontrol->value.enumerated.item[0]; - - switch (wcd->rx_port_value[port_id]) { + switch(mux_idx) { case 0: - list_del_init(&wcd->rx_chs[port_id].list); + aif_id = slim_rx_mux_to_dai_id(prev_mux_idx); + if (aif_id < 0) + return 0; + + list_for_each_entry_safe(ch, c, &wcd->dai[aif_id].slim_ch_list, list) { + if (ch->port == port_id + WCD934X_RX_START) { + found = true; + list_del_init(&ch->list); + break; + } + } + if (!found) + return 0; + break; - case 1: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF1_PB].slim_ch_list); - break; - case 2: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF2_PB].slim_ch_list); - break; - case 3: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF3_PB].slim_ch_list); - break; - case 4: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF4_PB].slim_ch_list); + case 1 ... 4: + aif_id = slim_rx_mux_to_dai_id(mux_idx); + if (aif_id < 0) + return 0; + + if (list_empty(&wcd->rx_chs[port_id].list)) { + list_add_tail(&wcd->rx_chs[port_id].list, + &wcd->dai[aif_id].slim_ch_list); + } else { + dev_err(wcd->dev ,"SLIM_RX%d PORT is busy\n", port_id); + return 0; + } break; + default: - dev_err(wcd->dev, "Unknown AIF %d\n", - wcd->rx_port_value[port_id]); + dev_err(wcd->dev, "Unknown AIF %d\n", mux_idx); goto err; } + wcd->rx_port_value[port_id] = mux_idx; snd_soc_dapm_mux_update_power(w->dapm, kc, wcd->rx_port_value[port_id], e, update); - return 0; + return 1; err: return -EINVAL; } @@ -3815,6 +3856,7 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc, struct soc_mixer_control *mixer = (struct soc_mixer_control *)kc->private_value; int enable = ucontrol->value.integer.value[0]; + struct wcd934x_slim_ch *ch, *c; int dai_id = widget->shift; int port_id = mixer->shift; @@ -3822,17 +3864,32 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc, if (enable == wcd->tx_port_value[port_id]) return 0; + if (enable) { + if (list_empty(&wcd->tx_chs[port_id].list)) { + list_add_tail(&wcd->tx_chs[port_id].list, + &wcd->dai[dai_id].slim_ch_list); + } else { + dev_err(wcd->dev ,"SLIM_TX%d PORT is busy\n", port_id); + return 0; + } + } else { + bool found = false; + + list_for_each_entry_safe(ch, c, &wcd->dai[dai_id].slim_ch_list, list) { + if (ch->port == port_id) { + found = true; + list_del_init(&wcd->tx_chs[port_id].list); + break; + } + } + if (!found) + return 0; + } + wcd->tx_port_value[port_id] = enable; - - if (enable) - list_add_tail(&wcd->tx_chs[port_id].list, - &wcd->dai[dai_id].slim_ch_list); - else - list_del_init(&wcd->tx_chs[port_id].list); - snd_soc_dapm_mixer_update_power(widget->dapm, kc, enable, update); - return 0; + return 1; } static const struct snd_kcontrol_new aif1_slim_cap_mixer[] = { From d9be0ff4796d1b6f5ee391c1b7e3653a43cedfab Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 30 Nov 2021 16:05:06 +0000 Subject: [PATCH 121/868] ASoC: codecs: wcd934x: return correct value from mixer put wcd934x_compander_set() currently returns zero eventhough it changes the value. Fix this, so that change notifications are sent correctly. Fixes: 1cde8b822332 ("ASoC: wcd934x: add basic controls") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20211130160507.22180-4-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd934x.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index eb4e2f2a24ae..e63c6b723d76 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -3256,6 +3256,9 @@ static int wcd934x_compander_set(struct snd_kcontrol *kc, int value = ucontrol->value.integer.value[0]; int sel; + if (wcd->comp_enabled[comp] == value) + return 0; + wcd->comp_enabled[comp] = value; sel = value ? WCD934X_HPH_GAIN_SRC_SEL_COMPANDER : WCD934X_HPH_GAIN_SRC_SEL_REGISTER; @@ -3279,10 +3282,10 @@ static int wcd934x_compander_set(struct snd_kcontrol *kc, case COMPANDER_8: break; default: - break; + return 0; } - return 0; + return 1; } static int wcd934x_rx_hph_mode_get(struct snd_kcontrol *kc, From 3fc27e9a1f619b50700f020e6cd270c1b74755f0 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 30 Nov 2021 16:05:07 +0000 Subject: [PATCH 122/868] ASoC: codecs: wsa881x: fix return values from kcontrol put wsa881x_set_port() and wsa881x_put_pa_gain() currently returns zero eventhough it changes the value. Fix this, so that change notifications are sent correctly. Fixes: a0aab9e1404a ("ASoC: codecs: add wsa881x amplifier support") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20211130160507.22180-5-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa881x.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 2da4a5fa7a18..564b78f3cdd0 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -772,7 +772,8 @@ static int wsa881x_put_pa_gain(struct snd_kcontrol *kc, usleep_range(1000, 1010); } - return 0; + + return 1; } static int wsa881x_get_port(struct snd_kcontrol *kcontrol, @@ -816,15 +817,22 @@ static int wsa881x_set_port(struct snd_kcontrol *kcontrol, (struct soc_mixer_control *)kcontrol->private_value; int portidx = mixer->reg; - if (ucontrol->value.integer.value[0]) + if (ucontrol->value.integer.value[0]) { + if (data->port_enable[portidx]) + return 0; + data->port_enable[portidx] = true; - else + } else { + if (!data->port_enable[portidx]) + return 0; + data->port_enable[portidx] = false; + } if (portidx == WSA881X_PORT_BOOST) /* Boost Switch */ wsa881x_boost_ctrl(comp, data->port_enable[portidx]); - return 0; + return 1; } static const char * const smart_boost_lvl_text[] = { From 973e5245637accc4002843f6b888495a6a7762bc Mon Sep 17 00:00:00 2001 From: Hu Weiwen Date: Mon, 22 Nov 2021 22:22:12 +0800 Subject: [PATCH 123/868] ceph: fix duplicate increment of opened_inodes metric opened_inodes is incremented twice when the same inode is opened twice with O_RDONLY and O_WRONLY respectively. To reproduce, run this python script, then check the metrics: import os for _ in range(10000): fd_r = os.open('a', os.O_RDONLY) fd_w = os.open('a', os.O_WRONLY) os.close(fd_r) os.close(fd_w) Fixes: 1dd8d4708136 ("ceph: metrics for opened files, pinned caps and opened inodes") Signed-off-by: Hu Weiwen Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b9460b6fb76f..c447fa2e2d1f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4350,7 +4350,7 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; - bool is_opened = false; + bool already_opened = false; int i; if (count == 1) @@ -4358,19 +4358,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { - if (bits & (1 << i)) - ci->i_nr_by_mode[i] += count; - /* - * If any of the mode ref is larger than 1, + * If any of the mode ref is larger than 0, * that means it has been already opened by * others. Just skip checking the PIN ref. */ - if (i && ci->i_nr_by_mode[i] > 1) - is_opened = true; + if (i && ci->i_nr_by_mode[i]) + already_opened = true; + + if (bits & (1 << i)) + ci->i_nr_by_mode[i] += count; } - if (!is_opened) + if (!already_opened) percpu_counter_inc(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } From e485d028bb1075d6167558b47f63e10713ad2034 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 23 Nov 2021 07:30:38 -0500 Subject: [PATCH 124/868] ceph: initialize i_size variable in ceph_sync_read Newer compilers seem to determine that this variable being uninitialized isn't a problem, but older compilers (from the RHEL8 era) seem to choke on it and complain that it could be used uninitialized. Go ahead and initialize the variable at declaration time to silence potential compiler warnings. Fixes: c3d8e0b5de48 ("ceph: return the real size read when it hits EOF") Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 02a0a0fd9ccd..b24442e27e4e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -847,7 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, ssize_t ret; u64 off = iocb->ki_pos; u64 len = iov_iter_count(to); - u64 i_size; + u64 i_size = i_size_read(inode); dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); From ee2a095d3b24f300a5e11944d208801e928f108c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 30 Nov 2021 19:20:34 +0800 Subject: [PATCH 125/868] ceph: initialize pathlen variable in reconnect_caps_cb The smatch static checker warned about an uninitialized symbol usage in this function, in the case where ceph_mdsc_build_path returns an error. It turns out that that case is harmless, but it just looks sketchy. Initialize the variable at declaration time, and remove the unneeded setting of it later. Fixes: a33f6432b3a6 ("ceph: encode inodes' parent/d_name in cap reconnect message") Reported-by: Dan Carpenter Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 250aad330a10..c30eefc0ac19 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3683,7 +3683,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; char *path; - int pathlen, err; + int pathlen = 0, err; u64 pathbase; u64 snap_follows; @@ -3703,7 +3703,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, } } else { path = NULL; - pathlen = 0; pathbase = 0; } From fd84bfdddd169c219c3a637889a8b87f70a072c2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 29 Nov 2021 12:16:39 +0100 Subject: [PATCH 126/868] ceph: fix up non-directory creation in SGID directories Ceph always inherits the SGID bit if it is set on the parent inode, while the generic inode_init_owner does not do this in a few cases where it can create a possible security problem (cf. [1]). Update ceph to strip the SGID bit just as inode_init_owner would. This bug was detected by the mapped mount testsuite in [3]. The testsuite tests all core VFS functionality and semantics with and without mapped mounts. That is to say it functions as a generic VFS testsuite in addition to a mapped mount testsuite. While working on mapped mount support for ceph, SIGD inheritance was the only failing test for ceph after the port. The same bug was detected by the mapped mount testsuite in XFS in January 2021 (cf. [2]). [1]: commit 0fa3ecd87848 ("Fix up non-directory creation in SGID directories") [2]: commit 01ea173e103e ("xfs: fix up non-directory creation in SGID directories") [3]: https://git.kernel.org/fs/xfs/xfstests-dev.git Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b24442e27e4e..c138e8126286 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -605,13 +605,25 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.cap.realm = cpu_to_le64(ci->i_snap_realm->ino); in.cap.flags = CEPH_CAP_FLAG_AUTH; in.ctime = in.mtime = in.atime = iinfo.btime; - in.mode = cpu_to_le32((u32)mode); in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); - in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_mode & S_ISGID ? - dir->i_gid : current_fsgid())); + if (dir->i_mode & S_ISGID) { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); + + /* Directories always inherit the setgid bit. */ + if (S_ISDIR(mode)) + mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(dir->i_gid) && + !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID)) + mode &= ~S_ISGID; + } else { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + } + in.mode = cpu_to_le32((u32)mode); + in.nlink = cpu_to_le32(1); in.max_size = cpu_to_le64(lo->stripe_unit); From 7e4dcc13965c57869684d57a1dc6dd7be589488c Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 4 Jun 2021 09:53:28 -0700 Subject: [PATCH 127/868] iavf: restore MSI state on reset If the PF experiences an FLR, the VF's MSI and MSI-X configuration will be conveniently and silently removed in the process. When this happens, reset recovery will appear to complete normally but no traffic will pass. The netdev watchdog will helpfully notify everyone of this issue. To prevent such public embarrassment, restore MSI configuration at every reset. For normal resets, this will do no harm, but for VF resets resulting from a PF FLR, this will keep the VF working. Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Mitch Williams Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 14934a7a13ef..cfdbf8c08d18 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2248,6 +2248,7 @@ static void iavf_reset_task(struct work_struct *work) } pci_set_master(adapter->pdev); + pci_restore_msi_state(adapter->pdev); if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", From cc5faf26decfcfd9edbafee9b7204ac9a9514c12 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 Nov 2021 16:21:54 +0100 Subject: [PATCH 128/868] dt-bindings: iio: adc: exynos-adc: Fix node name in example "make dt_binding_check": Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.example.dt.yaml: ncp15wb473: $nodename:0: 'ncp15wb473' does not match '^thermistor(.*)?$' From schema: Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml Signed-off-by: Geert Uytterhoeven Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211125152154.162780-1-geert@linux-m68k.org Signed-off-by: Rob Herring --- .../devicetree/bindings/iio/adc/samsung,exynos-adc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml index c65921e66dc1..81c87295912c 100644 --- a/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/samsung,exynos-adc.yaml @@ -136,7 +136,7 @@ examples: samsung,syscon-phandle = <&pmu_system_controller>; /* NTC thermistor is a hwmon device */ - ncp15wb473 { + thermistor { compatible = "murata,ncp15wb473"; pullup-uv = <1800000>; pullup-ohm = <47000>; From 39bd54d43b3f8b3c7b3a75f5d868d8bb858860e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Thu, 25 Nov 2021 17:01:48 +0100 Subject: [PATCH 129/868] Revert "PCI: aardvark: Fix support for PCI_ROM_ADDRESS1 on emulated bridge" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 239edf686c14a9ff926dec2f350289ed7adfefe2. 239edf686c14 ("PCI: aardvark: Fix support for PCI_ROM_ADDRESS1 on emulated bridge") added support for the Type 1 Expansion ROM BAR at config offset 0x38, based on the register being listed in the Marvell Armada A3720 spec. But the spec doesn't document it at all for RC mode, and there is no ROM in the SOC, so remove this emulation for now. The PCI bridge which represents aardvark's PCIe Root Port has an Expansion ROM Base Address register at offset 0x30, but its meaning is different than PCI's Expansion ROM BAR register, although the layout is the same. (This is why we thought it does the same thing.) First: there is no ROM (or part of BootROM) in the A3720 SOC dedicated for PCIe Root Port (or controller in RC mode) containing executable code that would initialize the Root Port, suitable for execution in bootloader (this is how Expansion ROM BAR is used on x86). Second: in A3720 spec the register (address 0xD0070030) is not documented at all for Root Complex mode, but similar to other BAR registers, it has an "entangled partner" in register 0xD0075920, which does address translation for the BAR in 0xD0070030: - the BAR register sets the address from the view of PCIe bus - the translation register sets the address from the view of the CPU The other BAR registers also have this entangled partner, and they can be used to: - in RC mode: address-checking on the receive side of the RC (they can define address ranges for memory accesses from remote Endpoints to the RC) - in Endpoint mode: allow the remote CPU to access memory on A3720 The Expansion ROM BAR has only the Endpoint part documented, but from the similarities we think that it can also be used in RC mode in that way. So either Expansion ROM BAR has different meaning (if the hypothesis above is true), or we don't know it's meaning (since it is not documented for RC mode). Remove the register from the emulated bridge accessing functions. [bhelgaas: summarize reason for removal (first paragraph)] Fixes: 239edf686c14 ("PCI: aardvark: Fix support for PCI_ROM_ADDRESS1 on emulated bridge") Link: https://lore.kernel.org/r/20211125160148.26029-3-kabel@kernel.org Signed-off-by: Marek Behún Signed-off-by: Bjorn Helgaas Reviewed-by: Pali Rohár --- drivers/pci/controller/pci-aardvark.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index c5300d49807a..c3b725afa11f 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -32,7 +32,6 @@ #define PCIE_CORE_DEV_ID_REG 0x0 #define PCIE_CORE_CMD_STATUS_REG 0x4 #define PCIE_CORE_DEV_REV_REG 0x8 -#define PCIE_CORE_EXP_ROM_BAR_REG 0x30 #define PCIE_CORE_PCIEXP_CAP 0xc0 #define PCIE_CORE_ERR_CAPCTL_REG 0x118 #define PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX BIT(5) @@ -774,10 +773,6 @@ advk_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge, *value = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); return PCI_BRIDGE_EMUL_HANDLED; - case PCI_ROM_ADDRESS1: - *value = advk_readl(pcie, PCIE_CORE_EXP_ROM_BAR_REG); - return PCI_BRIDGE_EMUL_HANDLED; - case PCI_INTERRUPT_LINE: { /* * From the whole 32bit register we support reading from HW only @@ -810,10 +805,6 @@ advk_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge, advk_writel(pcie, new, PCIE_CORE_CMD_STATUS_REG); break; - case PCI_ROM_ADDRESS1: - advk_writel(pcie, new, PCIE_CORE_EXP_ROM_BAR_REG); - break; - case PCI_INTERRUPT_LINE: if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) { u32 val = advk_readl(pcie, PCIE_CORE_CTRL1_REG); From 9d2479c960875ca1239bcb899f386970c13d9cfe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 1 Dec 2021 08:36:04 +0100 Subject: [PATCH 130/868] ALSA: pcm: oss: Fix negative period/buffer sizes The period size calculation in OSS layer may receive a negative value as an error, but the code there assumes only the positive values and handle them with size_t. Due to that, a too big value may be passed to the lower layers. This patch changes the code to handle with ssize_t and adds the proper error checks appropriately. Reported-by: syzbot+bb348e9f9a954d42746f@syzkaller.appspotmail.com Reported-by: Bixuan Cui Cc: Link: https://lore.kernel.org/r/1638270978-42412-1-git-send-email-cuibixuan@linux.alibaba.com Link: https://lore.kernel.org/r/20211201073606.11660-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 82a818734a5f..bec7590bc84b 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -147,7 +147,7 @@ snd_pcm_hw_param_value_min(const struct snd_pcm_hw_params *params, * * Return the maximum value for field PAR. */ -static unsigned int +static int snd_pcm_hw_param_value_max(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { @@ -682,18 +682,24 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *oss_params, struct snd_pcm_hw_params *slave_params) { - size_t s; - size_t oss_buffer_size, oss_period_size, oss_periods; - size_t min_period_size, max_period_size; + ssize_t s; + ssize_t oss_buffer_size; + ssize_t oss_period_size, oss_periods; + ssize_t min_period_size, max_period_size; struct snd_pcm_runtime *runtime = substream->runtime; size_t oss_frame_size; oss_frame_size = snd_pcm_format_physical_width(params_format(oss_params)) * params_channels(oss_params) / 8; + oss_buffer_size = snd_pcm_hw_param_value_max(slave_params, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, + NULL); + if (oss_buffer_size <= 0) + return -EINVAL; oss_buffer_size = snd_pcm_plug_client_size(substream, - snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size; - if (!oss_buffer_size) + oss_buffer_size * oss_frame_size); + if (oss_buffer_size <= 0) return -EINVAL; oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { @@ -730,7 +736,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - if (min_period_size) { + if (min_period_size > 0) { min_period_size *= oss_frame_size; min_period_size = roundup_pow_of_two(min_period_size); if (oss_period_size < min_period_size) @@ -739,7 +745,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - if (max_period_size) { + if (max_period_size > 0) { max_period_size *= oss_frame_size; max_period_size = rounddown_pow_of_two(max_period_size); if (oss_period_size > max_period_size) @@ -752,7 +758,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, oss_periods = substream->oss.setup.periods; s = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL); - if (runtime->oss.maxfrags && s > runtime->oss.maxfrags) + if (s > 0 && runtime->oss.maxfrags && s > runtime->oss.maxfrags) s = runtime->oss.maxfrags; if (oss_periods > s) oss_periods = s; From 8839c8c0f77ab8fc0463f4ab8b37fca3f70677c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 1 Dec 2021 08:36:05 +0100 Subject: [PATCH 131/868] ALSA: pcm: oss: Limit the period size to 16MB Set the practical limit to the period size (the fragment shift in OSS) instead of a full 31bit; a too large value could lead to the exhaust of memory as we allocate temporary buffers of the period size, too. As of this patch, we set to 16MB limit, which should cover all use cases. Reported-by: syzbot+bb348e9f9a954d42746f@syzkaller.appspotmail.com Reported-by: Bixuan Cui Cc: Link: https://lore.kernel.org/r/1638270978-42412-1-git-send-email-cuibixuan@linux.alibaba.com Link: https://lore.kernel.org/r/20211201073606.11660-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index bec7590bc84b..89c4910daf02 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1962,7 +1962,7 @@ static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsign if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; fragshift = val & 0xffff; - if (fragshift >= 31) + if (fragshift >= 25) /* should be large enough */ return -EINVAL; runtime->oss.fragshift = fragshift; runtime->oss.maxfrags = (val >> 16) & 0xffff; From 6665bb30a6b1a4a853d52557c05482ee50e71391 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 1 Dec 2021 08:36:06 +0100 Subject: [PATCH 132/868] ALSA: pcm: oss: Handle missing errors in snd_pcm_oss_change_params*() A couple of calls in snd_pcm_oss_change_params_locked() ignore the possible errors. Catch those errors and abort the operation for avoiding further problems. Cc: Link: https://lore.kernel.org/r/20211201073606.11660-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 89c4910daf02..20a0a4771b9a 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -884,8 +884,15 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) err = -EINVAL; goto failure; } - choose_rate(substream, sparams, runtime->oss.rate); - snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, NULL); + + err = choose_rate(substream, sparams, runtime->oss.rate); + if (err < 0) + goto failure; + err = snd_pcm_hw_param_near(substream, sparams, + SNDRV_PCM_HW_PARAM_CHANNELS, + runtime->oss.channels, NULL); + if (err < 0) + goto failure; format = snd_pcm_oss_format_from(runtime->oss.format); From c5e0cbe2858d278a27d5b3fe31890aea5be064c4 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Wed, 1 Dec 2021 11:02:58 +0000 Subject: [PATCH 133/868] irqchip: nvic: Fix offset for Interrupt Priority Offsets According to ARM(v7M) ARM Interrupt Priority Offsets located at 0xE000E400-0xE000E5EC, while 0xE000E300-0xE000E33C covers read-only Interrupt Active Bit Registers Fixes: 292ec080491d ("irqchip: Add support for ARMv7-M NVIC") Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211201110259.84857-1-vladimir.murzin@arm.com --- drivers/irqchip/irq-nvic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index 63bac3f78863..ba4759b3e269 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -26,7 +26,7 @@ #define NVIC_ISER 0x000 #define NVIC_ICER 0x080 -#define NVIC_IPR 0x300 +#define NVIC_IPR 0x400 #define NVIC_MAX_BANKS 16 /* From f83baa0cb6cfc92ebaf7f9d3a99d7e34f2e77a8a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Dec 2021 19:35:01 +0100 Subject: [PATCH 134/868] HID: add hid_is_usb() function to make it simpler for USB detection A number of HID drivers already call hid_is_using_ll_driver() but only for the detection of if this is a USB device or not. Make this more obvious by creating hid_is_usb() and calling the function that way. Also converts the existing hid_is_using_ll_driver() functions to use the new call. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: stable@vger.kernel.org Tested-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211201183503.2373082-1-gregkh@linuxfoundation.org --- drivers/hid/hid-asus.c | 6 ++---- drivers/hid/hid-logitech-dj.c | 2 +- drivers/hid/hid-u2fzero.c | 2 +- drivers/hid/hid-uclogic-params.c | 3 +-- drivers/hid/wacom_sys.c | 2 +- include/linux/hid.h | 5 +++++ 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index f3ecddc519ee..08c9a9a60ae4 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1028,8 +1028,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) if (drvdata->quirks & QUIRK_IS_MULTITOUCH) drvdata->tp = &asus_i2c_tp; - if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && - hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); if (intf->altsetting->desc.bInterfaceNumber == T100_TPAD_INTF) { @@ -1057,8 +1056,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) drvdata->tp = &asus_t100chi_tp; } - if ((drvdata->quirks & QUIRK_MEDION_E1239T) && - hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if ((drvdata->quirks & QUIRK_MEDION_E1239T) && hid_is_usb(hdev)) { struct usb_host_interface *alt = to_usb_interface(hdev->dev.parent)->altsetting; diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index a0017b010c34..7106b921b53c 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1777,7 +1777,7 @@ static int logi_dj_probe(struct hid_device *hdev, case recvr_type_bluetooth: no_dj_interfaces = 2; break; case recvr_type_dinovo: no_dj_interfaces = 2; break; } - if (hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (hid_is_usb(hdev)) { intf = to_usb_interface(hdev->dev.parent); if (intf && intf->altsetting->desc.bInterfaceNumber >= no_dj_interfaces) { diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c index 31ea7fc69916..ad489caf53ad 100644 --- a/drivers/hid/hid-u2fzero.c +++ b/drivers/hid/hid-u2fzero.c @@ -311,7 +311,7 @@ static int u2fzero_probe(struct hid_device *hdev, unsigned int minor; int ret; - if (!hid_is_using_ll_driver(hdev, &usb_hid_driver)) + if (!hid_is_usb(hdev)) return -EINVAL; dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index 3d67b748a3b9..adff1bd68d9f 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -843,8 +843,7 @@ int uclogic_params_init(struct uclogic_params *params, struct uclogic_params p = {0, }; /* Check arguments */ - if (params == NULL || hdev == NULL || - !hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (params == NULL || hdev == NULL || !hid_is_usb(hdev)) { rc = -EINVAL; goto cleanup; } diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 2717d39600b4..22d73772fbc5 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2214,7 +2214,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) { char *product_name = wacom->hdev->name; - if (hid_is_using_ll_driver(wacom->hdev, &usb_hid_driver)) { + if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); product_name = dev->product; diff --git a/include/linux/hid.h b/include/linux/hid.h index 9e067f937dbc..f453be385bd4 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -840,6 +840,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 From 720ac467204a70308bd687927ed475afb904e11b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Dec 2021 19:35:02 +0100 Subject: [PATCH 135/868] HID: wacom: fix problems when device is not a valid USB device The wacom driver accepts devices of more than just USB types, but some code paths can cause problems if the device being controlled is not a USB device due to a lack of checking. Add the needed checks to ensure that the USB device accesses are only happening on a "real" USB device, and not one on some other bus. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: linux-input@vger.kernel.org Cc: stable@vger.kernel.org Tested-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211201183503.2373082-2-gregkh@linuxfoundation.org --- drivers/hid/wacom_sys.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 22d73772fbc5..066c567dbaa2 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -726,7 +726,7 @@ static void wacom_retrieve_hid_descriptor(struct hid_device *hdev, * Skip the query for this type and modify defaults based on * interface number. */ - if (features->type == WIRELESS) { + if (features->type == WIRELESS && intf) { if (intf->cur_altsetting->desc.bInterfaceNumber == 0) features->device_type = WACOM_DEVICETYPE_WL_MONITOR; else @@ -2451,6 +2451,9 @@ static void wacom_wireless_work(struct work_struct *work) wacom_destroy_battery(wacom); + if (!usbdev) + return; + /* Stylus interface */ hdev1 = usb_get_intfdata(usbdev->config->interface[1]); wacom1 = hid_get_drvdata(hdev1); @@ -2730,8 +2733,6 @@ static void wacom_mode_change_work(struct work_struct *work) static int wacom_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - struct usb_device *dev = interface_to_usbdev(intf); struct wacom *wacom; struct wacom_wac *wacom_wac; struct wacom_features *features; @@ -2766,8 +2767,14 @@ static int wacom_probe(struct hid_device *hdev, wacom_wac->hid_data.inputmode = -1; wacom_wac->mode_report = -1; - wacom->usbdev = dev; - wacom->intf = intf; + if (hid_is_usb(hdev)) { + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + struct usb_device *dev = interface_to_usbdev(intf); + + wacom->usbdev = dev; + wacom->intf = intf; + } + mutex_init(&wacom->lock); INIT_DELAYED_WORK(&wacom->init_work, wacom_init_work); INIT_WORK(&wacom->wireless_work, wacom_wireless_work); From 93020953d0fa7035fd036ad87a47ae2b7aa4ae33 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Dec 2021 19:35:03 +0100 Subject: [PATCH 136/868] HID: check for valid USB device for many HID drivers Many HID drivers assume that the HID device assigned to them is a USB device as that was the only way HID devices used to be able to be created in Linux. However, with the additional ways that HID devices can be created for many different bus types, that is no longer true, so properly check that we have a USB device associated with the HID device before allowing a driver that makes this assumption to claim it. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Michael Zaidman Cc: Stefan Achatz Cc: Maxime Coquelin Cc: Alexandre Torgue Cc: linux-input@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Tested-by: Benjamin Tissoires [bentiss: amended for thrustmater.c hunk to apply] Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211201183503.2373082-3-gregkh@linuxfoundation.org --- drivers/hid/hid-chicony.c | 3 +++ drivers/hid/hid-corsair.c | 7 ++++++- drivers/hid/hid-elan.c | 2 +- drivers/hid/hid-elo.c | 3 +++ drivers/hid/hid-ft260.c | 3 +++ drivers/hid/hid-holtek-kbd.c | 9 +++++++-- drivers/hid/hid-holtek-mouse.c | 9 +++++++++ drivers/hid/hid-lg.c | 10 ++++++++-- drivers/hid/hid-prodikeys.c | 10 ++++++++-- drivers/hid/hid-roccat-arvo.c | 3 +++ drivers/hid/hid-roccat-isku.c | 3 +++ drivers/hid/hid-roccat-kone.c | 3 +++ drivers/hid/hid-roccat-koneplus.c | 3 +++ drivers/hid/hid-roccat-konepure.c | 3 +++ drivers/hid/hid-roccat-kovaplus.c | 3 +++ drivers/hid/hid-roccat-lua.c | 3 +++ drivers/hid/hid-roccat-pyra.c | 3 +++ drivers/hid/hid-roccat-ryos.c | 3 +++ drivers/hid/hid-roccat-savu.c | 3 +++ drivers/hid/hid-samsung.c | 3 +++ drivers/hid/hid-sony.c | 6 +++++- drivers/hid/hid-thrustmaster.c | 3 +++ drivers/hid/hid-uclogic-core.c | 3 +++ 23 files changed, 92 insertions(+), 9 deletions(-) diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index ca556d39da2a..f04d2aa23efe 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -114,6 +114,9 @@ static int ch_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + hdev->quirks |= HID_QUIRK_INPUT_PER_APP; ret = hid_parse(hdev); if (ret) { diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 902a60e249ed..8c895c820b67 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -553,7 +553,12 @@ static int corsair_probe(struct hid_device *dev, const struct hid_device_id *id) int ret; unsigned long quirks = id->driver_data; struct corsair_drvdata *drvdata; - struct usb_interface *usbif = to_usb_interface(dev->dev.parent); + struct usb_interface *usbif; + + if (!hid_is_usb(dev)) + return -EINVAL; + + usbif = to_usb_interface(dev->dev.parent); drvdata = devm_kzalloc(&dev->dev, sizeof(struct corsair_drvdata), GFP_KERNEL); diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c index 021049805bb7..3091355d48df 100644 --- a/drivers/hid/hid-elan.c +++ b/drivers/hid/hid-elan.c @@ -50,7 +50,7 @@ struct elan_drvdata { static int is_not_elan_touchpad(struct hid_device *hdev) { - if (hdev->bus == BUS_USB) { + if (hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); return (intf->altsetting->desc.bInterfaceNumber != diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index 383dfda8c12f..8e960d7b233b 100644 --- a/drivers/hid/hid-elo.c +++ b/drivers/hid/hid-elo.c @@ -230,6 +230,9 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id) int ret; struct usb_device *udev; + if (!hid_is_usb(hdev)) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index 8ee77f4afe9f..79505c64dbfe 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -915,6 +915,9 @@ static int ft260_probe(struct hid_device *hdev, const struct hid_device_id *id) struct ft260_get_chip_version_report version; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c index 0a38e8e9bc78..403506b9697e 100644 --- a/drivers/hid/hid-holtek-kbd.c +++ b/drivers/hid/hid-holtek-kbd.c @@ -140,12 +140,17 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, static int holtek_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - int ret = hid_parse(hdev); + struct usb_interface *intf; + int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + + ret = hid_parse(hdev); if (!ret) ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + intf = to_usb_interface(hdev->dev.parent); if (!ret && intf->cur_altsetting->desc.bInterfaceNumber == 1) { struct hid_input *hidinput; list_for_each_entry(hidinput, &hdev->inputs, list) { diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index 195b735b001d..b7172c48ef9f 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -62,6 +62,14 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +static int holtek_mouse_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + if (!hid_is_usb(hdev)) + return -EINVAL; + return 0; +} + static const struct hid_device_id holtek_mouse_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) }, @@ -83,6 +91,7 @@ static struct hid_driver holtek_mouse_driver = { .name = "holtek_mouse", .id_table = holtek_mouse_devices, .report_fixup = holtek_mouse_report_fixup, + .probe = holtek_mouse_probe, }; module_hid_driver(holtek_mouse_driver); diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index d40af911df63..fb3f7258009c 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -749,12 +749,18 @@ static int lg_raw_event(struct hid_device *hdev, struct hid_report *report, static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *iface; + __u8 iface_num; unsigned int connect_mask = HID_CONNECT_DEFAULT; struct lg_drv_data *drv_data; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + + iface = to_usb_interface(hdev->dev.parent); + iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + /* G29 only work with the 1st interface */ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) && (iface_num != 0)) { diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 2666af02d5c1..e4e9471d0f1e 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -798,12 +798,18 @@ static int pk_raw_event(struct hid_device *hdev, struct hid_report *report, static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - unsigned short ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *intf; + unsigned short ifnum; unsigned long quirks = id->driver_data; struct pk_device *pk; struct pcmidi_snd *pm = NULL; + if (!hid_is_usb(hdev)) + return -EINVAL; + + intf = to_usb_interface(hdev->dev.parent); + ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + pk = kzalloc(sizeof(*pk), GFP_KERNEL); if (pk == NULL) { hid_err(hdev, "can't alloc descriptor\n"); diff --git a/drivers/hid/hid-roccat-arvo.c b/drivers/hid/hid-roccat-arvo.c index 4556d2a50f75..d94ee0539421 100644 --- a/drivers/hid/hid-roccat-arvo.c +++ b/drivers/hid/hid-roccat-arvo.c @@ -344,6 +344,9 @@ static int arvo_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-isku.c b/drivers/hid/hid-roccat-isku.c index ce5f22519956..e95d59cd8d07 100644 --- a/drivers/hid/hid-roccat-isku.c +++ b/drivers/hid/hid-roccat-isku.c @@ -324,6 +324,9 @@ static int isku_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kone.c b/drivers/hid/hid-roccat-kone.c index ea17abc7ad52..76da04801ca9 100644 --- a/drivers/hid/hid-roccat-kone.c +++ b/drivers/hid/hid-roccat-kone.c @@ -749,6 +749,9 @@ static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-koneplus.c b/drivers/hid/hid-roccat-koneplus.c index 0316edf8c5bb..1896c69ea512 100644 --- a/drivers/hid/hid-roccat-koneplus.c +++ b/drivers/hid/hid-roccat-koneplus.c @@ -431,6 +431,9 @@ static int koneplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-konepure.c b/drivers/hid/hid-roccat-konepure.c index 5248b3c7cf78..cf8eeb33a125 100644 --- a/drivers/hid/hid-roccat-konepure.c +++ b/drivers/hid/hid-roccat-konepure.c @@ -133,6 +133,9 @@ static int konepure_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kovaplus.c b/drivers/hid/hid-roccat-kovaplus.c index 960012881570..6fb9b9563769 100644 --- a/drivers/hid/hid-roccat-kovaplus.c +++ b/drivers/hid/hid-roccat-kovaplus.c @@ -501,6 +501,9 @@ static int kovaplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-lua.c b/drivers/hid/hid-roccat-lua.c index 4a88a76d5c62..d5ddf0d68346 100644 --- a/drivers/hid/hid-roccat-lua.c +++ b/drivers/hid/hid-roccat-lua.c @@ -160,6 +160,9 @@ static int lua_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index 989927defe8d..4fcc8e7d276f 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -449,6 +449,9 @@ static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-ryos.c b/drivers/hid/hid-roccat-ryos.c index 3956a6c9c521..5bf1971a2b14 100644 --- a/drivers/hid/hid-roccat-ryos.c +++ b/drivers/hid/hid-roccat-ryos.c @@ -141,6 +141,9 @@ static int ryos_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-savu.c b/drivers/hid/hid-roccat-savu.c index 818701f7a028..a784bb4ee651 100644 --- a/drivers/hid/hid-roccat-savu.c +++ b/drivers/hid/hid-roccat-savu.c @@ -113,6 +113,9 @@ static int savu_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index 2e1c31156eca..cf5992e97094 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -152,6 +152,9 @@ static int samsung_probe(struct hid_device *hdev, int ret; unsigned int cmask = HID_CONNECT_DEFAULT; + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index d1b107d547f5..c186af552129 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -3000,7 +3000,6 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) sc->quirks = quirks; hid_set_drvdata(hdev, sc); sc->hdev = hdev; - usbdev = to_usb_device(sc->hdev->dev.parent->parent); ret = hid_parse(hdev); if (ret) { @@ -3043,6 +3042,11 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) } if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { + if (!hid_is_usb(hdev)) + return -EINVAL; + + usbdev = to_usb_device(sc->hdev->dev.parent->parent); + sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC); if (!sc->ghl_urb) return -ENOMEM; diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 3a5333424aa3..03b935ff02d5 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -274,6 +274,9 @@ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_i int ret = 0; struct tm_wheel *tm_wheel = NULL; + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed with error %d\n", ret); diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 6a9865dd703c..d8ab0139e5cd 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -164,6 +164,9 @@ static int uclogic_probe(struct hid_device *hdev, struct uclogic_drvdata *drvdata = NULL; bool params_initialized = false; + if (!hid_is_usb(hdev)) + return -EINVAL; + /* * libinput requires the pad interface to be on a different node * than the pen, so use QUIRK_MULTI_INPUT for all tablets. From f237d9028f844a86955fc9da59d7ac4a5c55d7d5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Dec 2021 12:48:19 +0100 Subject: [PATCH 137/868] HID: add USB_HID dependancy on some USB HID drivers Some HID drivers are only for USB drivers, yet did not depend on CONFIG_USB_HID. This was hidden by the fact that the USB functions were stubbed out in the past, but now that drivers are checking for USB devices properly, build errors can occur with some random configurations. Reported-by: kernel test robot Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211202114819.2511954-1-gregkh@linuxfoundation.org --- drivers/hid/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 9f5435b55949..828c2995ec34 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -214,7 +214,7 @@ config HID_CHICONY config HID_CORSAIR tristate "Corsair devices" - depends on HID && USB && LEDS_CLASS + depends on USB_HID && LEDS_CLASS help Support for Corsair devices that are not fully compliant with the HID standard. @@ -560,7 +560,7 @@ config HID_LENOVO config HID_LOGITECH tristate "Logitech devices" - depends on HID + depends on USB_HID depends on LEDS_CLASS default !EXPERT help @@ -951,7 +951,7 @@ config HID_SAITEK config HID_SAMSUNG tristate "Samsung InfraRed remote control or keyboards" - depends on HID + depends on USB_HID help Support for Samsung InfraRed remote control or keyboards. From 7998193bccc1c6e1537c5f3880fd0d5b949ec9d1 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 2 Dec 2021 10:53:31 +0100 Subject: [PATCH 138/868] HID: sony: fix error path in probe When the setup of the GHL fails, we are not calling hid_hw_stop(). This leads to the hidraw node not being released, meaning a crash whenever somebody attempts to open the file. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211202095334.14399-2-benjamin.tissoires@redhat.com --- drivers/hid/hid-sony.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index c186af552129..60ec2b29d54d 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -3037,19 +3037,23 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) */ if (!(hdev->claimed & HID_CLAIMED_INPUT)) { hid_err(hdev, "failed to claim input\n"); - hid_hw_stop(hdev); - return -ENODEV; + ret = -ENODEV; + goto err; } if (sc->quirks & (GHL_GUITAR_PS3WIIU | GHL_GUITAR_PS4)) { - if (!hid_is_usb(hdev)) - return -EINVAL; + if (!hid_is_usb(hdev)) { + ret = -EINVAL; + goto err; + } usbdev = to_usb_device(sc->hdev->dev.parent->parent); sc->ghl_urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!sc->ghl_urb) - return -ENOMEM; + if (!sc->ghl_urb) { + ret = -ENOMEM; + goto err; + } if (sc->quirks & GHL_GUITAR_PS3WIIU) ret = ghl_init_urb(sc, usbdev, ghl_ps3wiiu_magic_data, @@ -3059,7 +3063,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) ARRAY_SIZE(ghl_ps4_magic_data)); if (ret) { hid_err(hdev, "error preparing URB\n"); - return ret; + goto err; } timer_setup(&sc->ghl_poke_timer, ghl_magic_poke, 0); @@ -3068,6 +3072,10 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) } return ret; + +err: + hid_hw_stop(hdev); + return ret; } static void sony_remove(struct hid_device *hdev) From 918aa1ef104d286d16b9e7ef139a463ac7a296f0 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 2 Dec 2021 10:53:32 +0100 Subject: [PATCH 139/868] HID: bigbenff: prevent null pointer dereference When emulating the device through uhid, there is a chance we don't have output reports and so report_field is null. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211202095334.14399-3-benjamin.tissoires@redhat.com --- drivers/hid/hid-bigbenff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index db6da21ade06..74ad8bf98bfd 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -191,7 +191,7 @@ static void bigben_worker(struct work_struct *work) struct bigben_device, worker); struct hid_field *report_field = bigben->report->field[0]; - if (bigben->removed) + if (bigben->removed || !report_field) return; if (bigben->work_led) { From b6409dd6bdc03aa178bbff0d80db2a30d29b63ac Mon Sep 17 00:00:00 2001 From: Alan Young Date: Thu, 2 Dec 2021 15:06:07 +0000 Subject: [PATCH 140/868] ALSA: ctl: Fix copy of updated id with element read/write When control_compat.c:copy_ctl_value_to_user() is used, by ctl_elem_read_user() & ctl_elem_write_user(), it must also copy back the snd_ctl_elem_id value that may have been updated (filled in) by the call to snd_ctl_elem_read/snd_ctl_elem_write(). This matches the functionality provided by snd_ctl_elem_read_user() and snd_ctl_elem_write_user(), via snd_ctl_build_ioff(). Without this, and without making additional calls to snd_ctl_info() which are unnecessary when using the non-compat calls, a userspace application will not know the numid value for the element and consequently will not be able to use the poll/read interface on the control file to determine which elements have updates. Signed-off-by: Alan Young Cc: Link: https://lore.kernel.org/r/20211202150607.543389-1-consult.awy@gmail.com Signed-off-by: Takashi Iwai --- sound/core/control_compat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index 470dabc60aa0..edff063e088d 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -264,6 +264,7 @@ static int copy_ctl_value_to_user(void __user *userdata, struct snd_ctl_elem_value *data, int type, int count) { + struct snd_ctl_elem_value32 __user *data32 = userdata; int i, size; if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || @@ -280,6 +281,8 @@ static int copy_ctl_value_to_user(void __user *userdata, if (copy_to_user(valuep, data->value.bytes.data, size)) return -EFAULT; } + if (copy_to_user(&data32->id, &data->id, sizeof(data32->id))) + return -EFAULT; return 0; } From d9847eb8be3d895b2b5f514fdf3885d47a0b92a2 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 22 Nov 2021 20:17:40 +0530 Subject: [PATCH 141/868] bpf: Make CONFIG_DEBUG_INFO_BTF depend upon CONFIG_BPF_SYSCALL Vinicius Costa Gomes reported [0] that build fails when CONFIG_DEBUG_INFO_BTF is enabled and CONFIG_BPF_SYSCALL is disabled. This leads to btf.c not being compiled, and then no symbol being present in vmlinux for the declarations in btf.h. Since BTF is not useful without enabling BPF subsystem, disallow this combination. However, theoretically disabling both now could still fail, as the symbol for kfunc_btf_id_list variables is not available. This isn't a problem as the compiler usually optimizes the whole register/unregister call, but at lower optimization levels it can fail the build in linking stage. Fix that by adding dummy variables so that modules taking address of them still work, but the whole thing is a noop. [0]: https://lore.kernel.org/bpf/20211110205418.332403-1-vinicius.gomes@intel.com Fixes: 14f267d95fe4 ("bpf: btf: Introduce helpers for dynamic BTF set registration") Reported-by: Vinicius Costa Gomes Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211122144742.477787-2-memxor@gmail.com --- include/linux/btf.h | 14 ++++++++++---- kernel/bpf/btf.c | 9 ++------- lib/Kconfig.debug | 1 + 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index 203eef993d76..0e1b6281fd8f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -245,7 +245,10 @@ struct kfunc_btf_id_set { struct module *owner; }; -struct kfunc_btf_id_list; +struct kfunc_btf_id_list { + struct list_head list; + struct mutex mutex; +}; #ifdef CONFIG_DEBUG_INFO_BTF_MODULES void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, @@ -254,6 +257,9 @@ void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, struct kfunc_btf_id_set *s); bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, struct module *owner); + +extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; +extern struct kfunc_btf_id_list prog_test_kfunc_list; #else static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, struct kfunc_btf_id_set *s) @@ -268,13 +274,13 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, { return false; } + +static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; +static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; #endif #define DEFINE_KFUNC_BTF_ID_SET(set, name) \ struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ THIS_MODULE } -extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; -extern struct kfunc_btf_id_list prog_test_kfunc_list; - #endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index dbc3ad07e21b..ea3df9867cec 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6346,11 +6346,6 @@ BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct) /* BTF ID set registration API for modules */ -struct kfunc_btf_id_list { - struct list_head list; - struct mutex mutex; -}; - #ifdef CONFIG_DEBUG_INFO_BTF_MODULES void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, @@ -6389,8 +6384,6 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, return false; } -#endif - #define DEFINE_KFUNC_BTF_ID_LIST(name) \ struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ __MUTEX_INITIALIZER(name.mutex) }; \ @@ -6398,3 +6391,5 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); + +#endif diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ef7ce18b4f5..596bb5e4790c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -316,6 +316,7 @@ config DEBUG_INFO_BTF bool "Generate BTF typeinfo" depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST + depends on BPF_SYSCALL help Generate deduplicated BTF type information from DWARF debug info. Turning this on expects presence of pahole tool, which will convert From b12f031043247b80999bf5e03b8cded3b0b40f8d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 22 Nov 2021 20:17:41 +0530 Subject: [PATCH 142/868] bpf: Fix bpf_check_mod_kfunc_call for built-in modules When module registering its set is built-in, THIS_MODULE will be NULL, hence we cannot return early in case owner is NULL. Fixes: 14f267d95fe4 ("bpf: btf: Introduce helpers for dynamic BTF set registration") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211122144742.477787-3-memxor@gmail.com --- kernel/bpf/btf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ea3df9867cec..9bdb03767db5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6371,8 +6371,6 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, { struct kfunc_btf_id_set *s; - if (!owner) - return false; mutex_lock(&klist->mutex); list_for_each_entry(s, &klist->list, list) { if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { From 3345193f6f3cc24791c245d4ba2c38502f1cf684 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 22 Nov 2021 20:17:42 +0530 Subject: [PATCH 143/868] tools/resolve_btfids: Skip unresolved symbol warning for empty BTF sets resolve_btfids prints a warning when it finds an unresolved symbol, (id == 0) in id_patch. This can be the case for BTF sets that are empty (due to disabled config options), hence printing warnings for certain builds, most recently seen in [0]. The reason behind this is because id->cnt aliases id->id in btf_id struct, leading to empty set showing up as ID 0 when we get to id_patch, which triggers the warning. Since sets are an exception here, accomodate by reusing hole in btf_id for bool is_set member, setting it to true for BTF set when setting id->cnt, and use that to skip extraneous warning. [0]: https://lore.kernel.org/all/1b99ae14-abb4-d18f-cc6a-d7e523b25542@gmail.com Before: ; ./tools/bpf/resolve_btfids/resolve_btfids -v -b vmlinux net/ipv4/tcp_cubic.ko adding symbol tcp_cubic_kfunc_ids WARN: resolve_btfids: unresolved symbol tcp_cubic_kfunc_ids patching addr 0: ID 0 [tcp_cubic_kfunc_ids] sorting addr 4: cnt 0 [tcp_cubic_kfunc_ids] update ok for net/ipv4/tcp_cubic.ko After: ; ./tools/bpf/resolve_btfids/resolve_btfids -v -b vmlinux net/ipv4/tcp_cubic.ko adding symbol tcp_cubic_kfunc_ids patching addr 0: ID 0 [tcp_cubic_kfunc_ids] sorting addr 4: cnt 0 [tcp_cubic_kfunc_ids] update ok for net/ipv4/tcp_cubic.ko Fixes: 0e32dfc80bae ("bpf: Enable TCP congestion control kfunc from modules") Reported-by: Pavel Skripkin Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20211122144742.477787-4-memxor@gmail.com --- tools/bpf/resolve_btfids/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index a59cb0ee609c..73409e27be01 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -83,6 +83,7 @@ struct btf_id { int cnt; }; int addr_cnt; + bool is_set; Elf64_Addr addr[ADDR_CNT]; }; @@ -451,8 +452,10 @@ static int symbols_collect(struct object *obj) * in symbol's size, together with 'cnt' field hence * that - 1. */ - if (id) + if (id) { id->cnt = sym.st_size / sizeof(int) - 1; + id->is_set = true; + } } else { pr_err("FAILED unsupported prefix %s\n", prefix); return -1; @@ -568,9 +571,8 @@ static int id_patch(struct object *obj, struct btf_id *id) int *ptr = data->d_buf; int i; - if (!id->id) { + if (!id->id && !id->is_set) pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); - } for (i = 0; i < id->addr_cnt; i++) { unsigned long addr = id->addr[i]; From 9a61f813fcc8d56d85fcf9ca6119cf2b5ac91dd5 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 16 Nov 2021 02:34:07 +0300 Subject: [PATCH 144/868] clk: qcom: regmap-mux: fix parent clock lookup The function mux_get_parent() uses qcom_find_src_index() to find the parent clock index, which is incorrect: qcom_find_src_index() uses src enum for the lookup, while mux_get_parent() should use cfg field (which corresponds to the register value). Add qcom_find_cfg_index() function doing this kind of lookup and use it for mux parent lookup. Fixes: df964016490b ("clk: qcom: add parent map for regmap mux") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20211115233407.1046179-1-dmitry.baryshkov@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-regmap-mux.c | 2 +- drivers/clk/qcom/common.c | 12 ++++++++++++ drivers/clk/qcom/common.h | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-regmap-mux.c b/drivers/clk/qcom/clk-regmap-mux.c index b2d00b451963..45d9cca28064 100644 --- a/drivers/clk/qcom/clk-regmap-mux.c +++ b/drivers/clk/qcom/clk-regmap-mux.c @@ -28,7 +28,7 @@ static u8 mux_get_parent(struct clk_hw *hw) val &= mask; if (mux->parent_map) - return qcom_find_src_index(hw, mux->parent_map, val); + return qcom_find_cfg_index(hw, mux->parent_map, val); return val; } diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c index 0932e019dd12..75f09e6e057e 100644 --- a/drivers/clk/qcom/common.c +++ b/drivers/clk/qcom/common.c @@ -69,6 +69,18 @@ int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src) } EXPORT_SYMBOL_GPL(qcom_find_src_index); +int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, u8 cfg) +{ + int i, num_parents = clk_hw_get_num_parents(hw); + + for (i = 0; i < num_parents; i++) + if (cfg == map[i].cfg) + return i; + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(qcom_find_cfg_index); + struct regmap * qcom_cc_map(struct platform_device *pdev, const struct qcom_cc_desc *desc) { diff --git a/drivers/clk/qcom/common.h b/drivers/clk/qcom/common.h index bb39a7e106d8..9c8f7b798d9f 100644 --- a/drivers/clk/qcom/common.h +++ b/drivers/clk/qcom/common.h @@ -49,6 +49,8 @@ extern void qcom_pll_set_fsm_mode(struct regmap *m, u32 reg, u8 bias_count, u8 lock_count); extern int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src); +extern int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, + u8 cfg); extern int qcom_cc_register_board_clk(struct device *dev, const char *path, const char *name, unsigned long rate); From a1f0019c342bd83240b05be68c9888549dde7935 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 23 Nov 2021 08:25:08 -0800 Subject: [PATCH 145/868] clk: qcom: clk-alpha-pll: Don't reconfigure running Trion In the event that the bootloader has configured the Trion PLL as source for the display clocks, e.g. for the continuous splashscreen, then there will also be RCGs that are clocked by this instance. Reconfiguring, and in particular disabling the output of, the PLL will cause issues for these downstream RCGs and has been shown to prevent them from being re-parented. Follow downstream and skip configuration if it's determined that the PLL is already running. Fixes: 59128c20a6a9 ("clk: qcom: clk-alpha-pll: Add support for controlling Lucid PLLs") Signed-off-by: Bjorn Andersson Reviewed-by: Robert Foss Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20211123162508.153711-1-bjorn.andersson@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-alpha-pll.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index eaedcceb766f..8f65b9bdafce 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -1429,6 +1429,15 @@ EXPORT_SYMBOL_GPL(clk_alpha_pll_postdiv_fabia_ops); void clk_trion_pll_configure(struct clk_alpha_pll *pll, struct regmap *regmap, const struct alpha_pll_config *config) { + /* + * If the bootloader left the PLL enabled it's likely that there are + * RCGs that will lock up if we disable the PLL below. + */ + if (trion_pll_is_enabled(pll, regmap)) { + pr_debug("Trion PLL is already enabled, skipping configuration\n"); + return; + } + clk_alpha_pll_write_config(regmap, PLL_L_VAL(pll), config->l); regmap_write(regmap, PLL_CAL_L_VAL(pll), TRION_PLL_CAL_VAL); clk_alpha_pll_write_config(regmap, PLL_ALPHA_VAL(pll), config->alpha); From eee377b8f44e7ac4f76bbf2440e5cbbc1d25c25f Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Sun, 5 Sep 2021 07:54:18 +0800 Subject: [PATCH 146/868] clk: imx: use module_platform_driver Replace builtin_platform_driver_probe with module_platform_driver_probe because CONFIG_CLK_IMX8QXP can be set to =m (kernel module). Fixes: e0d0d4d86c766 ("clk: imx8qxp: Support building i.MX8QXP clock driver as module") Cc: Fabio Estevam Cc: Stephen Boyd Signed-off-by: Miles Chen Link: https://lore.kernel.org/r/20210904235418.2442-1-miles.chen@mediatek.com Reviewed-by: Fabio Estevam Reviewed-by: Stephen Boyd Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-imx8qxp-lpcg.c | 2 +- drivers/clk/imx/clk-imx8qxp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c index d3e905cf867d..b23758083ce5 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -370,7 +370,7 @@ static struct platform_driver imx8qxp_lpcg_clk_driver = { .probe = imx8qxp_lpcg_clk_probe, }; -builtin_platform_driver(imx8qxp_lpcg_clk_driver); +module_platform_driver(imx8qxp_lpcg_clk_driver); MODULE_AUTHOR("Aisheng Dong "); MODULE_DESCRIPTION("NXP i.MX8QXP LPCG clock driver"); diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index c53a688d8ccc..40a2efb1329b 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -308,7 +308,7 @@ static struct platform_driver imx8qxp_clk_driver = { }, .probe = imx8qxp_clk_probe, }; -builtin_platform_driver(imx8qxp_clk_driver); +module_platform_driver(imx8qxp_clk_driver); MODULE_AUTHOR("Aisheng Dong "); MODULE_DESCRIPTION("NXP i.MX8QXP clock driver"); From f6071e5e3961eeb5300bd0901c9e128598730ae3 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 30 Nov 2021 16:47:20 -0800 Subject: [PATCH 147/868] selftests/fib_tests: Rework fib_rp_filter_test() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently rp_filter tests in fib_tests.sh:fib_rp_filter_test() are failing. ping sockets are bound to dummy1 using the "-I" option (SO_BINDTODEVICE), but socket lookup is failing when receiving ping replies, since the routing table thinks they belong to dummy0. For example, suppose ping is using a SOCK_RAW socket for ICMP messages. When receiving ping replies, in __raw_v4_lookup(), sk->sk_bound_dev_if is 3 (dummy1), but dif (skb_rtable(skb)->rt_iif) says 2 (dummy0), so the raw_sk_bound_dev_eq() check fails. Similar things happen in ping_lookup() for SOCK_DGRAM sockets. These tests used to pass due to a bug [1] in iputils, where "ping -I" actually did not bind ICMP message sockets to device. The bug has been fixed by iputils commit f455fee41c07 ("ping: also bind the ICMP socket to the specific device") in 2016, which is why our rp_filter tests started to fail. See [2] . Fixing the tests while keeping everything in one netns turns out to be nontrivial. Rework the tests and build the following topology: ┌─────────────────────────────┐ ┌─────────────────────────────┐ │ network namespace 1 (ns1) │ │ network namespace 2 (ns2) │ │ │ │ │ │ ┌────┐ ┌─────┐ │ │ ┌─────┐ ┌────┐ │ │ │ lo │<───>│veth1│<────────┼────┼─>│veth2│<──────────>│ lo │ │ │ └────┘ ├─────┴──────┐ │ │ ├─────┴──────┐ └────┘ │ │ │192.0.2.1/24│ │ │ │192.0.2.1/24│ │ │ └────────────┘ │ │ └────────────┘ │ └─────────────────────────────┘ └─────────────────────────────┘ Consider sending an ICMP_ECHO packet A in ns2. Both source and destination IP addresses are 192.0.2.1, and we use strict mode rp_filter in both ns1 and ns2: 1. A is routed to lo since its destination IP address is one of ns2's local addresses (veth2); 2. A is redirected from lo's egress to veth2's egress using mirred; 3. A arrives at veth1's ingress in ns1; 4. A is redirected from veth1's ingress to lo's ingress, again, using mirred; 5. In __fib_validate_source(), fib_info_nh_uses_dev() returns false, since A was received on lo, but reverse path lookup says veth1; 6. However A is not dropped since we have relaxed this check for lo in commit 66f8209547cc ("fib: relax source validation check for loopback packets"); Making sure A is not dropped here in this corner case is the whole point of having this test. 7. As A reaches the ICMP layer, an ICMP_ECHOREPLY packet, B, is generated; 8. Similarly, B is redirected from lo's egress to veth1's egress (in ns1), then redirected once again from veth2's ingress to lo's ingress (in ns2), using mirred. Also test "ping 127.0.0.1" from ns2. It does not trigger the relaxed check in __fib_validate_source(), but just to make sure the topology works with loopback addresses. Tested with ping from iputils 20210722-41-gf9fb573: $ ./fib_tests.sh -t rp_filter IPv4 rp_filter tests TEST: rp_filter passes local packets [ OK ] TEST: rp_filter passes loopback packets [ OK ] [1] https://github.com/iputils/iputils/issues/55 [2] https://github.com/iputils/iputils/commit/f455fee41c077d4b700a473b2f5b3487b8febc1d Reported-by: Hangbin Liu Fixes: adb701d6cfa4 ("selftests: add a test case for rp_filter") Reviewed-by: Cong Wang Signed-off-by: Peilin Ye Acked-by: David Ahern Link: https://lore.kernel.org/r/20211201004720.6357-1-yepeilin.cs@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 59 ++++++++++++++++++++---- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 5abe92d55b69..996af1ae3d3d 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -444,24 +444,63 @@ fib_rp_filter_test() setup set -e + ip netns add ns2 + ip netns set ns2 auto + + ip -netns ns2 link set dev lo up + + $IP link add name veth1 type veth peer name veth2 + $IP link set dev veth2 netns ns2 + $IP address add 192.0.2.1/24 dev veth1 + ip -netns ns2 address add 192.0.2.1/24 dev veth2 + $IP link set dev veth1 up + ip -netns ns2 link set dev veth2 up + $IP link set dev lo address 52:54:00:6a:c7:5e - $IP link set dummy0 address 52:54:00:6a:c7:5e - $IP link add dummy1 type dummy - $IP link set dummy1 address 52:54:00:6a:c7:5e - $IP link set dev dummy1 up + $IP link set dev veth1 address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + + # 1. (ns2) redirect lo's egress to veth2's egress + ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth2 + ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth2 + + # 2. (ns1) redirect veth1's ingress to lo's ingress + $NS_EXEC tc qdisc add dev veth1 ingress + $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \ + action mirred ingress redirect dev lo + $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \ + action mirred ingress redirect dev lo + + # 3. (ns1) redirect lo's egress to veth1's egress + $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel + $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth1 + $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth1 + + # 4. (ns2) redirect veth2's ingress to lo's ingress + ip netns exec ns2 tc qdisc add dev veth2 ingress + ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + action mirred ingress redirect dev lo + ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + action mirred ingress redirect dev lo + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - - $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel - $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo - $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup From 653926205741add87a6cf452e21950eebc6ac10b Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Tue, 30 Nov 2021 20:16:27 -0800 Subject: [PATCH 148/868] scsi: pm80xx: Do not call scsi_remove_host() in pm8001_alloc() Calling scsi_remove_host() before scsi_add_host() results in a crash: BUG: kernel NULL pointer dereference, address: 0000000000000108 RIP: 0010:device_del+0x63/0x440 Call Trace: device_unregister+0x17/0x60 scsi_remove_host+0xee/0x2a0 pm8001_pci_probe+0x6ef/0x1b90 [pm80xx] local_pci_probe+0x3f/0x90 We cannot call scsi_remove_host() in pm8001_alloc() because scsi_add_host() has not been called yet at that point in time. Function call tree: pm8001_pci_probe() | `- pm8001_pci_alloc() | | | `- pm8001_alloc() | | | `- scsi_remove_host() | `- scsi_add_host() Link: https://lore.kernel.org/r/20211201041627.1592487-1-ipylypiv@google.com Fixes: 05c6c029a44d ("scsi: pm80xx: Increase number of supported queues") Reviewed-by: Vishakha Channapattan Acked-by: Jack Wang Signed-off-by: Igor Pylypiv Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm8001_init.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index bed8cc125544..fbfeb0b046dd 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -282,12 +282,12 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, if (rc) { pm8001_dbg(pm8001_ha, FAIL, "pm8001_setup_irq failed [ret: %d]\n", rc); - goto err_out_shost; + goto err_out; } /* Request Interrupt */ rc = pm8001_request_irq(pm8001_ha); if (rc) - goto err_out_shost; + goto err_out; count = pm8001_ha->max_q_num; /* Queues are chosen based on the number of cores/msix availability */ @@ -423,8 +423,6 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, pm8001_tag_init(pm8001_ha); return 0; -err_out_shost: - scsi_remove_host(pm8001_ha->shost); err_out_nodev: for (i = 0; i < pm8001_ha->max_memcnt; i++) { if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) { From d080811f27936f712f619f847389f403ac873b8f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Dec 2021 08:59:27 +0100 Subject: [PATCH 149/868] HID: add USB_HID dependancy to hid-chicony The chicony HID driver only controls USB devices, yet did not have a dependancy on USB_HID. This causes build errors on some configurations like sparc when building due to new changes to the chicony driver. Reported-by: Stephen Rothwell Cc: stable@vger.kernel.org Cc: Jiri Kosina Cc: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211203075927.2829218-1-gregkh@linuxfoundation.org --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 828c2995ec34..e9bc8efed5a1 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -207,7 +207,7 @@ config HID_CHERRY config HID_CHICONY tristate "Chicony devices" - depends on HID + depends on USB_HID default !EXPERT help Support for Chicony Tactical pad and special keys on Chicony keyboards. From 30cb3c2ad24b66fb7639a6d1f4390c74d6e68f94 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Dec 2021 09:12:31 +0100 Subject: [PATCH 150/868] HID: add USB_HID dependancy to hid-prodikeys The prodikeys HID driver only controls USB devices, yet did not have a dependancy on USB_HID. This causes build errors on some configurations like nios2 when building due to new changes to the prodikeys driver. Reported-by: kernel test robot Cc: stable@vger.kernel.org Cc: Jiri Kosina Cc: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211203081231.2856936-1-gregkh@linuxfoundation.org --- drivers/hid/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index e9bc8efed5a1..a7c78ac96270 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -245,7 +245,7 @@ config HID_MACALLY config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" - depends on HID && SND + depends on USB_HID && SND select SND_RAWMIDI help Support for Prodikeys PC-MIDI Keyboard device support. From caff009098e6cf59fd6ac21c3a3befcc854978b4 Mon Sep 17 00:00:00 2001 From: xiazhengqiao Date: Fri, 3 Dec 2021 11:01:19 +0800 Subject: [PATCH 151/868] HID: google: add eel USB id Add one additional hammer-like device. Signed-off-by: xiazhengqiao Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211203030119.28612-1-xiazhengqiao@huaqin.corp-partner.google.com --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 8123b871a3eb..0403beb3104b 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -585,6 +585,8 @@ static void hammer_remove(struct hid_device *hdev) static const struct hid_device_id hammer_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_EEL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index aeb907b57ab3..ca418bffd3b2 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -501,6 +501,7 @@ #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_DEVICE_ID_GOOGLE_DON 0x5050 +#define USB_DEVICE_ID_GOOGLE_EEL 0x5057 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From 086e81f6b90e41a07a1a885bb11e93daa6915747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 30 Nov 2021 07:01:17 +0100 Subject: [PATCH 152/868] HID: intel-ish-hid: ipc: only enable IRQ wakeup when requested MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes spurious wakeups from s0ix on Lenovo ThinkPad X1 Cargon Gen 9 on lid close. These wakeups are generated by interrupts from the ISH on changes to the lid status. By disabling the wake IRQ from the ISH we inhibit these spurious wakeups while keeping the resume from LID open through the ACPI interrupt. Reports on the Lenovo forums indicate that Lenovo ThinkPad X1 Yoga Gen6 is also affected. Fixes: ae02e5d40d5f ("HID: intel-ish-hid: ipc layer") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214855 Signed-off-by: Thomas Weißschuh Acked-by: Srinivas Pandruvada Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211130060117.3026-1-linux@weissschuh.net --- drivers/hid/intel-ish-hid/ipc/pci-ish.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 1c5039081db2..8e9d9450cb83 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -266,7 +266,8 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work) if (ish_should_leave_d0i3(pdev) && !dev->suspend_flag && IPC_IS_ISH_ILUP(fwsts)) { - disable_irq_wake(pdev->irq); + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(pdev->irq); ish_set_host_ready(dev); @@ -337,7 +338,8 @@ static int __maybe_unused ish_suspend(struct device *device) */ pci_save_state(pdev); - enable_irq_wake(pdev->irq); + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(pdev->irq); } } else { /* From e2022cbec9c2606514c4edc4a760e3acb7419d8a Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Fri, 26 Nov 2021 16:19:51 +0530 Subject: [PATCH 153/868] bus: mhi: pci_generic: Fix device recovery failed issue For Foxconn T99W175 device(sdx55 platform) in some host platform, it would be unavailable once the host execute the err handler. After checking, it's caused by the delay time too short to get a successful reset. Please see my test evidence as bewlow(BTW, I add some extra test logs in function mhi_pci_reset_prepare and mhi_pci_reset_done): When MHI_POST_RESET_DELAY_MS equals to 500ms: Nov 4 14:30:03 jbd-ThinkEdge kernel: [ 146.222477] mhi mhi0: Device MHI is not in valid state Nov 4 14:30:03 jbd-ThinkEdge kernel: [ 146.222628] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_prepare reset Nov 4 14:30:03 jbd-ThinkEdge kernel: [ 146.222631] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_prepare mhi_soc_reset Nov 4 14:30:03 jbd-ThinkEdge kernel: [ 146.222632] mhi mhi0: mhi_soc_reset write soc to reset Nov 4 14:30:05 jbd-ThinkEdge kernel: [ 147.839993] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_done Nov 4 14:30:05 jbd-ThinkEdge kernel: [ 147.902063] mhi-pci-generic 0000:2d:00.0: reset failed When MHI_POST_RESET_DELAY_MS equals to 1000ms or 1500ms: Nov 4 19:07:26 jbd-ThinkEdge kernel: [ 157.067857] mhi mhi0: Device MHI is not in valid state Nov 4 19:07:26 jbd-ThinkEdge kernel: [ 157.068029] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_prepare reset Nov 4 19:07:26 jbd-ThinkEdge kernel: [ 157.068032] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_prepare mhi_soc_reset Nov 4 19:07:26 jbd-ThinkEdge kernel: [ 157.068034] mhi mhi0: mhi_soc_reset write soc to reset Nov 4 19:07:29 jbd-ThinkEdge kernel: [ 159.607006] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_done Nov 4 19:07:29 jbd-ThinkEdge kernel: [ 159.607152] mhi mhi0: Requested to power ON Nov 4 19:07:51 jbd-ThinkEdge kernel: [ 181.302872] mhi mhi0: Failed to reset MHI due to syserr state Nov 4 19:07:51 jbd-ThinkEdge kernel: [ 181.303011] mhi-pci-generic 0000:2d:00.0: failed to power up MHI controller When MHI_POST_RESET_DELAY_MS equals to 2000ms: Nov 4 17:51:08 jbd-ThinkEdge kernel: [ 147.180527] mhi mhi0: Failed to transition from PM state: Linkdown or Error Fatal Detect to: SYS ERROR Process Nov 4 17:51:08 jbd-ThinkEdge kernel: [ 147.180535] mhi mhi0: Device MHI is not in valid state Nov 4 17:51:08 jbd-ThinkEdge kernel: [ 147.180722] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_prepare reset Nov 4 17:51:08 jbd-ThinkEdge kernel: [ 147.180725] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_prepare mhi_soc_reset Nov 4 17:51:08 jbd-ThinkEdge kernel: [ 147.180727] mhi mhi0: mhi_soc_reset write soc to reset Nov 4 17:51:11 jbd-ThinkEdge kernel: [ 150.230787] mhi-pci-generic 0000:2d:00.0: mhi_pci_reset_done Nov 4 17:51:11 jbd-ThinkEdge kernel: [ 150.230928] mhi mhi0: Requested to power ON Nov 4 17:51:11 jbd-ThinkEdge kernel: [ 150.231173] mhi mhi0: Power on setup success Nov 4 17:51:14 jbd-ThinkEdge kernel: [ 153.254747] mhi mhi0: Wait for device to enter SBL or Mission mode I also tried big data like 3000, and it worked as well. 500ms may not be enough for all support mhi device. We shall increase it to 2000ms at least. Link: https://lore.kernel.org/r/20211108113127.3938-1-slark_xiao@163.com [mani: massaged commit message little bit, added Fixes tag and CCed stable] Fixes: 8ccc3279fcad ("mhi: pci_generic: Add support for reset") Cc: stable@vger.kernel.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Slark Xiao Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20211126104951.35685-2-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/pci_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 59a4896a8030..4c577a731709 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -20,7 +20,7 @@ #define MHI_PCI_DEFAULT_BAR_NUM 0 -#define MHI_POST_RESET_DELAY_MS 500 +#define MHI_POST_RESET_DELAY_MS 2000 #define HEALTH_CHECK_PERIOD (HZ * 2) From 96f3896780153214040a6747974bebc1355307c0 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:53:21 +0800 Subject: [PATCH 154/868] selftests/tc-testing: add exit code Mark the summary result as FAIL to prevent from confusing the selftest framework if some of them are failed. Previously, the selftest framework always treats it as *ok* even though some of them are failed actually. That's because the script tdc.sh always return 0. # All test results: # # 1..97 # ok 1 83be - Create FQ-PIE with invalid number of flows # ok 2 8b6e - Create RED with no flags [...snip] # ok 6 5f15 - Create RED with flags ECN, harddrop # ok 7 53e8 - Create RED with flags ECN, nodrop # ok 8 d091 - Fail to create RED with only nodrop flag # ok 9 af8e - Create RED with flags ECN, nodrop, harddrop # not ok 10 ce7d - Add mq Qdisc to multi-queue device (4 queues) # Could not match regex pattern. Verify command output: # qdisc mq 1: root # qdisc fq_codel 0: parent 1:4 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 # qdisc fq_codel 0: parent 1:3 limit 10240p flows 1024 quantum 1514 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64 [...snip] # ok 96 6979 - Change quantum of a strict ETS band # ok 97 9a7d - Change ETS strict band without quantum # # # # ok 1 selftests: tc-testing: tdc.sh <<< summary result CC: Philip Li Reported-by: kernel test robot Signed-off-by: Li Zhijian Acked-by: Davide Caratti Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a3e43189d940..ee22e3447ec7 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -716,6 +716,7 @@ def set_operation_mode(pm, parser, args, remaining): list_test_cases(alltests) exit(0) + exit_code = 0 # KSFT_PASS if len(alltests): req_plugins = pm.get_required_plugins(alltests) try: @@ -724,6 +725,8 @@ def set_operation_mode(pm, parser, args, remaining): print('The following plugins were not found:') print('{}'.format(pde.missing_pg)) catresults = test_runner(pm, args, alltests) + if catresults.count_failures() != 0: + exit_code = 1 # KSFT_FAIL if args.format == 'none': print('Test results output suppression requested\n') else: @@ -748,6 +751,8 @@ def set_operation_mode(pm, parser, args, remaining): gid=int(os.getenv('SUDO_GID'))) else: print('No tests found\n') + exit_code = 4 # KSFT_SKIP + exit(exit_code) def main(): """ @@ -767,8 +772,5 @@ def main(): set_operation_mode(pm, parser, args, remaining) - exit(0) - - if __name__ == "__main__": main() From a8c9505c53c5f1f0aba572a4c70e2d91ad08434e Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:53:22 +0800 Subject: [PATCH 155/868] selftests/tc-testing: add missing config qdiscs/fq_pie requires CONFIG_NET_SCH_FQ_PIE, otherwise tc will fail to create a fq_pie qdisc. It fixes following issue: # not ok 57 83be - Create FQ-PIE with invalid number of flows # Command exited with 2, expected 0 # Error: Specified qdisc not found. Signed-off-by: Li Zhijian Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index b71828df5a6d..b1cd7efa4512 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -60,6 +60,7 @@ CONFIG_NET_IFE_SKBTCINDEX=m CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_FQ_PIE=m # ## Network testing From db925bca33a9f0029a9891defd926a4856dd5c87 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:53:23 +0800 Subject: [PATCH 156/868] selftests/tc-testing: Fix cannot create /sys/bus/netdevsim/new_device: Directory nonexistent Install netdevsim to provide /sys/bus/netdevsim/new_device interface. It helps to fix: # ok 97 9a7d - Change ETS strict band without quantum # skipped - skipped - previous setup failed 11 ce7d # # # -----> prepare stage *** Could not execute: "echo "1 1 4" > /sys/bus/netdevsim/new_device" # # -----> prepare stage *** Error message: "/bin/sh: 1: cannot create /sys/bus/netdevsim/new_device: Directory nonexistent # " # # -----> prepare stage *** Aborting test run. # # # <_io.BufferedReader name=5> *** stdout *** # Signed-off-by: Li Zhijian Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/config | 1 + tools/testing/selftests/tc-testing/tdc.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index b1cd7efa4512..a3239d5e40c7 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -61,6 +61,7 @@ CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_FQ_PIE=m +CONFIG_NETDEVSIM=m # ## Network testing diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 7fe38c76db44..afb0cd86fa3d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -1,5 +1,6 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +modprobe netdevsim ./tdc.py -c actions --nobuildebpf ./tdc.py -c qdisc From e485382ea7eb4b81f4b59073cd831084820497de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 2 Dec 2021 11:29:08 +0100 Subject: [PATCH 157/868] drm/ttm: fix ttm_bo_swapout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7120a447c7fe ("drm/ttm: Double check mem_type of BO while eviction") made ttm_bo_evict_swapout_allowable() function actually check the placement, but we always used a dummy placement in ttm_bo_swapout. Fix this by using the real placement instead. Signed-off-by: Christian König Fixes: 7120a447c7fe ("drm/ttm: Double check mem_type of BO while eviction") Reviewed-by: Pan, Xinhui Link: https://patchwork.freedesktop.org/patch/msgid/20211202103828.44573-1-christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 739f11c0109c..047adc42d9a0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1103,7 +1103,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, * as an indication that we're about to swap out. */ memset(&place, 0, sizeof(place)); - place.mem_type = TTM_PL_SYSTEM; + place.mem_type = bo->resource->mem_type; if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) return -EBUSY; @@ -1135,6 +1135,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, struct ttm_place hop; memset(&hop, 0, sizeof(hop)); + place.mem_type = TTM_PL_SYSTEM; ret = ttm_resource_alloc(bo, &place, &evict_mem); if (unlikely(ret)) goto out; From f12972018b3c478a7e17669ee9e46ac525aadbea Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 24 Nov 2021 14:23:25 +0000 Subject: [PATCH 158/868] MAINTAINERS: add maintainer for Qualcomm FastRPC driver For some reason I forgot to add myself as maintainer when we upstreamed FastRPC patches. Add myself and Amol from Qualcomm as maintainers for Qualcomm FastRPC driver. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20211124142325.27108-1-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 360e9aa0205d..e9459f5e9b91 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15771,6 +15771,15 @@ S: Maintained F: Documentation/devicetree/bindings/net/qcom,ethqos.txt F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +QUALCOMM FASTRPC DRIVER +M: Srinivas Kandagatla +M: Amol Maheshwari +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/misc/qcom,fastrpc.txt +F: drivers/misc/fastrpc.c +F: include/uapi/misc/fastrpc.h + QUALCOMM GENERIC INTERFACE I2C DRIVER M: Akash Asthana M: Mukesh Savaliya From 3a1bf591e9a410f220b7405a142a47407394a1d5 Mon Sep 17 00:00:00 2001 From: Jeya R Date: Wed, 24 Nov 2021 22:01:21 +0530 Subject: [PATCH 159/868] misc: fastrpc: fix improper packet size calculation The buffer list is sorted and this is not being considered while calculating packet size. This would lead to improper copy length calculation for non-dmaheap buffers which would eventually cause sending improper buffers to DSP. Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method") Reviewed-by: Srinivas Kandagatla Signed-off-by: Jeya R Link: https://lore.kernel.org/r/1637771481-4299-1-git-send-email-jeyr@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 39aca7753719..4ccbf43e6bfa 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -719,16 +719,18 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) static u64 fastrpc_get_payload_size(struct fastrpc_invoke_ctx *ctx, int metalen) { u64 size = 0; - int i; + int oix; size = ALIGN(metalen, FASTRPC_ALIGN); - for (i = 0; i < ctx->nscalars; i++) { + for (oix = 0; oix < ctx->nbufs; oix++) { + int i = ctx->olaps[oix].raix; + if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1) { - if (ctx->olaps[i].offset == 0) + if (ctx->olaps[oix].offset == 0) size = ALIGN(size, FASTRPC_ALIGN); - size += (ctx->olaps[i].mend - ctx->olaps[i].mstart); + size += (ctx->olaps[oix].mend - ctx->olaps[oix].mstart); } } From 9a626577398c24ecab63c0a684436c8928092367 Mon Sep 17 00:00:00 2001 From: Ralph Siemsen Date: Mon, 8 Nov 2021 13:16:27 -0500 Subject: [PATCH 160/868] nvmem: eeprom: at25: fix FRAM byte_len Commit fd307a4ad332 ("nvmem: prepare basics for FRAM support") added support for FRAM devices such as the Cypress FM25V. During testing, it was found that the FRAM detects properly, however reads and writes fail. Upon further investigation, two problem were found in at25_probe() routine. 1) In the case of an FRAM device without platform data, eg. fram == true && spi->dev.platform_data == NULL the stack local variable "struct spi_eeprom chip" is not initialized fully, prior to being copied into at25->chip. The chip.flags field in particular can cause problems. 2) The byte_len of FRAM is computed from its ID register, and is stored into the stack local "struct spi_eeprom chip" structure. This happens after the same structure has been copied into at25->chip. As a result, at25->chip.byte_len does not contain the correct length of the device. In turn this can cause checks at beginning of at25_ee_read() to fail (or equally, it could allow reads beyond the end of the device length). Fix both of these issues by eliminating the on-stack struct spi_eeprom. Instead use the one inside at25_data structure, which starts of zeroed. Fixes: fd307a4ad332 ("nvmem: prepare basics for FRAM support") Cc: stable Reviewed-by: Arnd Bergmann Signed-off-by: Ralph Siemsen Link: https://lore.kernel.org/r/20211108181627.645638-1-ralph.siemsen@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/at25.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index 632325474233..b38978a3b3ff 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -376,7 +376,6 @@ MODULE_DEVICE_TABLE(spi, at25_spi_ids); static int at25_probe(struct spi_device *spi) { struct at25_data *at25 = NULL; - struct spi_eeprom chip; int err; int sr; u8 id[FM25_ID_LEN]; @@ -389,15 +388,18 @@ static int at25_probe(struct spi_device *spi) if (match && !strcmp(match->compatible, "cypress,fm25")) is_fram = 1; + at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL); + if (!at25) + return -ENOMEM; + /* Chip description */ - if (!spi->dev.platform_data) { - if (!is_fram) { - err = at25_fw_to_chip(&spi->dev, &chip); - if (err) - return err; - } - } else - chip = *(struct spi_eeprom *)spi->dev.platform_data; + if (spi->dev.platform_data) { + memcpy(&at25->chip, spi->dev.platform_data, sizeof(at25->chip)); + } else if (!is_fram) { + err = at25_fw_to_chip(&spi->dev, &at25->chip); + if (err) + return err; + } /* Ping the chip ... the status register is pretty portable, * unlike probing manufacturer IDs. We do expect that system @@ -409,12 +411,7 @@ static int at25_probe(struct spi_device *spi) return -ENXIO; } - at25 = devm_kzalloc(&spi->dev, sizeof(struct at25_data), GFP_KERNEL); - if (!at25) - return -ENOMEM; - mutex_init(&at25->lock); - at25->chip = chip; at25->spi = spi; spi_set_drvdata(spi, at25); @@ -431,7 +428,7 @@ static int at25_probe(struct spi_device *spi) dev_err(&spi->dev, "Error: unsupported size (id %02x)\n", id[7]); return -ENODEV; } - chip.byte_len = int_pow(2, id[7] - 0x21 + 4) * 1024; + at25->chip.byte_len = int_pow(2, id[7] - 0x21 + 4) * 1024; if (at25->chip.byte_len > 64 * 1024) at25->chip.flags |= EE_ADDR3; @@ -464,7 +461,7 @@ static int at25_probe(struct spi_device *spi) at25->nvmem_config.type = is_fram ? NVMEM_TYPE_FRAM : NVMEM_TYPE_EEPROM; at25->nvmem_config.name = dev_name(&spi->dev); at25->nvmem_config.dev = &spi->dev; - at25->nvmem_config.read_only = chip.flags & EE_READONLY; + at25->nvmem_config.read_only = at25->chip.flags & EE_READONLY; at25->nvmem_config.root_only = true; at25->nvmem_config.owner = THIS_MODULE; at25->nvmem_config.compat = true; @@ -474,17 +471,18 @@ static int at25_probe(struct spi_device *spi) at25->nvmem_config.priv = at25; at25->nvmem_config.stride = 1; at25->nvmem_config.word_size = 1; - at25->nvmem_config.size = chip.byte_len; + at25->nvmem_config.size = at25->chip.byte_len; at25->nvmem = devm_nvmem_register(&spi->dev, &at25->nvmem_config); if (IS_ERR(at25->nvmem)) return PTR_ERR(at25->nvmem); dev_info(&spi->dev, "%d %s %s %s%s, pagesize %u\n", - (chip.byte_len < 1024) ? chip.byte_len : (chip.byte_len / 1024), - (chip.byte_len < 1024) ? "Byte" : "KByte", + (at25->chip.byte_len < 1024) ? + at25->chip.byte_len : (at25->chip.byte_len / 1024), + (at25->chip.byte_len < 1024) ? "Byte" : "KByte", at25->chip.name, is_fram ? "fram" : "eeprom", - (chip.flags & EE_READONLY) ? " (readonly)" : "", + (at25->chip.flags & EE_READONLY) ? " (readonly)" : "", at25->chip.page_size); return 0; } From 0edeb8992db8e7de9b8fe3164ace9a4356b17021 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 26 Nov 2021 08:32:44 +0800 Subject: [PATCH 161/868] misc: rtsx: Avoid mangling IRQ during runtime PM After commit 5b4258f6721f ("misc: rtsx: rts5249 support runtime PM"), when the rtsx controller is runtime suspended, bring CPUs offline and back online, the runtime resume of the controller will fail: [ 47.319391] smpboot: CPU 1 is now offline [ 47.414140] x86: Booting SMP configuration: [ 47.414147] smpboot: Booting Node 0 Processor 1 APIC 0x2 [ 47.571334] smpboot: CPU 2 is now offline [ 47.686055] smpboot: Booting Node 0 Processor 2 APIC 0x4 [ 47.808174] smpboot: CPU 3 is now offline [ 47.878146] smpboot: Booting Node 0 Processor 3 APIC 0x6 [ 48.003679] smpboot: CPU 4 is now offline [ 48.086187] smpboot: Booting Node 0 Processor 4 APIC 0x1 [ 48.239627] smpboot: CPU 5 is now offline [ 48.326059] smpboot: Booting Node 0 Processor 5 APIC 0x3 [ 48.472193] smpboot: CPU 6 is now offline [ 48.574181] smpboot: Booting Node 0 Processor 6 APIC 0x5 [ 48.743375] smpboot: CPU 7 is now offline [ 48.838047] smpboot: Booting Node 0 Processor 7 APIC 0x7 [ 48.965447] __common_interrupt: 1.35 No irq handler for vector [ 51.174065] mmc0: error -110 doing runtime resume [ 54.978088] I/O error, dev mmcblk0, sector 21479 op 0x1:(WRITE) flags 0x0 phys_seg 11 prio class 0 [ 54.978108] Buffer I/O error on dev mmcblk0p1, logical block 19431, lost async page write [ 54.978129] Buffer I/O error on dev mmcblk0p1, logical block 19432, lost async page write [ 54.978134] Buffer I/O error on dev mmcblk0p1, logical block 19433, lost async page write [ 54.978137] Buffer I/O error on dev mmcblk0p1, logical block 19434, lost async page write [ 54.978141] Buffer I/O error on dev mmcblk0p1, logical block 19435, lost async page write [ 54.978145] Buffer I/O error on dev mmcblk0p1, logical block 19436, lost async page write [ 54.978148] Buffer I/O error on dev mmcblk0p1, logical block 19437, lost async page write [ 54.978152] Buffer I/O error on dev mmcblk0p1, logical block 19438, lost async page write [ 54.978155] Buffer I/O error on dev mmcblk0p1, logical block 19439, lost async page write [ 54.978160] Buffer I/O error on dev mmcblk0p1, logical block 19440, lost async page write [ 54.978244] mmc0: card aaaa removed [ 54.978452] FAT-fs (mmcblk0p1): FAT read failed (blocknr 4257) There's interrupt immediately raised on rtsx_pci_write_register() in runtime resume routine, but the IRQ handler hasn't registered yet. So we can either move rtsx_pci_write_register() after rtsx_pci_acquire_irq(), or just stop mangling IRQ on runtime PM. Choose the latter to save some CPU cycles. Fixes: 5b4258f6721f ("misc: rtsx: rts5249 support runtime PM") Cc: stable Signed-off-by: Kai-Heng Feng BugLink: https://bugs.launchpad.net/bugs/1951784 Link: https://lore.kernel.org/r/20211126003246.1068770-1-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_pcr.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 8c72eb590f79..6ac509c1821c 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1803,8 +1803,6 @@ static int rtsx_pci_runtime_suspend(struct device *device) mutex_lock(&pcr->pcr_mutex); rtsx_pci_power_off(pcr, HOST_ENTER_S3); - free_irq(pcr->irq, (void *)pcr); - mutex_unlock(&pcr->pcr_mutex); pcr->is_runtime_suspended = true; @@ -1825,8 +1823,6 @@ static int rtsx_pci_runtime_resume(struct device *device) mutex_lock(&pcr->pcr_mutex); rtsx_pci_write_register(pcr, HOST_SLEEP_STATE, 0x03, 0x00); - rtsx_pci_acquire_irq(pcr); - synchronize_irq(pcr->irq); if (pcr->ops->fetch_vendor_settings) pcr->ops->fetch_vendor_settings(pcr); From 2e69e18aec4c1d308b2da461cb6d21500fa441c7 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Tue, 9 Nov 2021 21:47:58 +0800 Subject: [PATCH 162/868] mtd: rawnand: denali: Add the dependency on HAS_IOMEM The helper function devm_platform_ioremap_resource_xxx() needs HAS_IOMEM enabled, so add the dependency on HAS_IOMEM. Fixes: 5f14a8ca1b49 ("mtd: rawnand: denali: Make use of the helper function devm_platform_ioremap_resource_byname()") Signed-off-by: Cai Huoqing Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211109134758.417-1-caihuoqing@baidu.com --- drivers/mtd/nand/raw/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index 67b7cb67c030..0a45d3c6c15b 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -26,7 +26,7 @@ config MTD_NAND_DENALI_PCI config MTD_NAND_DENALI_DT tristate "Denali NAND controller as a DT device" select MTD_NAND_DENALI - depends on HAS_DMA && HAVE_CLK && OF + depends on HAS_DMA && HAVE_CLK && OF && HAS_IOMEM help Enable the driver for NAND flash on platforms using a Denali NAND controller as a DT device. From 16d8b628a4152e8e8b01b6a1d82e30208ee2dd30 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:13 +0100 Subject: [PATCH 163/868] mtd: rawnand: Fix nand_erase_op delay NAND_OP_CMD() expects a delay parameter in nanoseconds. The delay value is wrongly given in milliseconds. Fix the conversion macro used in order to set this delay in nanoseconds. Fixes: d7a773e8812b ("mtd: rawnand: Access SDR and NV-DDR timings through a common macro") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-2-herve.codina@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 3d6c6e880520..5c6b065837ef 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -1837,7 +1837,7 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock) NAND_OP_CMD(NAND_CMD_ERASE1, 0), NAND_OP_ADDR(2, addrs, 0), NAND_OP_CMD(NAND_CMD_ERASE2, - NAND_COMMON_TIMING_MS(conf, tWB_max)), + NAND_COMMON_TIMING_NS(conf, tWB_max)), NAND_OP_WAIT_RDY(NAND_COMMON_TIMING_MS(conf, tBERS_max), 0), }; From 36a65982a98c4bc72fdcfef2c4aaf90193746631 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:14 +0100 Subject: [PATCH 164/868] mtd: rawnand: Fix nand_choose_best_timings() on unsupported interface When the NV-DDR interface is not supported by the NAND chip, the value of onfi->nvddr_timing_modes is 0. In this case, the best_mode variable value in nand_choose_best_nvddr_timings() is -1. The last for-loop is skipped and the function returns an uninitialized value. If this returned value is 0, the nand_choose_best_sdr_timings() is not executed and no 'best timing' are set. This leads the host controller and the NAND chip working at default mode 0 timing even if a better timing can be used. Fix this uninitialized returned value. nand_choose_best_sdr_timings() is pretty similar to nand_choose_best_nvddr_timings(). Even if onfi->sdr_timing_modes should never be seen as 0, nand_choose_best_sdr_timings() returned value is fixed. Fixes: a9ecc8c814e9 ("mtd: rawnand: Choose the best timings, NV-DDR included") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-3-herve.codina@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 5c6b065837ef..a130320de412 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -926,7 +926,7 @@ int nand_choose_best_sdr_timings(struct nand_chip *chip, struct nand_sdr_timings *spec_timings) { const struct nand_controller_ops *ops = chip->controller->ops; - int best_mode = 0, mode, ret; + int best_mode = 0, mode, ret = -EOPNOTSUPP; iface->type = NAND_SDR_IFACE; @@ -977,7 +977,7 @@ int nand_choose_best_nvddr_timings(struct nand_chip *chip, struct nand_nvddr_timings *spec_timings) { const struct nand_controller_ops *ops = chip->controller->ops; - int best_mode = 0, mode, ret; + int best_mode = 0, mode, ret = -EOPNOTSUPP; iface->type = NAND_NVDDR_IFACE; From a4ca0c439f2d5ce9a3dc118d882f9f03449864c8 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:15 +0100 Subject: [PATCH 165/868] mtd: rawnand: fsmc: Take instruction delay into account The FSMC NAND controller should apply a delay after the instruction has been issued on the bus. The FSMC NAND controller driver did not handle this delay. Add this waiting delay in the FSMC NAND controller driver. Fixes: 4da712e70294 ("mtd: nand: fsmc: use ->exec_op()") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-4-herve.codina@bootlin.com --- drivers/mtd/nand/raw/fsmc_nand.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 658f0cbe7ce8..0a6c9ef0ea8b 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -664,6 +665,9 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op, instr->ctx.waitrdy.timeout_ms); break; } + + if (instr->delay_ns) + ndelay(instr->delay_ns); } return ret; From 9472335eaa1452b51dc8e8edaa1a342997cb80c7 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 19 Nov 2021 16:03:16 +0100 Subject: [PATCH 166/868] mtd: rawnand: fsmc: Fix timing computation Under certain circumstances, the timing settings calculated by the FSMC NAND controller driver were inaccurate. These settings led to incorrect data reads or fallback to timing mode 0 depending on the NAND chip used. The timing computation did not take into account the following constraint given in SPEAr3xx reference manual: twait >= tCEA - (tset * TCLK) + TOUTDEL + TINDEL Enhance the timings calculation by taking into account this additional constraint. This change has no impact on slow timing modes such as mode 0. Indeed, on mode 0, computed values are the same with and without the patch. NANDs which previously stayed in mode 0 because of fallback to mode 0 can now work at higher speeds and NANDs which were not working at all because of the corrupted data work at high speeds without troubles. Overall improvement on a Micron/MT29F1G08 (flash_speed tool): mode0 mode3 eraseblock write speed 3220 KiB/s 4511 KiB/s eraseblock read speed 4491 KiB/s 7529 KiB/s Fixes: d9fb079571833 ("mtd: nand: fsmc: add support for SDR timings") Signed-off-by: Herve Codina Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211119150316.43080-5-herve.codina@bootlin.com --- drivers/mtd/nand/raw/fsmc_nand.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 0a6c9ef0ea8b..6b2bda815b88 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -94,6 +94,14 @@ #define FSMC_BUSY_WAIT_TIMEOUT (1 * HZ) +/* + * According to SPEAr300 Reference Manual (RM0082) + * TOUDEL = 7ns (Output delay from the flip-flops to the board) + * TINDEL = 5ns (Input delay from the board to the flipflop) + */ +#define TOUTDEL 7000 +#define TINDEL 5000 + struct fsmc_nand_timings { u8 tclr; u8 tar; @@ -278,7 +286,7 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, { unsigned long hclk = clk_get_rate(host->clk); unsigned long hclkn = NSEC_PER_SEC / hclk; - u32 thiz, thold, twait, tset; + u32 thiz, thold, twait, tset, twait_min; if (sdrt->tRC_min < 30000) return -EOPNOTSUPP; @@ -310,13 +318,6 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->thold > FSMC_THOLD_MASK) tims->thold = FSMC_THOLD_MASK; - twait = max(sdrt->tRP_min, sdrt->tWP_min); - tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; - if (tims->twait == 0) - tims->twait = 1; - else if (tims->twait > FSMC_TWAIT_MASK) - tims->twait = FSMC_TWAIT_MASK; - tset = max(sdrt->tCS_min - sdrt->tWP_min, sdrt->tCEA_max - sdrt->tREA_max); tims->tset = DIV_ROUND_UP(tset / 1000, hclkn) - 1; @@ -325,6 +326,21 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->tset > FSMC_TSET_MASK) tims->tset = FSMC_TSET_MASK; + /* + * According to SPEAr300 Reference Manual (RM0082) which gives more + * information related to FSMSC timings than the SPEAr600 one (RM0305), + * twait >= tCEA - (tset * TCLK) + TOUTDEL + TINDEL + */ + twait_min = sdrt->tCEA_max - ((tims->tset + 1) * hclkn * 1000) + + TOUTDEL + TINDEL; + twait = max3(sdrt->tRP_min, sdrt->tWP_min, twait_min); + + tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; + if (tims->twait == 0) + tims->twait = 1; + else if (tims->twait > FSMC_TWAIT_MASK) + tims->twait = FSMC_TWAIT_MASK; + return 0; } From 27a030e8729255b2068f35c1cd609b532b263311 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 30 Nov 2021 11:24:43 +0000 Subject: [PATCH 167/868] mtd: dataflash: Add device-tree SPI IDs Commit 5fa6863ba692 ("spi: Check we have a spi_device_id for each DT compatible") added a test to check that every SPI driver has a spi_device_id for each DT compatiable string defined by the driver and warns if the spi_device_id is missing. The spi_device_ids are missing for the dataflash driver and the following warnings are now seen. WARNING KERN SPI driver mtd_dataflash has no spi_device_id for atmel,at45 WARNING KERN SPI driver mtd_dataflash has no spi_device_id for atmel,dataflash Fix this by adding the necessary spi_device_ids. Fixes: 96c8395e2166 ("spi: Revert modalias changes") Signed-off-by: Jon Hunter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20211130112443.107730-1-jonathanh@nvidia.com --- drivers/mtd/devices/mtd_dataflash.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 9802e265fca8..2b317ed6c103 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -96,6 +96,13 @@ struct dataflash { struct mtd_info mtd; }; +static const struct spi_device_id dataflash_dev_ids[] = { + { "at45" }, + { "dataflash" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, dataflash_dev_ids); + #ifdef CONFIG_OF static const struct of_device_id dataflash_dt_ids[] = { { .compatible = "atmel,at45", }, @@ -927,6 +934,7 @@ static struct spi_driver dataflash_driver = { .name = "mtd_dataflash", .of_match_table = of_match_ptr(dataflash_dt_ids), }, + .id_table = dataflash_dev_ids, .probe = dataflash_probe, .remove = dataflash_remove, From 619764cc2ec9ce1283a8bbcd89a1376a7c68293b Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 2 Dec 2021 17:50:10 +0100 Subject: [PATCH 168/868] ALSA: hda/realtek: Fix quirk for TongFang PHxTxX1 This fixes the SND_PCI_QUIRK(...) of the TongFang PHxTxX1 barebone. This fixes the issue of sound not working after s3 suspend. When waking up from s3 suspend the Coef 0x10 is set to 0x0220 instead of 0x0020. Setting the value manually makes the sound work again. This patch does this automatically. While being on it, I also fixed the comment formatting of the quirk and shortened variable and function names. Signed-off-by: Werner Sembach Fixes: dd6dd6e3c791 ("ALSA: hda/realtek: Add quirk for TongFang PHxTxX1") Cc: Link: https://lore.kernel.org/r/20211202165010.876431-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 40 +++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9ce7457533c9..d361a1260d5a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6503,22 +6503,26 @@ static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec, /* for alc285_fixup_ideapad_s740_coef() */ #include "ideapad_s740_helper.c" -static void alc256_fixup_tongfang_reset_persistent_settings(struct hda_codec *codec, - const struct hda_fixup *fix, - int action) +static const struct coef_fw alc256_fixup_set_coef_defaults_coefs[] = { + WRITE_COEF(0x10, 0x0020), WRITE_COEF(0x24, 0x0000), + WRITE_COEF(0x26, 0x0000), WRITE_COEF(0x29, 0x3000), + WRITE_COEF(0x37, 0xfe05), WRITE_COEF(0x45, 0x5089), + {} +}; + +static void alc256_fixup_set_coef_defaults(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) { /* - * A certain other OS sets these coeffs to different values. On at least one TongFang - * barebone these settings might survive even a cold reboot. So to restore a clean slate the - * values are explicitly reset to default here. Without this, the external microphone is - * always in a plugged-in state, while the internal microphone is always in an unplugged - * state, breaking the ability to use the internal microphone. - */ - alc_write_coef_idx(codec, 0x24, 0x0000); - alc_write_coef_idx(codec, 0x26, 0x0000); - alc_write_coef_idx(codec, 0x29, 0x3000); - alc_write_coef_idx(codec, 0x37, 0xfe05); - alc_write_coef_idx(codec, 0x45, 0x5089); + * A certain other OS sets these coeffs to different values. On at least + * one TongFang barebone these settings might survive even a cold + * reboot. So to restore a clean slate the values are explicitly reset + * to default here. Without this, the external microphone is always in a + * plugged-in state, while the internal microphone is always in an + * unplugged state, breaking the ability to use the internal microphone. + */ + alc_process_coef_fw(codec, alc256_fixup_set_coef_defaults_coefs); } static const struct coef_fw alc233_fixup_no_audio_jack_coefs[] = { @@ -6759,7 +6763,7 @@ enum { ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, ALC287_FIXUP_13S_GEN2_SPEAKERS, - ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS, + ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC233_FIXUP_NO_AUDIO_JACK, }; @@ -8465,9 +8469,9 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE, }, - [ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS] = { + [ALC256_FIXUP_SET_COEF_DEFAULTS] = { .type = HDA_FIXUP_FUNC, - .v.func = alc256_fixup_tongfang_reset_persistent_settings, + .v.func = alc256_fixup_set_coef_defaults, }, [ALC245_FIXUP_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, @@ -8929,7 +8933,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), - SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS), + SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), From a9418924552e52e63903cbb0310d7537260702bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 14:42:18 -0800 Subject: [PATCH 169/868] inet: use #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING consistently Since commit 4e1beecc3b58 ("net/sock: Add kernel config SOCK_RX_QUEUE_MAPPING"), sk_rx_queue_mapping access is guarded by CONFIG_SOCK_RX_QUEUE_MAPPING. Fixes: 54b92e841937 ("tcp: Migrate TCP_ESTABLISHED/TCP_SYN_RECV sockets in accept queues.") Signed-off-by: Eric Dumazet Cc: Kuniyuki Iwashima Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Tariq Toukan Acked-by: Kuniyuki Iwashima Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f7fea3a7c5e6..62a67fdc344c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -721,7 +721,7 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req, sk_node_init(&nreq_sk->sk_node); nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping; -#ifdef CONFIG_XPS +#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping; #endif nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu; From 03cfda4fa6ea9bea2f30160579a78c2b8c1e616e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 15:37:24 -0800 Subject: [PATCH 170/868] tcp: fix another uninit-value (sk_rx_queue_mapping) KMSAN is still not happy [1]. I missed that passive connections do not inherit their sk_rx_queue_mapping values from the request socket, but instead tcp_child_process() is calling sk_mark_napi_id(child, skb) We have many sk_mark_napi_id() callers, so I am providing a new helper, forcing the setting sk_rx_queue_mapping and sk_napi_id. Note that we had no KMSAN report for sk_napi_id because passive connections got a copy of this field from the listener. sk_rx_queue_mapping in the other hand is inside the sk_dontcopy_begin/sk_dontcopy_end so sk_clone_lock() leaves this field uninitialized. We might remove dead code populating req->sk_rx_queue_mapping in the future. [1] BUG: KMSAN: uninit-value in __sk_rx_queue_set include/net/sock.h:1924 [inline] BUG: KMSAN: uninit-value in sk_rx_queue_update include/net/sock.h:1938 [inline] BUG: KMSAN: uninit-value in sk_mark_napi_id include/net/busy_poll.h:136 [inline] BUG: KMSAN: uninit-value in tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833 __sk_rx_queue_set include/net/sock.h:1924 [inline] sk_rx_queue_update include/net/sock.h:1938 [inline] sk_mark_napi_id include/net/busy_poll.h:136 [inline] tcp_child_process+0xb42/0x1050 net/ipv4/tcp_minisocks.c:833 tcp_v4_rcv+0x3d83/0x4ed0 net/ipv4/tcp_ipv4.c:2066 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 run_ksoftirqd+0x33/0x50 kernel/softirq.c:920 smpboot_thread_fn+0x616/0xbf0 kernel/smpboot.c:164 kthread+0x721/0x850 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 Uninit was created at: __alloc_pages+0xbc7/0x10a0 mm/page_alloc.c:5409 alloc_pages+0x8a5/0xb80 alloc_slab_page mm/slub.c:1810 [inline] allocate_slab+0x287/0x1c20 mm/slub.c:1947 new_slab mm/slub.c:2010 [inline] ___slab_alloc+0xbdf/0x1e90 mm/slub.c:3039 __slab_alloc mm/slub.c:3126 [inline] slab_alloc_node mm/slub.c:3217 [inline] slab_alloc mm/slub.c:3259 [inline] kmem_cache_alloc+0xbb3/0x11c0 mm/slub.c:3264 sk_prot_alloc+0xeb/0x570 net/core/sock.c:1914 sk_clone_lock+0xd6/0x1940 net/core/sock.c:2118 inet_csk_clone_lock+0x8d/0x6a0 net/ipv4/inet_connection_sock.c:956 tcp_create_openreq_child+0xb1/0x1ef0 net/ipv4/tcp_minisocks.c:453 tcp_v4_syn_recv_sock+0x268/0x2710 net/ipv4/tcp_ipv4.c:1563 tcp_check_req+0x207c/0x2a30 net/ipv4/tcp_minisocks.c:765 tcp_v4_rcv+0x36f5/0x4ed0 net/ipv4/tcp_ipv4.c:2047 ip_protocol_deliver_rcu+0x760/0x10b0 net/ipv4/ip_input.c:204 ip_local_deliver_finish net/ipv4/ip_input.c:231 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] ip_local_deliver+0x584/0x8c0 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:460 [inline] ip_sublist_rcv_finish net/ipv4/ip_input.c:551 [inline] ip_list_rcv_finish net/ipv4/ip_input.c:601 [inline] ip_sublist_rcv+0x11fd/0x1520 net/ipv4/ip_input.c:609 ip_list_rcv+0x95f/0x9a0 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5505 [inline] __netif_receive_skb_list_core+0xe34/0x1240 net/core/dev.c:5553 __netif_receive_skb_list+0x7fc/0x960 net/core/dev.c:5605 netif_receive_skb_list_internal+0x868/0xde0 net/core/dev.c:5696 gro_normal_list net/core/dev.c:5850 [inline] napi_complete_done+0x579/0xdd0 net/core/dev.c:6587 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0x17b6/0x2350 drivers/net/virtio_net.c:1557 __napi_poll+0x14e/0xbc0 net/core/dev.c:7020 napi_poll net/core/dev.c:7087 [inline] net_rx_action+0x824/0x1880 net/core/dev.c:7174 __do_softirq+0x1fe/0x7eb kernel/softirq.c:558 Fixes: 342159ee394d ("net: avoid dirtying sk->sk_rx_queue_mapping") Fixes: a37a0ee4d25c ("net: avoid uninit-value from tcp_conn_request") Signed-off-by: Eric Dumazet Reported-by: syzbot Tested-by: Alexander Potapenko Signed-off-by: David S. Miller --- include/net/busy_poll.h | 13 +++++++++++++ net/ipv4/tcp_minisocks.c | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 7994455ec714..c4898fcbf923 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -136,6 +136,19 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) sk_rx_queue_update(sk, skb); } +/* Variant of sk_mark_napi_id() for passive flow setup, + * as sk->sk_napi_id and sk->sk_rx_queue_mapping content + * needs to be set. + */ +static inline void sk_mark_napi_id_set(struct sock *sk, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); +#endif + sk_rx_queue_set(sk, skb); +} + static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id) { #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index cf913a66df17..7c2d3ac2363a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -829,8 +829,8 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; - /* record NAPI ID of child */ - sk_mark_napi_id(child, skb); + /* record sk_napi_id and sk_rx_queue_mapping of child. */ + sk_mark_napi_id_set(child, skb); tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { From dac8e00fb640e9569cdeefd3ce8a75639e5d0711 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Dec 2021 18:27:18 -0800 Subject: [PATCH 171/868] bonding: make tx_rebalance_counter an atomic KCSAN reported a data-race [1] around tx_rebalance_counter which can be accessed from different contexts, without the protection of a lock/mutex. [1] BUG: KCSAN: data-race in bond_alb_init_slave / bond_alb_monitor write to 0xffff888157e8ca24 of 4 bytes by task 7075 on cpu 0: bond_alb_init_slave+0x713/0x860 drivers/net/bonding/bond_alb.c:1613 bond_enslave+0xd94/0x3010 drivers/net/bonding/bond_main.c:1949 do_set_master net/core/rtnetlink.c:2521 [inline] __rtnl_newlink net/core/rtnetlink.c:3475 [inline] rtnl_newlink+0x1298/0x13b0 net/core/rtnetlink.c:3506 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2491 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x6e1/0x7d0 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2492 __do_sys_sendmsg net/socket.c:2501 [inline] __se_sys_sendmsg net/socket.c:2499 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888157e8ca24 of 4 bytes by task 1082 on cpu 1: bond_alb_monitor+0x8f/0xc00 drivers/net/bonding/bond_alb.c:1511 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 worker_thread+0x616/0xa70 kernel/workqueue.c:2445 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00000001 -> 0x00000064 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 1082 Comm: kworker/u4:3 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: bond1 bond_alb_monitor Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 14 ++++++++------ include/net/bond_alb.h | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 2ec8e015c7b3..533e476988f2 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1501,14 +1501,14 @@ void bond_alb_monitor(struct work_struct *work) struct slave *slave; if (!bond_has_slaves(bond)) { - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); bond_info->lp_counter = 0; goto re_arm; } rcu_read_lock(); - bond_info->tx_rebalance_counter++; + atomic_inc(&bond_info->tx_rebalance_counter); bond_info->lp_counter++; /* send learning packets */ @@ -1530,7 +1530,7 @@ void bond_alb_monitor(struct work_struct *work) } /* rebalance tx traffic */ - if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { + if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == rcu_access_pointer(bond->curr_active_slave)) { @@ -1540,7 +1540,7 @@ void bond_alb_monitor(struct work_struct *work) bond_info->unbalanced_load = 0; } } - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); } if (bond_info->rlb_enabled) { @@ -1610,7 +1610,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) tlb_init_slave(slave); /* order a rebalance ASAP */ - bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond->alb_info.tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) bond->alb_info.rlb_rebalance = 1; @@ -1647,7 +1648,8 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char rlb_clear_slave(bond, slave); } else if (link == BOND_LINK_UP) { /* order a rebalance ASAP */ - bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond_info->tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) { bond->alb_info.rlb_rebalance = 1; /* If the updelay module parameter is smaller than the diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index f6af76c87a6c..191c36afa1f4 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -126,7 +126,7 @@ struct tlb_slave_info { struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ u32 unbalanced_load; - int tx_rebalance_counter; + atomic_t tx_rebalance_counter; int lp_counter; /* -------- rlb parameters -------- */ int rlb_enabled; From 0f8a3b48f91b8dc1f3eff06b77a63a17183fccbd Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 3 Dec 2021 10:32:13 +0800 Subject: [PATCH 172/868] selftests: net/fcnal-test.sh: add exit code Previously, the selftest framework always treats it as *ok* even though some of them are failed actually. That's because the script always returns 0. It supports PASS/FAIL/SKIP exit code now. CC: Philip Li Reported-by: kernel test robot Signed-off-by: Li Zhijian Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 7f5b265fcb90..a1da013d847b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4077,3 +4077,11 @@ cleanup 2>/dev/null printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + +if [ $nfail -ne 0 ]; then + exit 1 # KSFT_FAIL +elif [ $nsuccess -eq 0 ]; then + exit $ksft_skip +fi + +exit 0 # KSFT_PASS From 128f6ec95a282b2d8bc1041e59bf65810703fa44 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 3 Dec 2021 11:31:06 +0800 Subject: [PATCH 173/868] net: bcm4908: Handle dma_set_coherent_mask error codes The return value of dma_set_coherent_mask() is not always 0. To catch the exception in case that dma is not support the mask. Fixes: 9d61d138ab30 ("net: broadcom: rename BCM4908 driver & update DT binding") Signed-off-by: Jiasheng Jiang Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm4908_enet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 7cc5213c575a..b07cb9bc5f2d 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -708,7 +708,9 @@ static int bcm4908_enet_probe(struct platform_device *pdev) enet->irq_tx = platform_get_irq_byname(pdev, "tx"); - dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); + err = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); + if (err) + return err; err = bcm4908_enet_dma_alloc(enet); if (err) From badd7857f5c933a3dc34942a2c11d67fdbdc24de Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Dec 2021 13:11:28 +0300 Subject: [PATCH 174/868] net: altera: set a couple error code in probe() There are two error paths which accidentally return success instead of a negative error code. Fixes: bbd2190ce96d ("Altera TSE: Add main and header file for Altera Ethernet Driver") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/altera/altera_tse_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index d75d95a97dd9..993b2fb42961 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1430,16 +1430,19 @@ static int altera_tse_probe(struct platform_device *pdev) priv->rxdescmem_busaddr = dma_res->start; } else { + ret = -ENODEV; goto err_free_netdev; } - if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) + if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask)); - else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) + } else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(32)); - else + } else { + ret = -EIO; goto err_free_netdev; + } /* MAC address space */ ret = request_and_map(pdev, "control_port", &control_port, From de4adddcbcc25dcd82ffbf3a4bbd8db5f64da056 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 1 Dec 2021 11:41:02 +0000 Subject: [PATCH 175/868] of/irq: Add a quirk for controllers with their own definition of interrupt-map Since commit 041284181226 ("of/irq: Allow matching of an interrupt-map local to an interrupt controller"), a handful of interrupt controllers have stopped working correctly. This is due to the DT exposing a non-sensical interrupt-map property, and their drivers relying on the kernel ignoring this property. Since we cannot realistically fix this terrible behaviour, add a quirk for the limited set of devices that have implemented this monster, and document that this is a pretty bad practice. Fixes: 041284181226 ("of/irq: Allow matching of an interrupt-map local to an interrupt controller") Cc: Rob Herring Cc: John Crispin Cc: Biwen Li Cc: Chris Brandt Cc: Geert Uytterhoeven Cc: Sander Vanheule Signed-off-by: Marc Zyngier Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211201114102.13446-1-maz@kernel.org Signed-off-by: Rob Herring --- drivers/of/irq.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index b10f015b2e37..2b07677a386b 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -76,6 +76,26 @@ struct device_node *of_irq_find_parent(struct device_node *child) } EXPORT_SYMBOL_GPL(of_irq_find_parent); +/* + * These interrupt controllers abuse interrupt-map for unspeakable + * reasons and rely on the core code to *ignore* it (the drivers do + * their own parsing of the property). + * + * If you think of adding to the list for something *new*, think + * again. There is a high chance that you will be sent back to the + * drawing board. + */ +static const char * const of_irq_imap_abusers[] = { + "CBEA,platform-spider-pic", + "sti,platform-spider-pic", + "realtek,rtl-intc", + "fsl,ls1021a-extirq", + "fsl,ls1043a-extirq", + "fsl,ls1088a-extirq", + "renesas,rza1-irqc", + NULL, +}; + /** * of_irq_parse_raw - Low level interrupt tree parsing * @addr: address specifier (start of "reg" property of the device) in be32 format @@ -159,12 +179,15 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) /* * Now check if cursor is an interrupt-controller and * if it is then we are done, unless there is an - * interrupt-map which takes precedence. + * interrupt-map which takes precedence except on one + * of these broken platforms that want to parse + * interrupt-map themselves for $reason. */ bool intc = of_property_read_bool(ipar, "interrupt-controller"); imap = of_get_property(ipar, "interrupt-map", &imaplen); - if (imap == NULL && intc) { + if (intc && + (!imap || of_device_compatible_match(ipar, of_irq_imap_abusers))) { pr_debug(" -> got it !\n"); return 0; } From b54472a02cefd0dc468158bbc4d636b27cd6fc34 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 3 Dec 2021 10:48:28 -0600 Subject: [PATCH 176/868] dt-bindings: media: nxp,imx7-mipi-csi2: Drop bad if/then schema The if/then schema for 'data-lanes' doesn't work as 'compatible' is at a different level than 'data-lanes'. To make it work, the if/then schema would have to be moved to the top level and then whole hierarchy of nodes down to 'data-lanes' created. I don't think it is worth the complexity to do that, so let's just drop it. The error in this schema is masked by a fixup in the tools causing the 'allOf' to get overwritten. Removing the fixup as part of moving to json-schema draft 2019-09 revealed the issue: Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.example.dt.yaml: mipi-csi@30750000: ports:port@0:endpoint:data-lanes:0: [1] is too short From schema: /builds/robherring/linux-dt-review/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.example.dt.yaml: mipi-csi@32e30000: ports:port@0:endpoint:data-lanes:0: [1, 2, 3, 4] is too long From schema: /builds/robherring/linux-dt-review/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml The if condition was always true because 'compatible' did not exist in 'endpoint' node and a non-existent property is true for json-schema. Fixes: 85b62ff2cb97 ("media: dt-bindings: media: nxp,imx7-mipi-csi2: Add i.MX8MM support") Cc: Rui Miguel Silva Cc: Laurent Pinchart Cc: Mauro Carvalho Chehab Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: linux-media@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Rob Herring Reviewed-by: Laurent Pinchart Acked-by: Rui Miguel Silva Link: https://lore.kernel.org/r/20211203164828.187642-1-robh@kernel.org --- .../bindings/media/nxp,imx7-mipi-csi2.yaml | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml index 877183cf4278..1ef849dc74d7 100644 --- a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml +++ b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml @@ -79,6 +79,8 @@ properties: properties: data-lanes: + description: + Note that 'fsl,imx7-mipi-csi2' only supports up to 2 data lines. items: minItems: 1 maxItems: 4 @@ -91,18 +93,6 @@ properties: required: - data-lanes - allOf: - - if: - properties: - compatible: - contains: - const: fsl,imx7-mipi-csi2 - then: - properties: - data-lanes: - items: - maxItems: 2 - port@1: $ref: /schemas/graph.yaml#/properties/port description: From cb25b11943cbcc5a34531129952870420f8be858 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 1 Nov 2021 19:36:30 -0500 Subject: [PATCH 177/868] ARM: socfpga: dts: fix qspi node compatible The QSPI flash node needs to have the required "jedec,spi-nor" in the compatible string. Fixes: 1df99da8953 ("ARM: dts: socfpga: Enable QSPI in Arria10 devkit") Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts | 2 +- arch/arm/boot/dts/socfpga_arria5_socdk.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_socdk.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_sockit.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_socrates.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_sodia.dts | 2 +- arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts index 2b645642b935..2a745522404d 100644 --- a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts +++ b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts @@ -12,7 +12,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00aa"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_arria5_socdk.dts b/arch/arm/boot/dts/socfpga_arria5_socdk.dts index 90e676e7019f..1b02d46496a8 100644 --- a/arch/arm/boot/dts/socfpga_arria5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_arria5_socdk.dts @@ -119,7 +119,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts index 6f138b2b2616..51bb436784e2 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts @@ -124,7 +124,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts index c155ff02eb6e..cae9ddd5ed38 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts @@ -169,7 +169,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts index 8d5d3996f6f2..ca18b959e655 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts @@ -80,7 +80,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts index 99a71757cdf4..3f7aa7bf0863 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts @@ -116,7 +116,7 @@ flash0: n25q512a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q512a"; + compatible = "micron,n25q512a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a060718758b6..25874e1b9c82 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -224,7 +224,7 @@ n25q128@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q128"; + compatible = "micron,n25q128", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; @@ -241,7 +241,7 @@ n25q00@1 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <1>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; From 8581fd402a0cf80b5298e3b225e7a7bd8f110e69 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Dec 2021 12:34:00 -0800 Subject: [PATCH 178/868] treewide: Add missing includes masked by cgroup -> bpf dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cgroup.h (therefore swap.h, therefore half of the universe) includes bpf.h which in turn includes module.h and slab.h. Since we're about to get rid of that dependency we need to clean things up. v2: drop the cpu.h include from cacheinfo.h, it's not necessary and it makes riscv sensitive to ordering of include files. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov Reviewed-by: Christoph Hellwig Acked-by: Krzysztof Wilczyński Acked-by: Peter Chen Acked-by: SeongJae Park Acked-by: Jani Nikula Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/all/20211120035253.72074-1-kuba@kernel.org/ # v1 Link: https://lore.kernel.org/all/20211120165528.197359-1-kuba@kernel.org/ # cacheinfo discussion Link: https://lore.kernel.org/bpf/20211202203400.1208663-1-kuba@kernel.org --- block/fops.c | 1 + drivers/gpu/drm/drm_gem_shmem_helper.c | 1 + drivers/gpu/drm/i915/gt/intel_gtt.c | 1 + drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/lima/lima_device.c | 1 + drivers/gpu/drm/msm/msm_gem_shrinker.c | 1 + drivers/gpu/drm/ttm/ttm_tt.c | 1 + drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c | 2 ++ drivers/pci/controller/dwc/pci-exynos.c | 1 + drivers/pci/controller/dwc/pcie-qcom-ep.c | 1 + drivers/usb/cdns3/host.c | 1 + include/linux/cacheinfo.h | 1 - include/linux/device/driver.h | 1 + include/linux/filter.h | 2 +- mm/damon/vaddr.c | 1 + mm/memory_hotplug.c | 1 + mm/swap_slots.c | 1 + 18 files changed, 18 insertions(+), 2 deletions(-) diff --git a/block/fops.c b/block/fops.c index ad732a36f9b3..3cb1e81929bc 100644 --- a/block/fops.c +++ b/block/fops.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "blk.h" static inline struct inode *bdev_file_inode(struct file *file) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 7b9f69f21f1e..bca0de92802e 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 67d14afa6623..b67f620c3d93 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -6,6 +6,7 @@ #include /* fault-inject.h is not standalone! */ #include +#include #include "gem/i915_gem_lmem.h" #include "i915_trace.h" diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 820a1f38b271..89cccefeea63 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "gem/i915_gem_context.h" #include "gt/intel_breadcrumbs.h" diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c index 65fdca366e41..f74f8048af8f 100644 --- a/drivers/gpu/drm/lima/lima_device.c +++ b/drivers/gpu/drm/lima/lima_device.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 4a1420b05e97..086dacf2f26a 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -5,6 +5,7 @@ */ #include +#include #include "msm_drv.h" #include "msm_gem.h" diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 7e83c00a3f48..79c870a3bef8 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c index a78c398bf5b2..01e7d3c0b68e 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "hinic_hw_dev.h" #include "hinic_dev.h" diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c index 0ef68fdd1f26..61c20907315f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -5,6 +5,8 @@ * */ +#include + #include "otx2_common.h" #include "otx2_ptp.h" diff --git a/drivers/pci/controller/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c index c24dab383654..722dacdd5a17 100644 --- a/drivers/pci/controller/dwc/pci-exynos.c +++ b/drivers/pci/controller/dwc/pci-exynos.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "pcie-designware.h" diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index 7b17da2f9b3f..cfe66bf04c1d 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "pcie-designware.h" diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 84dadfa726aa..9643b905e2d8 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -10,6 +10,7 @@ */ #include +#include #include "core.h" #include "drd.h" #include "host-export.h" diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2f909ed084c6..4ff37cb763ae 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,7 +3,6 @@ #define _LINUX_CACHEINFO_H #include -#include #include #include diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index a498ebcf4993..15e7c5e15d62 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -18,6 +18,7 @@ #include #include #include +#include /** * enum probe_type - device driver probe type to try diff --git a/include/linux/filter.h b/include/linux/filter.h index 534f678ca50f..7f1e88e3e2b5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -6,6 +6,7 @@ #define __LINUX_FILTER_H__ #include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include -#include struct sk_buff; struct sock; diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 35fe49080ee9..47f47f60440e 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "prmtv-common.h" diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 852041f6be41..2a9627dc784c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -35,6 +35,7 @@ #include #include #include +#include #include diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 16f706c55d92..2b5531840583 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include From 2fa7d94afc1afbb4d702760c058dc2d7ed30f226 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 30 Nov 2021 20:16:07 +0200 Subject: [PATCH 179/868] bpf: Fix the off-by-two error in range markings The first commit cited below attempts to fix the off-by-one error that appeared in some comparisons with an open range. Due to this error, arithmetically equivalent pieces of code could get different verdicts from the verifier, for example (pseudocode): // 1. Passes the verifier: if (data + 8 > data_end) return early read *(u64 *)data, i.e. [data; data+7] // 2. Rejected by the verifier (should still pass): if (data + 7 >= data_end) return early read *(u64 *)data, i.e. [data; data+7] The attempted fix, however, shifts the range by one in a wrong direction, so the bug not only remains, but also such piece of code starts failing in the verifier: // 3. Rejected by the verifier, but the check is stricter than in #1. if (data + 8 >= data_end) return early read *(u64 *)data, i.e. [data; data+7] The change performed by that fix converted an off-by-one bug into off-by-two. The second commit cited below added the BPF selftests written to ensure than code chunks like #3 are rejected, however, they should be accepted. This commit fixes the off-by-two error by adjusting new_range in the right direction and fixes the tests by changing the range into the one that should actually fail. Fixes: fb2a311a31d3 ("bpf: fix off by one for range markings with L{T, E} patterns") Fixes: b37242c773b2 ("bpf: add test cases to bpf selftests to cover all access tests") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211130181607.593149-1-maximmi@nvidia.com --- kernel/bpf/verifier.c | 2 +- .../bpf/verifier/xdp_direct_packet_access.c | 32 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 50efda51515b..f3001937bbb9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8422,7 +8422,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, new_range = dst_reg->off; if (range_right_open) - new_range--; + new_range++; /* Examples for register markings: * diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c index bfb97383e6b5..de172a5b8754 100644 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -112,10 +112,10 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -167,10 +167,10 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -274,9 +274,9 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -437,9 +437,9 @@ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -544,10 +544,10 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -599,10 +599,10 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -706,9 +706,9 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -869,9 +869,9 @@ offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, From 8e227b198a55859bf790dc7f4b1e30c0859c6756 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 3 Dec 2021 09:44:13 -0800 Subject: [PATCH 180/868] qede: validate non LSO skb length Although it is unlikely that stack could transmit a non LSO skb with length > MTU, however in some cases or environment such occurrences actually resulted into firmware asserts due to packet length being greater than the max supported by the device (~9700B). This patch adds the safeguard for such odd cases to avoid firmware asserts. v2: Added "Fixes" tag with one of the initial driver commit which enabled the TX traffic actually (as this was probably day1 issue which was discovered recently by some customer environment) Fixes: a2ec6172d29c ("qede: Add support for link") Signed-off-by: Manish Chopra Signed-off-by: Alok Prasad Signed-off-by: Prabhakar Kushwaha Signed-off-by: Ariel Elior Link: https://lore.kernel.org/r/20211203174413.13090-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_fp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 065e9004598e..999abcfe3310 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1643,6 +1643,13 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) data_split = true; } } else { + if (unlikely(skb->len > ETH_TX_MAX_NON_LSO_PKT_LEN)) { + DP_ERR(edev, "Unexpected non LSO skb length = 0x%x\n", skb->len); + qede_free_failed_tx_pkt(txq, first_bd, 0, false); + qede_update_tx_producer(txq); + return NETDEV_TX_OK; + } + val |= ((skb->len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT); } From 2be6d4d16a0849455a5c22490e3c5983495fed00 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 2 Dec 2021 14:34:37 +0000 Subject: [PATCH 181/868] net: cdc_ncm: Allow for dwNtbOutMaxSize to be unset or zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, due to the sequential use of min_t() and clamp_t() macros, in cdc_ncm_check_tx_max(), if dwNtbOutMaxSize is not set, the logic sets tx_max to 0. This is then used to allocate the data area of the SKB requested later in cdc_ncm_fill_tx_frame(). This does not cause an issue presently because when memory is allocated during initialisation phase of SKB creation, more memory (512b) is allocated than is required for the SKB headers alone (320b), leaving some space (512b - 320b = 192b) for CDC data (172b). However, if more elements (for example 3 x u64 = [24b]) were added to one of the SKB header structs, say 'struct skb_shared_info', increasing its original size (320b [320b aligned]) to something larger (344b [384b aligned]), then suddenly the CDC data (172b) no longer fits in the spare SKB data area (512b - 384b = 128b). Consequently the SKB bounds checking semantics fails and panics: skbuff: skb_over_panic: text:ffffffff830a5b5f len:184 put:172 \ head:ffff888119227c00 data:ffff888119227c00 tail:0xb8 end:0x80 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:110! RIP: 0010:skb_panic+0x14f/0x160 net/core/skbuff.c:106 Call Trace: skb_over_panic+0x2c/0x30 net/core/skbuff.c:115 skb_put+0x205/0x210 net/core/skbuff.c:1877 skb_put_zero include/linux/skbuff.h:2270 [inline] cdc_ncm_ndp16 drivers/net/usb/cdc_ncm.c:1116 [inline] cdc_ncm_fill_tx_frame+0x127f/0x3d50 drivers/net/usb/cdc_ncm.c:1293 cdc_ncm_tx_fixup+0x98/0xf0 drivers/net/usb/cdc_ncm.c:1514 By overriding the max value with the default CDC_NCM_NTB_MAX_SIZE_TX when not offered through the system provided params, we ensure enough data space is allocated to handle the CDC data, meaning no crash will occur. Cc: Oliver Neukum Fixes: 289507d3364f9 ("net: cdc_ncm: use sysfs for rx/tx aggregation tuning") Signed-off-by: Lee Jones Reviewed-by: Bjørn Mork Link: https://lore.kernel.org/r/20211202143437.1411410-1-lee.jones@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 24753a4da7e6..e303b522efb5 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -181,6 +181,8 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx) min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32); max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); + if (max == 0) + max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */ /* some devices set dwNtbOutMaxSize too low for the above default */ min = min(min, max); From 893621e0606747c5bbefcaf2794d12c7aa6212b7 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Thu, 25 Nov 2021 18:28:48 +0000 Subject: [PATCH 182/868] iio: trigger: stm32-timer: fix MODULE_ALIAS modprobe can't handle spaces in aliases. Fixes: 93fbe91b5521 ("iio: Add STM32 timer trigger driver") Signed-off-by: Alyssa Ross Link: https://lore.kernel.org/r/20211125182850.2645424-1-hi@alyssa.is Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 33083877cd19..4353b749ecef 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -912,6 +912,6 @@ static struct platform_driver stm32_timer_trigger_driver = { }; module_platform_driver(stm32_timer_trigger_driver); -MODULE_ALIAS("platform: stm32-timer-trigger"); +MODULE_ALIAS("platform:stm32-timer-trigger"); MODULE_DESCRIPTION("STMicroelectronics STM32 Timer Trigger driver"); MODULE_LICENSE("GPL v2"); From 1ff2fc02862d52e18fd3daabcfe840ec27e920a8 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 20 Oct 2021 13:02:11 -0500 Subject: [PATCH 183/868] x86/sme: Explicitly map new EFI memmap table as encrypted Reserving memory using efi_mem_reserve() calls into the x86 efi_arch_mem_reserve() function. This function will insert a new EFI memory descriptor into the EFI memory map representing the area of memory to be reserved and marking it as EFI runtime memory. As part of adding this new entry, a new EFI memory map is allocated and mapped. The mapping is where a problem can occur. This new memory map is mapped using early_memremap() and generally mapped encrypted, unless the new memory for the mapping happens to come from an area of memory that is marked as EFI_BOOT_SERVICES_DATA memory. In this case, the new memory will be mapped unencrypted. However, during replacement of the old memory map, efi_mem_type() is disabled, so the new memory map will now be long-term mapped encrypted (in efi.memmap), resulting in the map containing invalid data and causing the kernel boot to crash. Since it is known that the area will be mapped encrypted going forward, explicitly map the new memory map as encrypted using early_memremap_prot(). Cc: # 4.14.x Fixes: 8f716c9b5feb ("x86/mm: Add support to access boot related data in the clear") Link: https://lore.kernel.org/all/ebf1eb2940405438a09d51d121ec0d02c8755558.1634752931.git.thomas.lendacky@amd.com/ Signed-off-by: Tom Lendacky [ardb: incorporate Kconfig fix by Arnd] Signed-off-by: Ard Biesheuvel --- arch/x86/Kconfig | 1 + arch/x86/platform/efi/quirks.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 95dd1ee01546..9636a3122496 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1932,6 +1932,7 @@ config EFI depends on ACPI select UCS2_STRING select EFI_RUNTIME_WRAPPERS + select ARCH_USE_MEMREMAP_PROT help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index b15ebfe40a73..b0b848d6933a 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -277,7 +277,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - new = early_memremap(data.phys_map, data.size); + new = early_memremap_prot(data.phys_map, data.size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; From 815b6cb37e8e9c4da06e7a52d7215a6dc1965e02 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 2 Dec 2021 15:27:08 +0900 Subject: [PATCH 184/868] ata: ahci_ceva: Fix id array access in ceva_ahci_read_id() ATA IDENTIFY command returns an array of le16 words. Accessing it as a u16 array triggers the following sparse warning: drivers/ata/ahci_ceva.c:107:33: warning: invalid assignment: &= drivers/ata/ahci_ceva.c:107:33: left side has type unsigned short drivers/ata/ahci_ceva.c:107:33: right side has type restricted __le16 Use a local variable to explicitly cast the id array to __le16 to avoid this warning. Signed-off-by: Damien Le Moal --- drivers/ata/ahci_ceva.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index 50b56cd0039d..e9c7c07fd84c 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -94,6 +94,7 @@ struct ceva_ahci_priv { static unsigned int ceva_ahci_read_id(struct ata_device *dev, struct ata_taskfile *tf, u16 *id) { + __le16 *__id = (__le16 *)id; u32 err_mask; err_mask = ata_do_dev_read_id(dev, tf, id); @@ -103,7 +104,7 @@ static unsigned int ceva_ahci_read_id(struct ata_device *dev, * Since CEVA controller does not support device sleep feature, we * need to clear DEVSLP (bit 8) in word78 of the IDENTIFY DEVICE data. */ - id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8)); + __id[ATA_ID_FEATURE_SUPP] &= cpu_to_le16(~(1 << 8)); return 0; } From e5e6268f77badf18bd6ab435364cfe21c7396c31 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Wed, 24 Nov 2021 09:37:03 +0100 Subject: [PATCH 185/868] arm64: dts: imx8mq: remove interconnect property from lcdif The mxsfb driver handling imx8mq lcdif doesn't yet request the interconnect bandwidth that's needed at runtime when the description is present in the DT node. So remove that description and bring it back when it's supported. Fixes: ad1abc8a03fd ("arm64: dts: imx8mq: Add interconnect for lcdif") Signed-off-by: Martin Kepplinger Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 972766b67a15..71bf497f99c2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -524,8 +524,6 @@ <&clk IMX8MQ_VIDEO_PLL1>, <&clk IMX8MQ_VIDEO_PLL1_OUT>; assigned-clock-rates = <0>, <0>, <0>, <594000000>; - interconnects = <&noc IMX8MQ_ICM_LCDIF &noc IMX8MQ_ICS_DRAM>; - interconnect-names = "dram"; status = "disabled"; port@0 { From 737e65c7956795b3553781fb7bc82fce1c39503f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 24 Nov 2021 15:45:41 -0300 Subject: [PATCH 186/868] ARM: dts: imx6ull-pinfunc: Fix CSI_DATA07__ESAI_TX0 pad name According to the i.MX6ULL Reference Manual, pad CSI_DATA07 may have the ESAI_TX0 functionality, not ESAI_T0. Also, NXP's i.MX Config Tools 10.0 generates dtsi with the MX6ULL_PAD_CSI_DATA07__ESAI_TX0 naming, so fix it accordingly. There are no devicetree users in mainline that use the old name, so just remove the old entry. Fixes: c201369d4aa5 ("ARM: dts: imx6ull: add imx6ull support") Reported-by: George Makarov Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ull-pinfunc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6ull-pinfunc.h b/arch/arm/boot/dts/imx6ull-pinfunc.h index eb025a9d4759..7328d4ef8559 100644 --- a/arch/arm/boot/dts/imx6ull-pinfunc.h +++ b/arch/arm/boot/dts/imx6ull-pinfunc.h @@ -82,6 +82,6 @@ #define MX6ULL_PAD_CSI_DATA04__ESAI_TX_FS 0x01F4 0x0480 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA05__ESAI_TX_CLK 0x01F8 0x0484 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA06__ESAI_TX5_RX0 0x01FC 0x0488 0x0000 0x9 0x0 -#define MX6ULL_PAD_CSI_DATA07__ESAI_T0 0x0200 0x048C 0x0000 0x9 0x0 +#define MX6ULL_PAD_CSI_DATA07__ESAI_TX0 0x0200 0x048C 0x0000 0x9 0x0 #endif /* __DTS_IMX6ULL_PINFUNC_H */ From 042b67799e2991e301df8269e166d8bc5944495e Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Nov 2021 06:50:07 -0600 Subject: [PATCH 187/868] soc: imx: imx8m-blk-ctrl: Fix imx8mm mipi reset Most of the blk-ctrl reset bits are found in one register, however there are two bits in offset 8 for pulling the MIPI DPHY out of reset and one of them needs to be set when IMX8MM_DISPBLK_PD_MIPI_CSI is brought out of reset or the MIPI_CSI hangs. Since MIPI_DSI is impacted, add the additional one for MIPI_DSI too. Fixes: 926e57c065df ("soc: imx: imx8m-blk-ctrl: add DISP blk-ctrl") Signed-off-by: Adam Ford Reviewed-by: Fabio Estevam Reviewed-by: Lucas Stach Reviewed-by: Laurent Pinchart Tested by: Tim Harvey Signed-off-by: Shawn Guo --- drivers/soc/imx/imx8m-blk-ctrl.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/soc/imx/imx8m-blk-ctrl.c b/drivers/soc/imx/imx8m-blk-ctrl.c index 519b3651d1d9..c2f076b56e24 100644 --- a/drivers/soc/imx/imx8m-blk-ctrl.c +++ b/drivers/soc/imx/imx8m-blk-ctrl.c @@ -17,6 +17,7 @@ #define BLK_SFT_RSTN 0x0 #define BLK_CLK_EN 0x4 +#define BLK_MIPI_RESET_DIV 0x8 /* Mini/Nano DISPLAY_BLK_CTRL only */ struct imx8m_blk_ctrl_domain; @@ -36,6 +37,15 @@ struct imx8m_blk_ctrl_domain_data { const char *gpc_name; u32 rst_mask; u32 clk_mask; + + /* + * i.MX8M Mini and Nano have a third DISPLAY_BLK_CTRL register + * which is used to control the reset for the MIPI Phy. + * Since it's only present in certain circumstances, + * an if-statement should be used before setting and clearing this + * register. + */ + u32 mipi_phy_rst_mask; }; #define DOMAIN_MAX_CLKS 3 @@ -78,6 +88,8 @@ static int imx8m_blk_ctrl_power_on(struct generic_pm_domain *genpd) /* put devices into reset */ regmap_clear_bits(bc->regmap, BLK_SFT_RSTN, data->rst_mask); + if (data->mipi_phy_rst_mask) + regmap_clear_bits(bc->regmap, BLK_MIPI_RESET_DIV, data->mipi_phy_rst_mask); /* enable upstream and blk-ctrl clocks to allow reset to propagate */ ret = clk_bulk_prepare_enable(data->num_clks, domain->clks); @@ -99,6 +111,8 @@ static int imx8m_blk_ctrl_power_on(struct generic_pm_domain *genpd) /* release reset */ regmap_set_bits(bc->regmap, BLK_SFT_RSTN, data->rst_mask); + if (data->mipi_phy_rst_mask) + regmap_set_bits(bc->regmap, BLK_MIPI_RESET_DIV, data->mipi_phy_rst_mask); /* disable upstream clocks */ clk_bulk_disable_unprepare(data->num_clks, domain->clks); @@ -120,6 +134,9 @@ static int imx8m_blk_ctrl_power_off(struct generic_pm_domain *genpd) struct imx8m_blk_ctrl *bc = domain->bc; /* put devices into reset and disable clocks */ + if (data->mipi_phy_rst_mask) + regmap_clear_bits(bc->regmap, BLK_MIPI_RESET_DIV, data->mipi_phy_rst_mask); + regmap_clear_bits(bc->regmap, BLK_SFT_RSTN, data->rst_mask); regmap_clear_bits(bc->regmap, BLK_CLK_EN, data->clk_mask); @@ -480,6 +497,7 @@ static const struct imx8m_blk_ctrl_domain_data imx8mm_disp_blk_ctl_domain_data[] .gpc_name = "mipi-dsi", .rst_mask = BIT(5), .clk_mask = BIT(8) | BIT(9), + .mipi_phy_rst_mask = BIT(17), }, [IMX8MM_DISPBLK_PD_MIPI_CSI] = { .name = "dispblk-mipi-csi", @@ -488,6 +506,7 @@ static const struct imx8m_blk_ctrl_domain_data imx8mm_disp_blk_ctl_domain_data[] .gpc_name = "mipi-csi", .rst_mask = BIT(3) | BIT(4), .clk_mask = BIT(10) | BIT(11), + .mipi_phy_rst_mask = BIT(16), }, }; From 16cc33b23732d3ec55e428ddadb39c225f23de7e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 29 Nov 2021 08:24:34 -0800 Subject: [PATCH 188/868] nvme: show subsys nqn for duplicate cntlids The driver assigned nvme handle isn't persistent across reboots, so is not enough information to match up where the collisions are occuring. Add the subsys nqn string to the output so that it can more easily be identified later. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215099 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4c63564adeaa..d476ad65def3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2696,8 +2696,9 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, if (tmp->cntlid == ctrl->cntlid) { dev_err(ctrl->device, - "Duplicate cntlid %u with %s, rejecting\n", - ctrl->cntlid, dev_name(tmp->device)); + "Duplicate cntlid %u with %s, subsys %s, rejecting\n", + ctrl->cntlid, dev_name(tmp->device), + subsys->subnqn); return false; } From d39ad2a45c0e38def3e0c95f5b90d9af4274c939 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 30 Nov 2021 08:14:54 -0800 Subject: [PATCH 189/868] nvme: disable namespace access for unsupported metadata The only fabrics target that supports metadata handling through the separate integrity buffer is RDMA. It is currently usable only if the size is 8B per block and formatted for protection information. If an rdma target were to export a namespace with a different format (ex: 4k+64B), the driver will not be able to submit valid read/write commands for that namespace. Suppress setting the metadata feature in the namespace so that the gendisk capacity will be set to 0. This will prevent read/write access through the block stack, but will continue to allow ioctl passthrough commands. Cc: Max Gurtovoy Cc: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d476ad65def3..4ee7d2f8b8d8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1749,9 +1749,20 @@ static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id) */ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) return -EINVAL; - if (ctrl->max_integrity_segments) - ns->features |= - (NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); + + ns->features |= NVME_NS_EXT_LBAS; + + /* + * The current fabrics transport drivers support namespace + * metadata formats only if nvme_ns_has_pi() returns true. + * Suppress support for all other formats so the namespace will + * have a 0 capacity and not be usable through the block stack. + * + * Note, this check will need to be modified if any drivers + * gain the ability to use other metadata formats. + */ + if (ctrl->max_integrity_segments && nvme_ns_has_pi(ns)) + ns->features |= NVME_NS_METADATA_SUPPORTED; } else { /* * For PCIe controllers, we can't easily remap the separate From 793fcab83f38661e22e6f7c682dfba6fd0d97bb2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 26 Nov 2021 10:42:44 +0000 Subject: [PATCH 190/868] nvme: report write pointer for a full zone as zone start + zone len The write pointer in NVMe ZNS is invalid for a zone in zone state full. The same also holds true for ZAC/ZBC. The current behavior for NVMe is to simply propagate the wp reported by the drive, even for full zones. Since the wp is invalid for a full zone, the wp reported by the drive may be any value. The way that the sd_zbc driver handles a full zone is to always report the wp as zone start + zone len, regardless of what the drive reported. null_blk also follows this convention. Do the same for NVMe, so that a BLKREPORTZONE ioctl reports the write pointer for a full zone in a consistent way, regardless of the interface of the underlying zoned block device. blkzone report before patch: start: 0x000040000, len 0x040000, cap 0x03e000, wptr 0xfffffffffffbfff8 reset:0 non-seq:0, zcond:14(fu) [type: 2(SEQ_WRITE_REQUIRED)] blkzone report after patch: start: 0x000040000, len 0x040000, cap 0x03e000, wptr 0x040000 reset:0 non-seq:0, zcond:14(fu) [type: 2(SEQ_WRITE_REQUIRED)] Signed-off-by: Niklas Cassel Reviewed-by: Keith Busch Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/zns.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index bfc259e0d7b8..9f81beb4df4e 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -166,7 +166,10 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns, zone.len = ns->zsze; zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap)); zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba)); - zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp)); + if (zone.cond == BLK_ZONE_COND_FULL) + zone.wp = zone.start + zone.len; + else + zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp)); return cb(&zone, idx, data); } From fb1af5bea4670c835e42fc0c14c49d3499468774 Mon Sep 17 00:00:00 2001 From: Geraldo Nascimento Date: Sat, 4 Dec 2021 15:52:24 -0300 Subject: [PATCH 191/868] ALSA: usb-audio: Reorder snd_djm_devices[] entries Olivia Mackintosh has posted to alsa-devel reporting that there's a potential bug that could break mixer quirks for Pioneer devices introduced by 6d27788160362a7ee6c0d317636fe4b1ddbe59a7 "ALSA: usb-audio: Add support for the Pioneer DJM 750MK2 Mixer/Soundcard". This happened because the DJM 750 MK2 was added last to the Pioneer DJM device table index and defined as 0x4 but was added to snd_djm_devices[] just after the DJM 750 (MK1) entry instead of last, after the DJM 900 NXS2. This escaped review. To prevent that from ever happening again, Takashi Iwai suggested to use C99 array designators in snd_djm_devices[] instead of simply reordering the entries. Fixes: 6d2778816036 ("ALSA: usb-audio: Add support for the Pioneer DJM 750MK2") Reported-by: Olivia Mackintosh Suggested-by: Takashi Iwai Signed-off-by: Geraldo Nascimento Link: https://lore.kernel.org/r/Yau46FDzoql0SNnW@geday Signed-off-by: Takashi Iwai --- sound/usb/mixer_quirks.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index d489c1de3bae..823b6b8de942 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3016,11 +3016,11 @@ static const struct snd_djm_ctl snd_djm_ctls_750mk2[] = { static const struct snd_djm_device snd_djm_devices[] = { - SND_DJM_DEVICE(250mk2), - SND_DJM_DEVICE(750), - SND_DJM_DEVICE(750mk2), - SND_DJM_DEVICE(850), - SND_DJM_DEVICE(900nxs2) + [SND_DJM_250MK2_IDX] = SND_DJM_DEVICE(250mk2), + [SND_DJM_750_IDX] = SND_DJM_DEVICE(750), + [SND_DJM_850_IDX] = SND_DJM_DEVICE(850), + [SND_DJM_900NXS2_IDX] = SND_DJM_DEVICE(900nxs2), + [SND_DJM_750MK2_IDX] = SND_DJM_DEVICE(750mk2), }; From cd57eb3c403cb864e5558874ecd57dd954a5a7f7 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Dec 2021 19:15:41 +0200 Subject: [PATCH 192/868] ASoC: SOF: Intel: pci-tgl: add ADL-N support Add PCI DID for Intel AlderLake-N. Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211203171542.1021399-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index f2ea34df9741..302068fd0b81 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -114,6 +114,8 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x51cc), /* ADL-M */ .driver_data = (unsigned long)&adl_desc}, + { PCI_DEVICE(0x8086, 0x54c8), /* ADL-N */ + .driver_data = (unsigned long)&adl_desc}, { 0, } }; MODULE_DEVICE_TABLE(pci, sof_pci_ids); From de7dd9092cd38384f774d345cccafe81b4b866b0 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 3 Dec 2021 19:15:42 +0200 Subject: [PATCH 193/868] ASoC: SOF: Intel: pci-tgl: add new ADL-P variant Add a PCI DID for a variant of Intel AlderLake-P. Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20211203171542.1021399-2-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/pci-tgl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index 302068fd0b81..fd46210f1730 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -112,6 +112,8 @@ static const struct pci_device_id sof_pci_ids[] = { .driver_data = (unsigned long)&adls_desc}, { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */ .driver_data = (unsigned long)&adl_desc}, + { PCI_DEVICE(0x8086, 0x51cd), /* ADL-P */ + .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x51cc), /* ADL-M */ .driver_data = (unsigned long)&adl_desc}, { PCI_DEVICE(0x8086, 0x54c8), /* ADL-N */ From 85223d609c99eaa07cc598632b426cb33753526f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 6 Dec 2021 13:43:06 +0100 Subject: [PATCH 194/868] regulator: dt-bindings: samsung,s5m8767: add missing op_mode to bucks While converting bindings to dtschema, the buck regulators lost "op_mode" property. The "op_mode" is a valid property for all regulators (both LDOs and bucks), so add it. Reported-by: Rob Herring Fixes: fab58debc137 ("regulator: dt-bindings: samsung,s5m8767: convert to dtschema") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211206124306.14006-1-krzysztof.kozlowski@canonical.com Signed-off-by: Mark Brown --- .../bindings/regulator/samsung,s5m8767.yaml | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml index 80a63d47790a..c98929a213e9 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.yaml @@ -51,6 +51,19 @@ patternProperties: description: Properties for single BUCK regulator. + properties: + op_mode: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 1 + description: | + Describes the different operating modes of the regulator with power + mode change in SOC. The different possible values are: + 0 - always off mode + 1 - on in normal mode + 2 - low power mode + 3 - suspend mode + required: - regulator-name @@ -63,6 +76,18 @@ patternProperties: Properties for single BUCK regulator. properties: + op_mode: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3] + default: 1 + description: | + Describes the different operating modes of the regulator with power + mode change in SOC. The different possible values are: + 0 - always off mode + 1 - on in normal mode + 2 - low power mode + 3 - suspend mode + s5m8767,pmic-ext-control-gpios: maxItems: 1 description: | From db6689b643d8653092f5853751ea2cdbc299f8d3 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Mon, 6 Dec 2021 18:19:31 +0800 Subject: [PATCH 195/868] spi: change clk_disable_unprepare to clk_unprepare The corresponding API for clk_prepare is clk_unprepare, other than clk_disable_unprepare. Fix this by changing clk_disable_unprepare to clk_unprepare. Fixes: 5762ab71eb24 ("spi: Add support for Armada 3700 SPI Controller") Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20211206101931.2816597-1-mudongliangabcd@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-armada-3700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 46feafe4e201..d8cc4b270644 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -901,7 +901,7 @@ static int a3700_spi_probe(struct platform_device *pdev) return 0; error_clk: - clk_disable_unprepare(spi->clk); + clk_unprepare(spi->clk); error: spi_master_put(master); out: From 1a1aa356ddf3f16539f5962c01c5f702686dfc15 Mon Sep 17 00:00:00 2001 From: Michal Maloszewski Date: Tue, 26 Oct 2021 12:59:09 +0000 Subject: [PATCH 196/868] iavf: Fix reporting when setting descriptor count iavf_set_ringparams doesn't communicate to the user that 1. The user requested descriptor count is out of range. Instead it just quietly sets descriptors to the "clamped" value and calls it done. This makes it look an invalid value was successfully set as the descriptor count when this isn't actually true. 2. The user provided descriptor count needs to be inflated for alignment reasons. This behavior is confusing. The ice driver has already addressed this by rejecting invalid values for descriptor count and messaging for alignment adjustments. Do the same thing here by adding the error and info messages. Fixes: fbb7ddfef253 ("i40evf: core ethtool functionality") Signed-off-by: Anirudh Venkataramanan Signed-off-by: Michal Maloszewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/iavf/iavf_ethtool.c | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 0cecaff38d04..461f5237a2f8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -615,23 +615,44 @@ static int iavf_set_ringparam(struct net_device *netdev, if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; - new_tx_count = clamp_t(u32, ring->tx_pending, - IAVF_MIN_TXD, - IAVF_MAX_TXD); - new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (ring->tx_pending > IAVF_MAX_TXD || + ring->tx_pending < IAVF_MIN_TXD || + ring->rx_pending > IAVF_MAX_RXD || + ring->rx_pending < IAVF_MIN_RXD) { + netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n", + ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD, + IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE); + return -EINVAL; + } - new_rx_count = clamp_t(u32, ring->rx_pending, - IAVF_MIN_RXD, - IAVF_MAX_RXD); - new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_tx_count != ring->tx_pending) + netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", + new_tx_count); + + new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_rx_count != ring->rx_pending) + netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", + new_rx_count); /* if nothing to do return success */ if ((new_tx_count == adapter->tx_desc_count) && - (new_rx_count == adapter->rx_desc_count)) + (new_rx_count == adapter->rx_desc_count)) { + netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); return 0; + } - adapter->tx_desc_count = new_tx_count; - adapter->rx_desc_count = new_rx_count; + if (new_tx_count != adapter->tx_desc_count) { + netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n", + adapter->tx_desc_count, new_tx_count); + adapter->tx_desc_count = new_tx_count; + } + + if (new_rx_count != adapter->rx_desc_count) { + netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n", + adapter->rx_desc_count, new_rx_count); + adapter->rx_desc_count = new_rx_count; + } if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; From 776b54e97a7d993ba23696e032426d5dea5bbe70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 6 Dec 2021 08:04:09 +0100 Subject: [PATCH 197/868] mtd_blkdevs: don't scan partitions for plain mtdblock mtdblock / mtdblock_ro set part_bits to 0 and thus nevever scanned partitions. Restore that behavior by setting the GENHD_FL_NO_PART flag. Fixes: 1ebe2e5f9d68e94c ("block: remove GENHD_FL_EXT_DEVT") Reported-by: Geert Uytterhoeven Signed-off-by: Christoph Hellwig Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211206070409.2836165-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/mtd/mtd_blkdevs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 4eaba6f4ec68..a69d064a8eec 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -346,7 +346,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) gd->minors = 1 << tr->part_bits; gd->fops = &mtd_block_ops; - if (tr->part_bits) + if (tr->part_bits) { if (new->devnum < 26) snprintf(gd->disk_name, sizeof(gd->disk_name), "%s%c", tr->name, 'a' + new->devnum); @@ -355,9 +355,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) "%s%c%c", tr->name, 'a' - 1 + new->devnum / 26, 'a' + new->devnum % 26); - else + } else { snprintf(gd->disk_name, sizeof(gd->disk_name), "%s%d", tr->name, new->devnum); + gd->flags |= GENHD_FL_NO_PART; + } set_capacity(gd, ((u64)new->size * tr->blksize) >> 9); From 61125b8be85dfbc7e9c7fe1cc6c6d631ab603516 Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Fri, 14 May 2021 11:43:13 +0200 Subject: [PATCH 198/868] i40e: Fix failed opcode appearing if handling messages from VF Fix failed operation code appearing if handling messages from VF. Implemented by waiting for VF appropriate state if request starts handle while VF reset. Without this patch the message handling request while VF is in a reset state ends with error -5 (I40E_ERR_PARAM). Fixes: 5c3c48ac6bf5 ("i40e: implement virtual device interface") Signed-off-by: Grzegorz Szczurek Signed-off-by: Karen Sornek Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 70 +++++++++++++------ .../ethernet/intel/i40e/i40e_virtchnl_pf.h | 2 + 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 80ae264c99ba..f651861442c2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1948,6 +1948,32 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf, return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0); } +/** + * i40e_sync_vf_state + * @vf: pointer to the VF info + * @state: VF state + * + * Called from a VF message to synchronize the service with a potential + * VF reset state + **/ +static bool i40e_sync_vf_state(struct i40e_vf *vf, enum i40e_vf_states state) +{ + int i; + + /* When handling some messages, it needs VF state to be set. + * It is possible that this flag is cleared during VF reset, + * so there is a need to wait until the end of the reset to + * handle the request message correctly. + */ + for (i = 0; i < I40E_VF_STATE_WAIT_COUNT; i++) { + if (test_bit(state, &vf->vf_states)) + return true; + usleep_range(10000, 20000); + } + + return test_bit(state, &vf->vf_states); +} + /** * i40e_vc_get_version_msg * @vf: pointer to the VF info @@ -2008,7 +2034,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) size_t len = 0; int ret; - if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -2131,7 +2157,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg) bool allmulti = false; bool alluni = false; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -2219,7 +2245,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_vsi *vsi; u16 num_qps_all = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2368,7 +2394,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2540,7 +2566,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2590,7 +2616,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) u8 cur_pairs = vf->num_queue_pairs; struct i40e_pf *pf = vf->pf; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) return -EINVAL; if (req_pairs > I40E_MAX_VF_QUEUES) { @@ -2635,7 +2661,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) memset(&stats, 0, sizeof(struct i40e_eth_stats)); - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2752,7 +2778,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2824,7 +2850,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2968,7 +2994,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -3088,9 +3114,9 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) struct i40e_vsi *vsi = NULL; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) || - (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { + vrk->key_len != I40E_HKEY_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3119,9 +3145,9 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u16 i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) || - (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { + vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3154,7 +3180,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int len = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3190,7 +3216,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg) struct i40e_hw *hw = &pf->hw; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3215,7 +3241,7 @@ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3241,7 +3267,7 @@ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3468,7 +3494,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3599,7 +3625,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -3708,7 +3734,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u64 speed = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3824,7 +3850,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 091e32c1bb46..49575a640a84 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -18,6 +18,8 @@ #define I40E_MAX_VF_PROMISC_FLAGS 3 +#define I40E_VF_STATE_WAIT_COUNT 20 + /* Various queue ctrls */ enum i40e_queue_ctrl { I40E_QUEUE_CTRL_UNKNOWN = 0, From 8aa55ab422d9d0d825ebfb877702ed661e96e682 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Fri, 16 Jul 2021 11:33:56 +0200 Subject: [PATCH 199/868] i40e: Fix pre-set max number of queues for VF After setting pre-set combined to 16 queues and reserving 16 queues by tc qdisc, pre-set maximum combined queues returned to default value after VF reset being 4 and this generated errors during removing tc. Fixed by removing clear num_req_queues before reset VF. Fixes: e284fc280473 (i40e: Add and delete cloud filter) Signed-off-by: Grzegorz Szczurek Signed-off-by: Mateusz Palczewski Tested-by: Bindushree P Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index f651861442c2..2ea4deb8fc44 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3823,11 +3823,6 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) /* set this flag only after making sure all inputs are sane */ vf->adq_enabled = true; - /* num_req_queues is set when user changes number of queues via ethtool - * and this causes issue for default VSI(which depends on this variable) - * when ADq is enabled, hence reset it. - */ - vf->num_req_queues = 0; /* reset the VF in order to allocate resources */ i40e_vc_reset_vf(vf, true); From 23ec111bf3549aae37140330c31a16abfc172421 Mon Sep 17 00:00:00 2001 From: Norbert Zulinski Date: Mon, 22 Nov 2021 12:29:05 +0100 Subject: [PATCH 200/868] i40e: Fix NULL pointer dereference in i40e_dbg_dump_desc When trying to dump VFs VSI RX/TX descriptors using debugfs there was a crash due to NULL pointer dereference in i40e_dbg_dump_desc. Added a check to i40e_dbg_dump_desc that checks if VSI type is correct for dumping RX/TX descriptors. Fixes: 02e9c290814c ("i40e: debugfs interface") Signed-off-by: Sylwester Dziedziuch Signed-off-by: Norbert Zulinski Signed-off-by: Mateusz Palczewski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 291e61ac3e44..2c1b1da1220e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -553,6 +553,14 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, dev_info(&pf->pdev->dev, "vsi %d not found\n", vsi_seid); return; } + if (vsi->type != I40E_VSI_MAIN && + vsi->type != I40E_VSI_FDIR && + vsi->type != I40E_VSI_VMDQ2) { + dev_info(&pf->pdev->dev, + "vsi %d type %d descriptor rings not available\n", + vsi_seid, vsi->type); + return; + } if (type == RING_TYPE_XDP && !i40e_enabled_xdp_vsi(vsi)) { dev_info(&pf->pdev->dev, "XDP not enabled on VSI %d\n", vsi_seid); return; From 3583521aabac76e58675297cead02f9ecac518b6 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 30 Nov 2021 17:29:54 +0000 Subject: [PATCH 201/868] percpu: km: ensure it is used with NOMMU (either UP or SMP) Currently, NOMMU pull km allocator via !SMP dependency because most of them are UP, yet for SMP+NOMMU vm allocator gets pulled which: * may lead to broken build [1] * ...or not working runtime due to [2] It looks like SMP+NOMMU case was overlooked in bbddff054587 ("percpu: use percpu allocator on UP too") so restore that. [1] For ARM SMP+NOMMU (R-class cores) arm-none-linux-gnueabihf-ld: mm/percpu.o: in function `pcpu_post_unmap_tlb_flush': mm/percpu-vm.c:188: undefined reference to `flush_tlb_kernel_range' [2] static inline int vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { return -EINVAL; } Signed-off-by: Vladimir Murzin Tested-by: Rob Landley Tested-by: Rich Felker [Dennis: use depends instead of default for condition] Signed-off-by: Dennis Zhou --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 28edafc820ad..356f4f2c779e 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -428,7 +428,7 @@ config THP_SWAP # UP and nommu archs use km based percpu allocator # config NEED_PER_CPU_KM - depends on !SMP + depends on !SMP || !MMU bool default y From e47498afeca9a0c6d07eeeacc46d563555a3f677 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 6 Dec 2021 10:49:04 -0700 Subject: [PATCH 202/868] io-wq: remove spurious bit clear on task_work addition There's a small race here where the task_work could finish and drop the worker itself, so that by the time that task_work_add() returns with a successful addition we've already put the worker. The worker callbacks clear this bit themselves, so we don't actually need to manually clear it in the caller. Get rid of it. Reported-by: syzbot+b60c982cb0efc5e05a47@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 50cf9f92da36..35da9d90df76 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -359,10 +359,8 @@ static bool io_queue_worker_create(struct io_worker *worker, init_task_work(&worker->create_work, func); worker->create_index = acct->index; - if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { - clear_bit_unlock(0, &worker->create_state); + if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) return true; - } clear_bit_unlock(0, &worker->create_state); fail_release: io_worker_release(worker); From 96db48c9d777a73a33b1d516c5cfed7a417a5f40 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 30 Nov 2021 09:27:56 +0100 Subject: [PATCH 203/868] dt-bindings: net: Reintroduce PHY no lane swap binding This binding was already documented in phy.txt, commit 252ae5330daa ("Documentation: devicetree: Add PHY no lane swap binding"), but got accidently removed during YAML conversion in commit d8704342c109 ("dt-bindings: net: Add a YAML schemas for the generic PHY options"). Note: 'enet-phy-lane-no-swap' and the absence of 'enet-phy-lane-swap' are not identical, as the former one disable this feature, while the latter one doesn't change anything. Fixes: d8704342c109 ("dt-bindings: net: Add a YAML schemas for the generic PHY options") Signed-off-by: Alexander Stein Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20211130082756.713919-1-alexander.stein@ew.tq-group.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/ethernet-phy.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 2766fe45bb98..ee42328a109d 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -91,6 +91,14 @@ properties: compensate for the board being designed with the lanes swapped. + enet-phy-lane-no-swap: + $ref: /schemas/types.yaml#/definitions/flag + description: + If set, indicates that PHY will disable swap of the + TX/RX lanes. This property allows the PHY to work correcly after + e.g. wrong bootstrap configuration caused by issues in PCB + layout design. + eee-broken-100tx: $ref: /schemas/types.yaml#/definitions/flag description: From c4cb38b54b365edafb351e5371b9ae9eebf8e805 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 3 Dec 2021 14:35:06 +0100 Subject: [PATCH 204/868] dt-bindings: input: gpio-keys: Fix interrupts in example The "interrupts" property in the example looks weird: - The type is not in the last cell, - Level interrupts don't work well with gpio-keys, as they keep the interrupt asserted as long as the key is pressed, causing an interrupt storm. Use a more realistic falling-edge interrupt instead. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/47ecd2d8efcf09f8ab47de87a7bcfafc82208776.1638538079.git.geert+renesas@glider.be Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/input/gpio-keys.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml index 060a309ff8e7..dbe7ecc19ccb 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.yaml +++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml @@ -142,7 +142,7 @@ examples: down { label = "GPIO Key DOWN"; linux,code = <108>; - interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>; + interrupts = <1 IRQ_TYPE_EDGE_FALLING>; }; }; From 656eb419b5076bacc536ddb66d30f2f3bf0bba92 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 6 Dec 2021 16:29:05 +0100 Subject: [PATCH 205/868] dt-bindings: bq25980: Fixup the example Use the ti,watchdog-timeout-ms property instead of the unsupported ti,watchdog-timer property to make the example validate correctly. Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20211206152905.226239-1-thierry.reding@gmail.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/power/supply/bq25980.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/power/supply/bq25980.yaml b/Documentation/devicetree/bindings/power/supply/bq25980.yaml index 06eca6667f67..8367a1fd4057 100644 --- a/Documentation/devicetree/bindings/power/supply/bq25980.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq25980.yaml @@ -105,7 +105,7 @@ examples: reg = <0x65>; interrupt-parent = <&gpio1>; interrupts = <16 IRQ_TYPE_EDGE_FALLING>; - ti,watchdog-timer = <0>; + ti,watchdog-timeout-ms = <0>; ti,sc-ocp-limit-microamp = <2000000>; ti,sc-ovp-limit-microvolt = <17800000>; monitored-battery = <&bat>; From 7d0c009043f6a970f62dbf5aecda9f8c3ccafcff Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 3 Dec 2021 14:28:10 -0700 Subject: [PATCH 206/868] platform/x86/intel: hid: add quirk to support Surface Go 3 Similar to other systems Surface Go 3 requires a DMI quirk to enable 5 button array for power and volume buttons. Buglink: https://github.com/linux-surface/linux-surface/issues/595 Cc: stable@vger.kernel.org Signed-off-by: Alex Hung Link: https://lore.kernel.org/r/20211203212810.2666508-1-alex.hung@canonical.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/hid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 08598942a6d7..13f8cf70b9ae 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -99,6 +99,13 @@ static const struct dmi_system_id button_array_table[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Tablet Gen 2"), }, }, + { + .ident = "Microsoft Surface Go 3", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go 3"), + }, + }, { } }; From e53f2086856c16ccab80fd0ac012baa1ae88af73 Mon Sep 17 00:00:00 2001 From: Martin Botka Date: Tue, 30 Nov 2021 22:20:15 +0100 Subject: [PATCH 207/868] clk: qcom: sm6125-gcc: Swap ops of ice and apps on sdcc1 Without this change eMMC runs at overclocked freq. Swap the ops to not OC the eMMC. Signed-off-by: Martin Botka Link: https://lore.kernel.org/r/20211130212015.25232-1-martin.botka@somainline.org Reviewed-by: Bjorn Andersson Fixes: 4b8d6ae57cdf ("clk: qcom: Add SM6125 (TRINKET) GCC driver") Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sm6125.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/gcc-sm6125.c b/drivers/clk/qcom/gcc-sm6125.c index 543cfab7561f..431b55bb0d2f 100644 --- a/drivers/clk/qcom/gcc-sm6125.c +++ b/drivers/clk/qcom/gcc-sm6125.c @@ -1121,7 +1121,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = { .name = "gcc_sdcc1_apps_clk_src", .parent_data = gcc_parent_data_1, .num_parents = ARRAY_SIZE(gcc_parent_data_1), - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -1143,7 +1143,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = { .name = "gcc_sdcc1_ice_core_clk_src", .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), - .ops = &clk_rcg2_floor_ops, + .ops = &clk_rcg2_ops, }, }; From cd8c917a56f20f48748dd43d9ae3caff51d5b987 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Mon, 6 Dec 2021 21:42:01 +0100 Subject: [PATCH 208/868] Makefile: Do not quote value for CONFIG_CC_IMPLICIT_FALLTHROUGH Andreas reported that a specific build environment for an external module, being a bit broken, does pass CC_IMPLICIT_FALLTHROUGH quoted as argument to gcc, causing an error gcc-11: error: "-Wimplicit-fallthrough=5": linker input file not found: No such file or directory Until this is more generally fixed as outlined in [1], by fixing scripts/link-vmlinux.sh, scripts/gen_autoksyms.sh, etc to not directly include the include/config/auto.conf, and in a second step, change Kconfig to generate the auto.conf without "", workaround the issue by explicitly unquoting CC_IMPLICIT_FALLTHROUGH. Reported-by: Andreas Beckmann Link: https://bugs.debian.org/1001083 Link: https://lore.kernel.org/linux-kbuild/CAK7LNAR-VXwHFEJqCcrFDZj+_4+Xd6oynbj_0eS8N504_ydmyw@mail.gmail.com/ [1] Signed-off-by: Salvatore Bonaccorso Reviewed-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Linus Torvalds --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8e35d7804fef..ef967a26bcd3 100644 --- a/Makefile +++ b/Makefile @@ -789,7 +789,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror -KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) +KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH:"%"=%) ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += -Qunused-arguments From dde91ccfa25fd58f64c397d91b81a4b393100ffa Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 3 Dec 2021 11:13:18 +0100 Subject: [PATCH 209/868] ethtool: do not perform operations on net devices being unregistered There is a short period between a net device starts to be unregistered and when it is actually gone. In that time frame ethtool operations could still be performed, which might end up in unwanted or undefined behaviours[1]. Do not allow ethtool operations after a net device starts its unregistration. This patch targets the netlink part as the ioctl one isn't affected: the reference to the net device is taken and the operation is executed within an rtnl lock section and the net device won't be found after unregister. [1] For example adding Tx queues after unregister ends up in NULL pointer exceptions and UaFs, such as: BUG: KASAN: use-after-free in kobject_get+0x14/0x90 Read of size 1 at addr ffff88801961248c by task ethtool/755 CPU: 0 PID: 755 Comm: ethtool Not tainted 5.15.0-rc6+ #778 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-4.fc34 04/014 Call Trace: dump_stack_lvl+0x57/0x72 print_address_description.constprop.0+0x1f/0x140 kasan_report.cold+0x7f/0x11b kobject_get+0x14/0x90 kobject_add_internal+0x3d1/0x450 kobject_init_and_add+0xba/0xf0 netdev_queue_update_kobjects+0xcf/0x200 netif_set_real_num_tx_queues+0xb4/0x310 veth_set_channels+0x1c3/0x550 ethnl_set_channels+0x524/0x610 Fixes: 041b1c5d4a53 ("ethtool: helper functions for netlink interface") Suggested-by: Jakub Kicinski Signed-off-by: Antoine Tenart Link: https://lore.kernel.org/r/20211203101318.435618-1-atenart@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 38b44c0291b1..96f4180aabd2 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -40,7 +40,8 @@ int ethnl_ops_begin(struct net_device *dev) if (dev->dev.parent) pm_runtime_get_sync(dev->dev.parent); - if (!netif_device_present(dev)) { + if (!netif_device_present(dev) || + dev->reg_state == NETREG_UNREGISTERING) { ret = -ENODEV; goto err; } From 4dbb0dad8e63fcd0b5a117c2861d2abe7ff5f186 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Dec 2021 11:28:22 -0800 Subject: [PATCH 210/868] devlink: fix netns refcount leak in devlink_nl_cmd_reload() While preparing my patch series adding netns refcount tracking, I spotted bugs in devlink_nl_cmd_reload() Some error paths forgot to release a refcount on a netns. To fix this, we can reduce the scope of get_net()/put_net() section around the call to devlink_reload(). Fixes: ccdf07219da6 ("devlink: Add reload action option to devlink reload command") Fixes: dc64cc7c6310 ("devlink: Add devlink reload limit option") Signed-off-by: Eric Dumazet Cc: Moshe Shemesh Cc: Jacob Keller Cc: Jiri Pirko Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20211205192822.1741045-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 5ad72dbfcd07..c06c9ba6e8c5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4110,14 +4110,6 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return err; } - if (info->attrs[DEVLINK_ATTR_NETNS_PID] || - info->attrs[DEVLINK_ATTR_NETNS_FD] || - info->attrs[DEVLINK_ATTR_NETNS_ID]) { - dest_net = devlink_netns_get(skb, info); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - } - if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION]) action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]); else @@ -4160,6 +4152,14 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } + if (info->attrs[DEVLINK_ATTR_NETNS_PID] || + info->attrs[DEVLINK_ATTR_NETNS_FD] || + info->attrs[DEVLINK_ATTR_NETNS_ID]) { + dest_net = devlink_netns_get(skb, info); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + } + err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); if (dest_net) From 3f8d6577163f9903d4af5e5c0f90eb42944a8851 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 22 Nov 2021 10:11:05 -0300 Subject: [PATCH 211/868] Revert "perf bench: Fix two memory leaks detected with ASan" This: This reverts commit 92723ea0f11d92496687db8c9725248e9d1e5e1d. # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRRRRRRR Ok # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRR FAILED! # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRR Ok # perf test 91 91: perf stat --bpf-counters test :RRRRRRRRRRRRRRR Ok yep, it seems the perf bench is broken so the counts won't correlated if I revert this one: 92723ea0f11d perf bench: Fix two memory leaks detected with ASan it works for me again.. it seems to break -t option [root@dell-r440-01 perf]# ./perf bench sched messaging -g 1 -l 100 -t # Running 'sched/messaging' benchmark: RRRperf: CLIENT: ready write: Bad file descriptor Rperf: SENDER: write: Bad file descriptor Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Namhyung Kim Cc: Sohaib Mohamed Cc: Song Liu Link: https://lore.kernel.org/lkml/YZev7KClb%2Fud43Lc@krava/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index fa0ff4ce2b74..488f6e6ba1a5 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -223,8 +223,6 @@ static unsigned int group(pthread_t *pth, snd_ctx->out_fds[i] = fds[1]; if (!thread_mode) close(fds[0]); - - free(ctx); } /* Now we have all the fds, fork the senders */ @@ -241,8 +239,6 @@ static unsigned int group(pthread_t *pth, for (i = 0; i < num_fds; i++) close(snd_ctx->out_fds[i]); - free(snd_ctx); - /* Return number of children to reap */ return num_fds * 2; } From 71a16df164b23210d4dcaf35c70825f47d7c5599 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Sep 2021 16:09:08 -0300 Subject: [PATCH 212/868] tools headers UAPI: Sync s390 syscall table file changed by new futex_waitv syscall To pick the changes in these csets: 6c122360cf2f4c5a ("s390: wire up sys_futex_waitv system call") That add support for this new syscall in tools such as 'perf trace'. For instance, this is now possible (adapted from the x86_64 test output): # perf trace -e futex_waitv ^C# # perf trace -v -e futex_waitv event qualifier tracepoint filter: (common_pid != 807333 && common_pid != 3564) && (id == 449) ^C# # perf trace -v -e futex* --max-events 10 event qualifier tracepoint filter: (common_pid != 812168 && common_pid != 3564) && (id == 238 || id == 449) ? ( ): Timer/219310 ... [continued]: futex()) = -1 ETIMEDOUT (Connection timed out) 0.012 ( 0.002 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.024 ( 0.060 ms): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) = 0 0.086 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.088 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d424, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... 0.075 ( 0.005 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d420, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.169 ( 0.004 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d424, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.088 ( 0.089 ms): Timer/219310 ... [continued]: futex()) = 0 0.179 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.181 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... # That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ grep futex tools/perf/arch/s390/entry/syscalls/syscall.tbl 238 common futex sys_futex sys_futex_time32 422 32 futex_time64 - sys_futex 449 common futex_waitv sys_futex_waitv sys_futex_waitv $ This addresses this perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Acked-by: Heiko Carstens Cc: Adrian Hunter , Cc: Jiri Olsa Cc: Namhyung Kim Cc: Vasily Gorbik Link: https://lore.kernel.org/lkml/YZ%2F2qRW%2FTScYTP1U@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index df5261e5cfe1..ed9c5c2eafad 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv sys_futex_waitv From c29d9792607e67ed8a3f6e9db0d96836d885a8c5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 25 Nov 2021 09:14:57 +0200 Subject: [PATCH 213/868] perf inject: Fix itrace space allowed for new attributes The space allowed for new attributes can be too small if existing header information is large. That can happen, for example, if there are very many CPUs, due to having an event ID per CPU per event being stored in the header information. Fix by adding the existing header.data_offset. Also increase the extra space allowed to 8KiB and align to a 4KiB boundary for neatness. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20211125071457.2066863-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index bc5259db5fd9..b9d6306cc14e 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -820,7 +820,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ - output_data_offset = 4096; + output_data_offset = roundup(8192 + session->header.data_offset, 4096); if (inject->strip) strip_init(inject); } From cba43fcf7aaf8369f80aac26cc2c50232c065a9e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Sep 2021 16:09:08 -0300 Subject: [PATCH 214/868] tools headers UAPI: Sync powerpc syscall table file changed by new futex_waitv syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes in this cset: a0eb2da92b715d0c ("futex: Wireup futex_waitv syscall") That add support for this new syscall in tools such as 'perf trace'. For instance, this is now possible (adapted from the x86_64 test output): # perf trace -e futex_waitv ^C# # perf trace -v -e futex_waitv event qualifier tracepoint filter: (common_pid != 807333 && common_pid != 3564) && (id == 449) ^C# # perf trace -v -e futex* --max-events 10 event qualifier tracepoint filter: (common_pid != 812168 && common_pid != 3564) && (id == 221 || id == 449) mmap size 528384B ? ( ): Timer/219310 ... [continued]: futex()) = -1 ETIMEDOUT (Connection timed out) 0.012 ( 0.002 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.024 ( 0.060 ms): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) = 0 0.086 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.088 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d424, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... 0.075 ( 0.005 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d420, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.169 ( 0.004 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d424, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.088 ( 0.089 ms): Timer/219310 ... [continued]: futex()) = 0 0.179 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.181 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... # That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ grep futex tools/perf/arch/powerpc/entry/syscalls/syscall.tbl 221 32 futex sys_futex_time32 221 64 futex sys_futex 221 spu futex sys_futex 422 32 futex_time64 sys_futex sys_futex 449 common futex_waitv sys_futex_waitv $ This addresses this perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Reviewed-by: Athira Rajeev Tested-by: Athira Rajeev Cc: Adrian Hunter , Cc: André Almeida Cc: Arnd Bergmann Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/YZ%2F1OU9mJuyS2HMa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 7bef917cc84e..15109af9d075 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -528,3 +528,4 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv From 4ffbe87e2d5b53bcb0213d8650bbe70bf942de6a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 23 Nov 2021 16:12:29 -0800 Subject: [PATCH 215/868] perf tools: Fix SMT detection fast read path sysfs__read_int() returns 0 on success, and so the fast read path was always failing. Fixes: bb629484d924118e ("perf tools: Simplify checking if SMT is active.") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Konstantin Khlebnikov Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211124001231.3277836-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/smt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 20bacd5972ad..34f1b1b1176c 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,7 +15,7 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) goto done; ncpu = sysconf(_SC_NPROCESSORS_CONF); From 3d1d57debee2d342a47615707588b96658fabb85 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Nov 2021 10:12:41 -0300 Subject: [PATCH 216/868] tools build: Remove needless libpython-version feature check that breaks test-all fast path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 66dfdff03d196e51 ("perf tools: Add Python 3 support") we don't use the tools/build/feature/test-libpython-version.c version in any Makefile feature check: $ find tools/ -type f | xargs grep feature-libpython-version $ The only place where this was used was removed in 66dfdff03d196e51: - ifneq ($(feature-libpython-version), 1) - $(warning Python 3 is not yet supported; please set) - $(warning PYTHON and/or PYTHON_CONFIG appropriately.) - $(warning If you also have Python 2 installed, then) - $(warning try something like:) - $(warning $(and ,)) - $(warning $(and ,) make PYTHON=python2) - $(warning $(and ,)) - $(warning Otherwise, disable Python support entirely:) - $(warning $(and ,)) - $(warning $(and ,) make NO_LIBPYTHON=1) - $(warning $(and ,)) - $(error $(and ,)) - else - LDFLAGS += $(PYTHON_EMBED_LDFLAGS) - EXTLIBS += $(PYTHON_EMBED_LIBADD) - LANG_BINDINGS += $(obj-perf)python/perf.so - $(call detected,CONFIG_LIBPYTHON) - endif And nowadays we either build with PYTHON=python3 or just install the python3 devel packages and perf will build against it. But the leftover feature-libpython-version check made the fast path feature detection to break in all cases except when python2 devel files were installed: $ rpm -qa | grep python.*devel python3-devel-3.9.7-1.fc34.x86_64 $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; $ make -C tools/perf O=/tmp/build/perf install-bin make: Entering directory '/var/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j32' parallel build HOSTCC /tmp/build/perf/fixdep.o $ cat /tmp/build/perf/feature/test-all.make.output In file included from test-all.c:18: test-libpython-version.c:5:10: error: #error 5 | #error | ^~~~~ $ ldd ~/bin/perf | grep python libpython3.9.so.1.0 => /lib64/libpython3.9.so.1.0 (0x00007fda6dbcf000) $ As python3 is the norm these days, fix this by just removing the unused feature-libpython-version feature check, making the test-all fast path to work with the common case. With this: $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ; $ make -C tools/perf O=/tmp/build/perf install-bin |& head make: Entering directory '/var/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j32' parallel build HOSTCC /tmp/build/perf/fixdep.o HOSTLD /tmp/build/perf/fixdep-in.o LINK /tmp/build/perf/fixdep Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] $ ldd ~/bin/perf | grep python libpython3.9.so.1.0 => /lib64/libpython3.9.so.1.0 (0x00007f58800b0000) $ cat /tmp/build/perf/feature/test-all.make.output $ Reviewed-by: James Clark Fixes: 66dfdff03d196e51 ("perf tools: Add Python 3 support") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jaroslav Škarvada Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/YaYmeeC6CS2b8OSz@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 - tools/build/feature/Makefile | 4 ---- tools/build/feature/test-all.c | 5 ----- tools/build/feature/test-libpython-version.c | 11 ----------- tools/perf/Makefile.config | 2 -- 5 files changed, 23 deletions(-) delete mode 100644 tools/build/feature/test-libpython-version.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 45a9a59828c3..ae61f464043a 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -48,7 +48,6 @@ FEATURE_TESTS_BASIC := \ numa_num_possible_cpus \ libperl \ libpython \ - libpython-version \ libslang \ libslang-include-subdir \ libtraceevent \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0a3244ad9673..1480910c792e 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -32,7 +32,6 @@ FILES= \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ - test-libpython-version.bin \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ @@ -227,9 +226,6 @@ $(OUTPUT)test-libperl.bin: $(OUTPUT)test-libpython.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) -$(OUTPUT)test-libpython-version.bin: - $(BUILD) - $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 0b243ce842be..5ffafb967b6e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -14,10 +14,6 @@ # include "test-libpython.c" #undef main -#define main main_test_libpython_version -# include "test-libpython-version.c" -#undef main - #define main main_test_libperl # include "test-libperl.c" #undef main @@ -177,7 +173,6 @@ int main(int argc, char *argv[]) { main_test_libpython(); - main_test_libpython_version(); main_test_libperl(); main_test_hello(); main_test_libelf(); diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c deleted file mode 100644 index 47714b942d4d..000000000000 --- a/tools/build/feature/test-libpython-version.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif - -int main(void) -{ - return 0; -} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index afd144725a0b..3df74cf5651a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -271,8 +271,6 @@ endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) -FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) -FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt From 6c481031c9f71e2b0ff9c49d21116a38ca671746 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 29 Nov 2021 12:23:39 +0100 Subject: [PATCH 217/868] perf test: Fix 'Simple expression parser' test on arch without CPU die topology info Some platforms do not have CPU die support, for example s390. Commit Cc: Ian Rogers Fixes: fdf1e29b6118c18f ("perf expr: Add metric literals for topology.") fails on s390: # perf test -Fv 7 ... # FAILED tests/expr.c:173 #num_dies >= #num_packages ---- end ---- Simple expression parser: FAILED! # Investigating this issue leads to these functions: build_cpu_topology() +--> has_die_topology(void) { struct utsname uts; if (uname(&uts) < 0) return false; if (strncmp(uts.machine, "x86_64", 6)) return false; .... } which always returns false on s390. The caller build_cpu_topology() checks has_die_topology() return value. On false the the struct cpu_topology::die_cpu_list is not contructed and has zero entries. This leads to the failing comparison: #num_dies >= #num_packages. s390 of course has a positive number of packages. Fix this and check if the function build_cpu_topology() did build up a die_cpus_list. The number of entries in this list should be larger than 0. If the number of list element is zero, the die_cpus_list has not been created and the check in function test__expr(): TEST_ASSERT_VAL("#num_dies >= #num_packages", \ num_dies >= num_packages) always fails. Output after: # perf test -Fv 7 7: Simple expression parser : --- start --- division by zero syntax error ---- end ---- Simple expression parser: Ok # Fixes: fdf1e29b6118c18f ("perf expr: Add metric literals for topology.") Signed-off-by: Thomas Richter Acked-by: Ian Rogers Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/20211129112339.3003036-1-tmricht@linux.ibm.com [ Added comment in the added 'if (num_dies)' line about architectures not having die topology ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index c895de481fe1..d54c5371c6a6 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -169,7 +169,9 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0); TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies); TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0); - TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); + + if (num_dies) // Some platforms do not have CPU die support, for example s390 + TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); /* * Source count returns the number of events aggregating in a leader From 1aa79e57730980dfbabd44e7c0a12fbb56064cb6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 28 Nov 2021 00:58:10 -0800 Subject: [PATCH 218/868] perf test: Reset shadow counts before loading Otherwise load counting is an average. Without this change duration_time in test_memory_bandwidth will alter its value if an earlier test contains duration_time. This patch fixes an issue that's introduced in the proposed patch: https://lore.kernel.org/lkml/20211124015226.3317994-1-irogers@google.com/ in perf test "Parse and process metrics". Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211128085810.4027314-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 574b7e4efd3a..07b6f4ec024f 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -109,6 +109,7 @@ static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist, struct evsel *evsel; u64 count; + perf_stat__reset_shadow_stats(); evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); perf_stat__update_shadow_stats(evsel, count, 0, st); From 4747395082abc67c700a75e4cf3b796e79c7cf3a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 18 Nov 2021 12:17:30 -0800 Subject: [PATCH 219/868] perf header: Fix memory leaks when processing feature headers These leaks were found with leak sanitizer running "perf pipe recording and injection test". In pipe mode feat_fd may hold onto an events struct that needs freeing. When string features are processed they may overwrite an already created string, so free this before the overwrite. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211118201730.2302927-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 79cce216727e..e3c1a532d059 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2321,6 +2321,7 @@ out: #define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \ static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \ {\ + free(ff->ph->env.__feat_env); \ ff->ph->env.__feat_env = do_read_string(ff); \ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \ } @@ -4124,6 +4125,7 @@ int perf_event__process_feature(struct perf_session *session, struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event; int type = fe->header.type; u64 feat = fe->feat_id; + int ret = 0; if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { pr_warning("invalid record type %d in pipe-mode\n", type); @@ -4141,11 +4143,13 @@ int perf_event__process_feature(struct perf_session *session, ff.size = event->header.size - sizeof(*fe); ff.ph = &session->header; - if (feat_ops[feat].process(&ff, NULL)) - return -1; + if (feat_ops[feat].process(&ff, NULL)) { + ret = -1; + goto out; + } if (!feat_ops[feat].print || !tool->show_feat_hdr) - return 0; + goto out; if (!feat_ops[feat].full_only || tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { @@ -4154,8 +4158,9 @@ int perf_event__process_feature(struct perf_session *session, fprintf(stdout, "# %s info available, use -I to display\n", feat_ops[feat].name); } - - return 0; +out: + free_event_desc(ff.events); + return ret; } size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) From f7c4e85bccea960203e5872553957511df86913e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 3 Dec 2021 19:32:34 +0000 Subject: [PATCH 220/868] perf bpf: Fix building perf with BUILD_BPF_SKEL=1 by default in more distros Arnaldo reported that building all his containers with BUILD_BPF_SKEL=1 to then make this the default he found problems in some distros where the system linux/bpf.h file was being used and lacked this: util/bpf_skel/bperf_leader.bpf.c:13:20: error: use of undeclared identifier 'BPF_F_PRESERVE_ELEMS' __uint(map_flags, BPF_F_PRESERVE_ELEMS); So use instead the vmlinux.h file generated by bpftool from BTF info. This fixed these as well, getting the build back working on debian:11, debian:experimental and ubuntu:21.10: In file included from In file included from util/bpf_skel/bperf_leader.bpf.cutil/bpf_skel/bpf_prog_profiler.bpf.c::33: : In file included from In file included from /usr/include/linux/bpf.h/usr/include/linux/bpf.h::1111: : /usr/include/linux/types.h/usr/include/linux/types.h::55::1010:: In file included from util/bpf_skel/bperf_follower.bpf.c:3fatal errorfatal error: : : In file included from /usr/include/linux/bpf.h:'asm/types.h' file not found11'asm/types.h' file not found: /usr/include/linux/types.h:5:10: fatal error: 'asm/types.h' file not found #include #include ^~~~~~~~~~~~~ ^~~~~~~~~~~~~ #include ^~~~~~~~~~~~~ 1 error generated. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Song Liu Tested-by: Athira Rajeev Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/CF175681-8101-43D1-ABDB-449E644BE986@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_skel/bperf_follower.bpf.c | 3 +-- tools/perf/util/bpf_skel/bperf_leader.bpf.c | 3 +-- tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index b8fa3cb2da23..4a6acfde1493 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include -#include +#include "vmlinux.h" #include #include #include "bperf.h" diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c index 4f70d1459e86..40d962b05863 100644 --- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook -#include -#include +#include "vmlinux.h" #include #include #include "bperf.h" diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index ab12b4c4ece2..97037d3b3d9f 100644 --- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include +#include "vmlinux.h" #include #include From 5a897531e00243cebbcc4dbe4ab06cd559ccf53f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 3 Dec 2021 15:14:41 -0800 Subject: [PATCH 221/868] perf bpf_skel: Do not use typedef to avoid error on old clang When building bpf_skel with clang-10, typedef causes confusions like: libbpf: map 'prev_readings': unexpected def kind var. Fix this by removing the typedef. Fixes: 7fac83aaf2eecc9e ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Song Liu Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/BEF5C312-4331-4A60-AEC0-AD7617CB2BC4@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_skel/bperf.h | 14 -------------- tools/perf/util/bpf_skel/bperf_follower.bpf.c | 16 +++++++++++++--- tools/perf/util/bpf_skel/bperf_leader.bpf.c | 16 +++++++++++++--- 3 files changed, 26 insertions(+), 20 deletions(-) delete mode 100644 tools/perf/util/bpf_skel/bperf.h diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h deleted file mode 100644 index 186a5551ddb9..000000000000 --- a/tools/perf/util/bpf_skel/bperf.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -// Copyright (c) 2021 Facebook - -#ifndef __BPERF_STAT_H -#define __BPERF_STAT_H - -typedef struct { - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(struct bpf_perf_event_value)); - __uint(max_entries, 1); -} reading_map; - -#endif /* __BPERF_STAT_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index 4a6acfde1493..f193998530d4 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -3,11 +3,21 @@ #include "vmlinux.h" #include #include -#include "bperf.h" #include "bperf_u.h" -reading_map diff_readings SEC(".maps"); -reading_map accum_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} accum_readings SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_HASH); diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c index 40d962b05863..e2a2d4cd7779 100644 --- a/tools/perf/util/bpf_skel/bperf_leader.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -3,7 +3,6 @@ #include "vmlinux.h" #include #include -#include "bperf.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -12,8 +11,19 @@ struct { __uint(map_flags, BPF_F_PRESERVE_ELEMS); } events SEC(".maps"); -reading_map prev_readings SEC(".maps"); -reading_map diff_readings SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} prev_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} diff_readings SEC(".maps"); SEC("raw_tp/sched_switch") int BPF_PROG(on_switch) From 3fe5185db46fedea7a6852d6a59d6e7cdb5d818a Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Fri, 3 Dec 2021 01:52:18 -0800 Subject: [PATCH 222/868] scsi: qedi: Fix cmd_cleanup_cmpl counter mismatch issue When issued LUN reset under heavy I/O we hit the qedi WARN_ON because of a mismatch in firmware I/O cmd cleanup request count and I/O cmd cleanup response count received. The mismatch is because of a race caused by the postfix increment of cmd_cleanup_cmpl. [qedi_clearsq:1295]:18: fatal error, need hard reset, cid=0x0 WARNING: CPU: 48 PID: 110963 at drivers/scsi/qedi/qedi_fw.c:1296 qedi_clearsq+0xa5/0xd0 [qedi] CPU: 48 PID: 110963 Comm: kworker/u130:0 Kdump: loaded Tainted: G W Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 04/15/2020 Workqueue: iscsi_conn_cleanup iscsi_cleanup_conn_work_fn [scsi_transport_iscsi] RIP: 0010:qedi_clearsq+0xa5/0xd0 [qedi] RSP: 0018:ffffac2162c7fd98 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff975213c40ab8 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9761bf816858 RDI: ffff9761bf816858 RBP: ffff975247018628 R08: 000000000000522c R09: 000000000000005b R10: 0000000000000000 R11: ffffac2162c7fbd8 R12: ffff97522e1b2be8 R13: 0000000000000000 R14: ffff97522e1b2800 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff9761bf800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1a34e3e1a0 CR3: 0000000108bb2000 CR4: 0000000000350ee0 Call Trace: qedi_ep_disconnect+0x533/0x550 [qedi] ? iscsi_dbg_trace+0x63/0x80 [scsi_transport_iscsi] ? _cond_resched+0x15/0x30 ? iscsi_suspend_queue+0x19/0x40 [libiscsi] iscsi_ep_disconnect+0xb0/0x130 [scsi_transport_iscsi] iscsi_cleanup_conn_work_fn+0x82/0x130 [scsi_transport_iscsi] process_one_work+0x1a7/0x360 ? create_worker+0x1a0/0x1a0 worker_thread+0x30/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x116/0x130 ? kthread_flush_work_fn+0x10/0x10 ret_from_fork+0x22/0x40 ---[ end trace 5f1441f59082235c ]--- Link: https://lore.kernel.org/r/20211203095218.5477-1-mrangankar@marvell.com Reviewed-by: Lee Duncan Reviewed-by: Mike Christie Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_fw.c | 37 ++++++++++++++-------------------- drivers/scsi/qedi/qedi_iscsi.c | 2 +- drivers/scsi/qedi/qedi_iscsi.h | 2 +- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 84a4204a2cb4..5916ed7662d5 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -732,7 +732,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, { struct qedi_work_map *work, *work_tmp; u32 proto_itt = cqe->itid; - itt_t protoitt = 0; int found = 0; struct qedi_cmd *qedi_cmd = NULL; u32 iscsi_cid; @@ -812,16 +811,12 @@ unlock: return; check_cleanup_reqs: - if (qedi_conn->cmd_cleanup_req > 0) { - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID, + if (atomic_inc_return(&qedi_conn->cmd_cleanup_cmpl) == + qedi_conn->cmd_cleanup_req) { + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "Freeing tid=0x%x for cid=0x%x\n", cqe->itid, qedi_conn->iscsi_conn_id); - qedi_conn->cmd_cleanup_cmpl++; wake_up(&qedi_conn->wait_queue); - } else { - QEDI_ERR(&qedi->dbg_ctx, - "Delayed or untracked cleanup response, itt=0x%x, tid=0x%x, cid=0x%x\n", - protoitt, cqe->itid, qedi_conn->iscsi_conn_id); } } @@ -1163,7 +1158,7 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, } qedi_conn->cmd_cleanup_req = 0; - qedi_conn->cmd_cleanup_cmpl = 0; + atomic_set(&qedi_conn->cmd_cleanup_cmpl, 0); QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "active_cmd_count=%d, cid=0x%x, in_recovery=%d, lun_reset=%d\n", @@ -1215,16 +1210,15 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, qedi_conn->iscsi_conn_id); rval = wait_event_interruptible_timeout(qedi_conn->wait_queue, - ((qedi_conn->cmd_cleanup_req == - qedi_conn->cmd_cleanup_cmpl) || - test_bit(QEDI_IN_RECOVERY, - &qedi->flags)), - 5 * HZ); + (qedi_conn->cmd_cleanup_req == + atomic_read(&qedi_conn->cmd_cleanup_cmpl)) || + test_bit(QEDI_IN_RECOVERY, &qedi->flags), + 5 * HZ); if (rval) { QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "i/o cmd_cleanup_req=%d, equal to cmd_cleanup_cmpl=%d, cid=0x%x\n", qedi_conn->cmd_cleanup_req, - qedi_conn->cmd_cleanup_cmpl, + atomic_read(&qedi_conn->cmd_cleanup_cmpl), qedi_conn->iscsi_conn_id); return 0; @@ -1233,7 +1227,7 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "i/o cmd_cleanup_req=%d, not equal to cmd_cleanup_cmpl=%d, cid=0x%x\n", qedi_conn->cmd_cleanup_req, - qedi_conn->cmd_cleanup_cmpl, + atomic_read(&qedi_conn->cmd_cleanup_cmpl), qedi_conn->iscsi_conn_id); iscsi_host_for_each_session(qedi->shost, @@ -1242,11 +1236,10 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, /* Enable IOs for all other sessions except current.*/ if (!wait_event_interruptible_timeout(qedi_conn->wait_queue, - (qedi_conn->cmd_cleanup_req == - qedi_conn->cmd_cleanup_cmpl) || - test_bit(QEDI_IN_RECOVERY, - &qedi->flags), - 5 * HZ)) { + (qedi_conn->cmd_cleanup_req == + atomic_read(&qedi_conn->cmd_cleanup_cmpl)) || + test_bit(QEDI_IN_RECOVERY, &qedi->flags), + 5 * HZ)) { iscsi_host_for_each_session(qedi->shost, qedi_mark_device_available); return -1; @@ -1266,7 +1259,7 @@ void qedi_clearsq(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, qedi_ep = qedi_conn->ep; qedi_conn->cmd_cleanup_req = 0; - qedi_conn->cmd_cleanup_cmpl = 0; + atomic_set(&qedi_conn->cmd_cleanup_cmpl, 0); if (!qedi_ep) { QEDI_WARN(&qedi->dbg_ctx, diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 88aa7d8b11c9..282ecb4e39bb 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -412,7 +412,7 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session, qedi_conn->iscsi_conn_id = qedi_ep->iscsi_cid; qedi_conn->fw_cid = qedi_ep->fw_cid; qedi_conn->cmd_cleanup_req = 0; - qedi_conn->cmd_cleanup_cmpl = 0; + atomic_set(&qedi_conn->cmd_cleanup_cmpl, 0); if (qedi_bind_conn_to_iscsi_cid(qedi, qedi_conn)) { rc = -EINVAL; diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index a282860da0aa..9b9f2e44fdde 100644 --- a/drivers/scsi/qedi/qedi_iscsi.h +++ b/drivers/scsi/qedi/qedi_iscsi.h @@ -155,7 +155,7 @@ struct qedi_conn { spinlock_t list_lock; /* internal conn lock */ u32 active_cmd_count; u32 cmd_cleanup_req; - u32 cmd_cleanup_cmpl; + atomic_t cmd_cleanup_cmpl; u32 iscsi_conn_id; int itt; From 7db0e0c8190a086ef92ce5bb960836cde49540aa Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 7 Dec 2021 10:06:38 +0900 Subject: [PATCH 223/868] scsi: scsi_debug: Fix buffer size of REPORT ZONES command According to ZBC and SPC specifications, the unit of ALLOCATION LENGTH field of REPORT ZONES command is byte. However, current scsi_debug implementation handles it as number of zones to calculate buffer size to report zones. When the ALLOCATION LENGTH has a large number, this results in too large buffer size and causes memory allocation failure. Fix the failure by handling ALLOCATION LENGTH as byte unit. Link: https://lore.kernel.org/r/20211207010638.124280-1-shinichiro.kawasaki@wdc.com Fixes: f0d1cf9378bd ("scsi: scsi_debug: Add ZBC zone commands") Reviewed-by: Damien Le Moal Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 3c0da3770edf..2104973a35cd 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4342,7 +4342,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, rep_max_zones = min((alloc_len - 64) >> ilog2(RZONES_DESC_HD), max_zones); - arr = kcalloc(RZONES_DESC_HD, alloc_len, GFP_ATOMIC); + arr = kzalloc(alloc_len, GFP_ATOMIC); if (!arr) { mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, INSUFF_RES_ASCQ); From 69002c8ce914ef0ae22a6ea14b43bb30b9a9a6a8 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Fri, 12 Nov 2021 17:54:46 +0300 Subject: [PATCH 224/868] scsi: qla2xxx: Format log strings only if needed Commit 598a90f2002c ("scsi: qla2xxx: add ring buffer for tracing debug logs") introduced unconditional log string formatting to ql_dbg() even if ql_dbg_log event is disabled. It harms performance because some strings are formatted in fastpath and/or interrupt context. Link: https://lore.kernel.org/r/20211112145446.51210-1-r.bolshakov@yadro.com Fixes: 598a90f2002c ("scsi: qla2xxx: add ring buffer for tracing debug logs") Cc: Rajan Shanmugavelu Cc: stable@vger.kernel.org Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 25549a8a2d72..7cf1f78cbaee 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2491,6 +2491,9 @@ ql_dbg(uint level, scsi_qla_host_t *vha, uint id, const char *fmt, ...) struct va_format vaf; char pbuf[64]; + if (!ql_mask_match(level) && !trace_ql_dbg_log_enabled()) + return; + va_start(va, fmt); vaf.fmt = fmt; From 44ee250aeeabb28b52a10397ac17ffb8bfe94839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C4=8Cavoj?= Date: Sat, 4 Dec 2021 13:17:36 -0800 Subject: [PATCH 225/868] Input: i8042 - enable deferred probe quirk for ASUS UM325UA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ASUS UM325UA suffers from the same issue as the ASUS UX425UA, which is a very similar laptop. The i8042 device is not usable immediately after boot and fails to initialize, requiring a deferred retry. Enable the deferred probe quirk for the UM325UA. BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1190256 Signed-off-by: Samuel Čavoj Link: https://lore.kernel.org/r/20211204015615.232948-1-samuel@cavoj.net Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 1acc7c844929..148a7c5fd0e2 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -1003,6 +1003,13 @@ static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), }, }, + { + /* ASUS ZenBook UM325UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), + }, + }, { } }; From a2fd46cd3dbb83b373ba74f4043f8dae869c65f1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 6 Dec 2021 23:15:09 -0800 Subject: [PATCH 226/868] Input: goodix - try not to touch the reset-pin on x86/ACPI devices Unless the controller is not responding at boot or after suspend/resume, the driver never resets the controller on x86/ACPI platforms. The driver still requesting the reset pin at probe() though in case it needs it. Until now the driver has always requested the reset pin with GPIOD_IN as type. The idea being to put the pin in high-impedance mode to save power until the driver actually wants to issue a reset. But this means that just requesting the pin can cause issues, since requesting it in another mode then GPIOD_ASIS may cause the pinctrl driver to touch the pin settings. We have already had issues before due to a bug in the pinctrl-cherryview.c driver which has been fixed in commit 921daeeca91b ("pinctrl: cherryview: Preserve CHV_PADCTRL1_INVRXTX_TXDATA flag on GPIOs"). And now it turns out that requesting the reset-pin as GPIOD_IN also stops the touchscreen from working on the GPD P2 max mini-laptop. The behavior of putting the pin in high-impedance mode relies on there being some external pull-up to keep it high and there seems to be no pull-up on the GPD P2 max, causing things to break. This commit fixes this by requesting the reset pin as is when using the x86/ACPI code paths to lookup the GPIOs; and by not dropping it back into input-mode in case the driver does end up issuing a reset for error-recovery. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209061 Fixes: a7d4b171660c ("Input: goodix - add support for getting IRQ + reset GPIOs on Cherry Trail devices") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211206091116.44466-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 30 +++++++++++++++++++++++++----- drivers/input/touchscreen/goodix.h | 1 + 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 906b5a6b52d1..e7efc32043e7 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -695,10 +695,16 @@ int goodix_reset_no_int_sync(struct goodix_ts_data *ts) usleep_range(6000, 10000); /* T4: > 5ms */ - /* end select I2C slave addr */ - error = gpiod_direction_input(ts->gpiod_rst); - if (error) - goto error; + /* + * Put the reset pin back in to input / high-impedance mode to save + * power. Only do this in the non ACPI case since some ACPI boards + * don't have a pull-up, so there the reset pin must stay active-high. + */ + if (ts->irq_pin_access_method == IRQ_PIN_ACCESS_GPIO) { + error = gpiod_direction_input(ts->gpiod_rst); + if (error) + goto error; + } return 0; @@ -832,6 +838,14 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts) return -EINVAL; } + /* + * Normally we put the reset pin in input / high-impedance mode to save + * power. But some x86/ACPI boards don't have a pull-up, so for the ACPI + * case, leave the pin as is. This results in the pin not being touched + * at all on x86/ACPI boards, except when needed for error-recover. + */ + ts->gpiod_rst_flags = GPIOD_ASIS; + return devm_acpi_dev_add_driver_gpios(dev, gpio_mapping); } #else @@ -857,6 +871,12 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts) return -EINVAL; dev = &ts->client->dev; + /* + * By default we request the reset pin as input, leaving it in + * high-impedance when not resetting the controller to save power. + */ + ts->gpiod_rst_flags = GPIOD_IN; + ts->avdd28 = devm_regulator_get(dev, "AVDD28"); if (IS_ERR(ts->avdd28)) { error = PTR_ERR(ts->avdd28); @@ -894,7 +914,7 @@ retry_get_irq_gpio: ts->gpiod_int = gpiod; /* Get the reset line GPIO pin number */ - gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, GPIOD_IN); + gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags); if (IS_ERR(gpiod)) { error = PTR_ERR(gpiod); if (error != -EPROBE_DEFER) diff --git a/drivers/input/touchscreen/goodix.h b/drivers/input/touchscreen/goodix.h index 62138f930d1a..02065d1c3263 100644 --- a/drivers/input/touchscreen/goodix.h +++ b/drivers/input/touchscreen/goodix.h @@ -87,6 +87,7 @@ struct goodix_ts_data { struct gpio_desc *gpiod_rst; int gpio_count; int gpio_int_idx; + enum gpiod_flags gpiod_rst_flags; char id[GOODIX_ID_MAX_LEN + 1]; char cfg_name[64]; u16 version; From 81e818869be522bc8fa6f7df1b92d7e76537926c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 6 Dec 2021 23:29:27 -0800 Subject: [PATCH 227/868] Input: goodix - add id->model mapping for the "9111" model Add d->model mapping for the "9111" model, this fixes uses using a wrong config_len of 240 bytes while the "9111" model uses only 186 bytes of config. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20211206164747.197309-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index e7efc32043e7..87263eb9e5a4 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -102,6 +102,7 @@ static const struct goodix_chip_id goodix_chip_ids[] = { { .id = "911", .data = >911_chip_data }, { .id = "9271", .data = >911_chip_data }, { .id = "9110", .data = >911_chip_data }, + { .id = "9111", .data = >911_chip_data }, { .id = "927", .data = >911_chip_data }, { .id = "928", .data = >911_chip_data }, From d7f32791a9fcf0dae8b073cdea9b79e29098c5f4 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Tue, 23 Nov 2021 16:32:44 +0800 Subject: [PATCH 228/868] ALSA: hda/realtek - Add headset Mic support for Lenovo ALC897 platform Lenovo ALC897 platform had headset Mic. This patch enable supported headset Mic. Signed-off-by: Kailang Yang Cc: Link: https://lore.kernel.org/r/baab2c2536cb4cc18677a862c6f6d840@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d361a1260d5a..3599f4c85ebf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10235,6 +10235,27 @@ static void alc671_fixup_hp_headset_mic2(struct hda_codec *codec, } } +static void alc897_hp_automute_hook(struct hda_codec *codec, + struct hda_jack_callback *jack) +{ + struct alc_spec *spec = codec->spec; + int vref; + + snd_hda_gen_hp_automute(codec, jack); + vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP; + snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, + vref); +} + +static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gen.hp_automute_hook = alc897_hp_automute_hook; + } +} + static const struct coef_fw alc668_coefs[] = { WRITE_COEF(0x01, 0xbebe), WRITE_COEF(0x02, 0xaaaa), WRITE_COEF(0x03, 0x0), WRITE_COEF(0x04, 0x0180), WRITE_COEF(0x06, 0x0), WRITE_COEF(0x07, 0x0f80), @@ -10315,6 +10336,8 @@ enum { ALC668_FIXUP_ASUS_NO_HEADSET_MIC, ALC668_FIXUP_HEADSET_MIC, ALC668_FIXUP_MIC_DET_COEF, + ALC897_FIXUP_LENOVO_HEADSET_MIC, + ALC897_FIXUP_HEADSET_MIC_PIN, }; static const struct hda_fixup alc662_fixups[] = { @@ -10721,6 +10744,19 @@ static const struct hda_fixup alc662_fixups[] = { {} }, }, + [ALC897_FIXUP_LENOVO_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc897_fixup_lenovo_headset_mic, + }, + [ALC897_FIXUP_HEADSET_MIC_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x03a11050 }, + { } + }, + .chained = true, + .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -10765,6 +10801,10 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO), From 94cddf1e9227a171b27292509d59691819c458db Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 23 Nov 2021 20:16:54 +0900 Subject: [PATCH 229/868] can: pch_can: pch_can_rx_normal: fix use after free After calling netif_receive_skb(skb), dereferencing skb is unsafe. Especially, the can_frame cf which aliases skb memory is dereferenced just after the call netif_receive_skb(skb). Reordering the lines solves the issue. Fixes: b21d18b51b31 ("can: Topcliff: Add PCH_CAN driver.") Link: https://lore.kernel.org/all/20211123111654.621610-1-mailhol.vincent@wanadoo.fr Cc: stable@vger.kernel.org Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/pch_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index 92a54a5fd4c5..964c8a09226a 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -692,11 +692,11 @@ static int pch_can_rx_normal(struct net_device *ndev, u32 obj_num, int quota) cf->data[i + 1] = data_reg >> 8; } - netif_receive_skb(skb); rcv_pkts++; stats->rx_packets++; quota--; stats->rx_bytes += cf->len; + netif_receive_skb(skb); pch_fifo_thresh(priv, obj_num); obj_num++; From 3ec6ca6b1a8e64389f0212b5a1b0f6fed1909e45 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 24 Nov 2021 17:50:41 +0300 Subject: [PATCH 230/868] can: sja1000: fix use after free in ems_pcmcia_add_card() If the last channel is not available then "dev" is freed. Fortunately, we can just use "pdev->irq" instead. Also we should check if at least one channel was set up. Fixes: fd734c6f25ae ("can/sja1000: add driver for EMS PCMCIA card") Link: https://lore.kernel.org/all/20211124145041.GB13656@kili Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Acked-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/ems_pcmcia.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/sja1000/ems_pcmcia.c b/drivers/net/can/sja1000/ems_pcmcia.c index e21b169c14c0..4642b6d4aaf7 100644 --- a/drivers/net/can/sja1000/ems_pcmcia.c +++ b/drivers/net/can/sja1000/ems_pcmcia.c @@ -234,7 +234,12 @@ static int ems_pcmcia_add_card(struct pcmcia_device *pdev, unsigned long base) free_sja1000dev(dev); } - err = request_irq(dev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, + if (!card->channels) { + err = -ENODEV; + goto failure_cleanup; + } + + err = request_irq(pdev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, DRV_NAME, card); if (!err) return 0; From f58ac1adc76b5beda43c64ef359056077df4d93a Mon Sep 17 00:00:00 2001 From: Brian Silverman Date: Mon, 29 Nov 2021 14:26:28 -0800 Subject: [PATCH 231/868] can: m_can: Disable and ignore ELO interrupt With the design of this driver, this condition is often triggered. However, the counter that this interrupt indicates an overflow is never read either, so overflowing is harmless. On my system, when a CAN bus starts flapping up and down, this locks up the whole system with lots of interrupts and printks. Specifically, this interrupt indicates the CEL field of ECR has overflowed. All reads of ECR mask out CEL. Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Link: https://lore.kernel.org/all/20211129222628.7490-1-brian.silverman@bluerivertech.com Cc: stable@vger.kernel.org Signed-off-by: Brian Silverman Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2470c47b2e31..91be87c4f4d3 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -204,16 +204,16 @@ enum m_can_reg { /* Interrupts for version 3.0.x */ #define IR_ERR_LEC_30X (IR_STE | IR_FOE | IR_ACKE | IR_BE | IR_CRCE) -#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_30X (IR_ERR_STATE | IR_ERR_BUS_30X) /* Interrupts for version >= 3.1.x */ #define IR_ERR_LEC_31X (IR_PED | IR_PEA) -#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_31X (IR_ERR_STATE | IR_ERR_BUS_31X) /* Interrupt Line Select (ILS) */ @@ -810,8 +810,6 @@ static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) { if (irqstatus & IR_WDI) netdev_err(dev, "Message RAM Watchdog event due to missing READY\n"); - if (irqstatus & IR_ELO) - netdev_err(dev, "Error Logging Overflow\n"); if (irqstatus & IR_BEU) netdev_err(dev, "Bit Error Uncorrected\n"); if (irqstatus & IR_BEC) From 31cb32a590d62b18f69a9a6d433f4e69c74fdd56 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sun, 7 Nov 2021 14:07:55 +0900 Subject: [PATCH 232/868] can: m_can: m_can_read_fifo: fix memory leak in error branch In m_can_read_fifo(), if the second call to m_can_fifo_read() fails, the function jump to the out_fail label and returns without calling m_can_receive_skb(). This means that the skb previously allocated by alloc_can_skb() is not freed. In other terms, this is a memory leak. This patch adds a goto label to destroy the skb if an error occurs. Issue was found with GCC -fanalyzer, please follow the link below for details. Fixes: e39381770ec9 ("can: m_can: Disable IRQs on FIFO bus errors") Link: https://lore.kernel.org/all/20211107050755.70655-1-mailhol.vincent@wanadoo.fr Cc: stable@vger.kernel.org Cc: Matt Kline Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 91be87c4f4d3..e330b4c121bf 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -517,7 +517,7 @@ static int m_can_read_fifo(struct net_device *dev, u32 rxfs) err = m_can_fifo_read(cdev, fgi, M_CAN_FIFO_DATA, cf->data, DIV_ROUND_UP(cf->len, 4)); if (err) - goto out_fail; + goto out_free_skb; } /* acknowledge rx fifo 0 */ @@ -532,6 +532,8 @@ static int m_can_read_fifo(struct net_device *dev, u32 rxfs) return 0; +out_free_skb: + kfree_skb(skb); out_fail: netdev_err(dev, "FIFO read returned %d\n", err); return err; From d737de2d7cc3efdacbf17d4e22efc75697bd76d9 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 18 Nov 2021 15:40:11 +0100 Subject: [PATCH 233/868] can: m_can: pci: fix iomap_read_fifo() and iomap_write_fifo() The same fix that was previously done in m_can_platform in commit 99d173fbe894 ("can: m_can: fix iomap_read_fifo() and iomap_write_fifo()") is required in m_can_pci as well to make iomap_read_fifo() and iomap_write_fifo() work for val_count > 1. Fixes: 812270e5445b ("can: m_can: Batch FIFO writes during CAN transmit") Fixes: 1aa6772f64b4 ("can: m_can: Batch FIFO reads during CAN receive") Link: https://lore.kernel.org/all/20211118144011.10921-1-matthias.schiffer@ew.tq-group.com Cc: stable@vger.kernel.org Cc: Matt Kline Signed-off-by: Matthias Schiffer Tested-by: Jarkko Nikula Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 89cc3d41e952..d72c294ac4d3 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -42,8 +42,13 @@ static u32 iomap_read_reg(struct m_can_classdev *cdev, int reg) static int iomap_read_fifo(struct m_can_classdev *cdev, int offset, void *val, size_t val_count) { struct m_can_pci_priv *priv = cdev_to_priv(cdev); + void __iomem *src = priv->base + offset; - ioread32_rep(priv->base + offset, val, val_count); + while (val_count--) { + *(unsigned int *)val = ioread32(src); + val += 4; + src += 4; + } return 0; } @@ -61,8 +66,13 @@ static int iomap_write_fifo(struct m_can_classdev *cdev, int offset, const void *val, size_t val_count) { struct m_can_pci_priv *priv = cdev_to_priv(cdev); + void __iomem *dst = priv->base + offset; - iowrite32_rep(priv->base + offset, val, val_count); + while (val_count--) { + iowrite32(*(unsigned int *)val, dst); + val += 4; + dst += 4; + } return 0; } From 8c03b8bff765ac4146342ef90931bb50e788c758 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:49 +0100 Subject: [PATCH 234/868] can: m_can: pci: fix incorrect reference clock rate When testing the CAN controller on our Ekhart Lake hardware, we determined that all communication was running with twice the configured bitrate. Changing the reference clock rate from 100MHz to 200MHz fixed this. Intel's support has confirmed to us that 200MHz is indeed the correct clock rate. Fixes: cab7ffc0324f ("can: m_can: add PCI glue driver for Intel Elkhart Lake") Link: https://lore.kernel.org/all/c9cf3995f45c363e432b3ae8eb1275e54f009fc8.1636967198.git.matthias.schiffer@ew.tq-group.com Cc: stable@vger.kernel.org Signed-off-by: Matthias Schiffer Acked-by: Jarkko Nikula Reviewed-by: Jarkko Nikula Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index d72c294ac4d3..8f184a852a0a 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -18,7 +18,7 @@ #define M_CAN_PCI_MMIO_BAR 0 -#define M_CAN_CLOCK_FREQ_EHL 100000000 +#define M_CAN_CLOCK_FREQ_EHL 200000000 #define CTL_CSR_INT_CTL_OFFSET 0x508 struct m_can_pci_priv { From ea768b2ffec6cc9c3e17c37ef75d0539b8f89ff5 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:50 +0100 Subject: [PATCH 235/868] Revert "can: m_can: remove support for custom bit timing" The timing limits specified by the Elkhart Lake CPU datasheets do not match the defaults. Let's reintroduce the support for custom bit timings. This reverts commit 0ddd83fbebbc5537f9d180d31f659db3564be708. Link: https://lore.kernel.org/all/00c9e2596b1a548906921a574d4ef7a03c0dace0.1636967198.git.matthias.schiffer@ew.tq-group.com Signed-off-by: Matthias Schiffer Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 24 ++++++++++++++++++------ drivers/net/can/m_can/m_can.h | 3 +++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index e330b4c121bf..c2a8421e7845 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1494,20 +1494,32 @@ static int m_can_dev_setup(struct m_can_classdev *cdev) case 30: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.0.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = &m_can_bittiming_const_30X; - cdev->can.data_bittiming_const = &m_can_data_bittiming_const_30X; + cdev->can.bittiming_const = cdev->bit_timing ? + cdev->bit_timing : &m_can_bittiming_const_30X; + + cdev->can.data_bittiming_const = cdev->data_timing ? + cdev->data_timing : + &m_can_data_bittiming_const_30X; break; case 31: /* CAN_CTRLMODE_FD_NON_ISO is fixed with M_CAN IP v3.1.x */ can_set_static_ctrlmode(dev, CAN_CTRLMODE_FD_NON_ISO); - cdev->can.bittiming_const = &m_can_bittiming_const_31X; - cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = cdev->bit_timing ? + cdev->bit_timing : &m_can_bittiming_const_31X; + + cdev->can.data_bittiming_const = cdev->data_timing ? + cdev->data_timing : + &m_can_data_bittiming_const_31X; break; case 32: case 33: /* Support both MCAN version v3.2.x and v3.3.0 */ - cdev->can.bittiming_const = &m_can_bittiming_const_31X; - cdev->can.data_bittiming_const = &m_can_data_bittiming_const_31X; + cdev->can.bittiming_const = cdev->bit_timing ? + cdev->bit_timing : &m_can_bittiming_const_31X; + + cdev->can.data_bittiming_const = cdev->data_timing ? + cdev->data_timing : + &m_can_data_bittiming_const_31X; cdev->can.ctrlmode_supported |= (m_can_niso_supported(cdev) ? diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index d18b515e6ccc..ad063b101411 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -85,6 +85,9 @@ struct m_can_classdev { struct sk_buff *tx_skb; struct phy *transceiver; + struct can_bittiming_const *bit_timing; + struct can_bittiming_const *data_timing; + struct m_can_ops *ops; int version; From ea22ba40debee29ee7257c42002409899e9311c1 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:51 +0100 Subject: [PATCH 236/868] can: m_can: make custom bittiming fields const The assigned timing structs will be defined a const anyway, so we can avoid a few casts by declaring the struct fields as const as well. Link: https://lore.kernel.org/all/4508fa4e639164b2584c49a065d90c78a91fa568.1636967198.git.matthias.schiffer@ew.tq-group.com Signed-off-by: Matthias Schiffer Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h index ad063b101411..2c5d40997168 100644 --- a/drivers/net/can/m_can/m_can.h +++ b/drivers/net/can/m_can/m_can.h @@ -85,8 +85,8 @@ struct m_can_classdev { struct sk_buff *tx_skb; struct phy *transceiver; - struct can_bittiming_const *bit_timing; - struct can_bittiming_const *data_timing; + const struct can_bittiming_const *bit_timing; + const struct can_bittiming_const *data_timing; struct m_can_ops *ops; From ea4c1787685dbf9842046f05b6390b6901ee6ba2 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 15 Nov 2021 10:18:52 +0100 Subject: [PATCH 237/868] can: m_can: pci: use custom bit timings for Elkhart Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The relevant datasheet [1] specifies nonstandard limits for the bit timing parameters. While it is unclear what the exact effect of violating these limits is, it seems like a good idea to adhere to the documentation. [1] Intel Atom® x6000E Series, and Intel® Pentium® and Celeron® N and J Series Processors for IoT Applications Datasheet, Volume 2 (Book 3 of 3), July 2021, Revision 001 Fixes: cab7ffc0324f ("can: m_can: add PCI glue driver for Intel Elkhart Lake") Link: https://lore.kernel.org/all/9eba5d7c05a48ead4024ffa6e5926f191d8c6b38.1636967198.git.matthias.schiffer@ew.tq-group.com Signed-off-by: Matthias Schiffer Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 48 ++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 8f184a852a0a..b56a54d6c5a9 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -18,9 +18,14 @@ #define M_CAN_PCI_MMIO_BAR 0 -#define M_CAN_CLOCK_FREQ_EHL 200000000 #define CTL_CSR_INT_CTL_OFFSET 0x508 +struct m_can_pci_config { + const struct can_bittiming_const *bit_timing; + const struct can_bittiming_const *data_timing; + unsigned int clock_freq; +}; + struct m_can_pci_priv { struct m_can_classdev cdev; @@ -84,9 +89,40 @@ static struct m_can_ops m_can_pci_ops = { .read_fifo = iomap_read_fifo, }; +static const struct can_bittiming_const m_can_bittiming_const_ehl = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 64, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 128, + .sjw_max = 128, + .brp_min = 1, + .brp_max = 512, + .brp_inc = 1, +}; + +static const struct can_bittiming_const m_can_data_bittiming_const_ehl = { + .name = KBUILD_MODNAME, + .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ + .tseg1_max = 16, + .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_max = 8, + .sjw_max = 4, + .brp_min = 1, + .brp_max = 32, + .brp_inc = 1, +}; + +static const struct m_can_pci_config m_can_pci_ehl = { + .bit_timing = &m_can_bittiming_const_ehl, + .data_timing = &m_can_data_bittiming_const_ehl, + .clock_freq = 200000000, +}; + static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) { struct device *dev = &pci->dev; + const struct m_can_pci_config *cfg; struct m_can_classdev *mcan_class; struct m_can_pci_priv *priv; void __iomem *base; @@ -114,6 +150,8 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) if (!mcan_class) return -ENOMEM; + cfg = (const struct m_can_pci_config *)id->driver_data; + priv = cdev_to_priv(mcan_class); priv->base = base; @@ -125,7 +163,9 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); mcan_class->pm_clock_support = 1; - mcan_class->can.clock.freq = id->driver_data; + mcan_class->bit_timing = cfg->bit_timing; + mcan_class->data_timing = cfg->data_timing; + mcan_class->can.clock.freq = cfg->clock_freq; mcan_class->ops = &m_can_pci_ops; pci_set_drvdata(pci, mcan_class); @@ -178,8 +218,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_ops, m_can_pci_suspend, m_can_pci_resume); static const struct pci_device_id m_can_pci_id_table[] = { - { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, }, - { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, }, + { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, }, + { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, }, { } /* Terminating Entry */ }; MODULE_DEVICE_TABLE(pci, m_can_pci_id_table); From 3d9e575f2acef57528ed6950b5f8ba99f5e52f3f Mon Sep 17 00:00:00 2001 From: Donghyeok Kim Date: Sun, 5 Dec 2021 01:42:28 +0900 Subject: [PATCH 238/868] irqchip/apple-aic: Mark aic_init_smp() as __init This function is only called from the driver init code. Signed-off-by: Donghyeok Kim Acked-by: Hector Martin Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211204164228.5920-1-dthex5d@gmail.com --- drivers/irqchip/irq-apple-aic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 3759dc36cc8f..2543ef65825b 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -707,7 +707,7 @@ static const struct irq_domain_ops aic_ipi_domain_ops = { .free = aic_ipi_free, }; -static int aic_init_smp(struct aic_irq_chip *irqc, struct device_node *node) +static int __init aic_init_smp(struct aic_irq_chip *irqc, struct device_node *node) { struct irq_domain *ipi_domain; int base_ipi; From 598ad0bd09329818ee041cb3e4b60ba0a70cb1ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Dec 2021 09:53:24 +0000 Subject: [PATCH 239/868] netfs: Fix lockdep warning from taking sb_writers whilst holding mmap_lock Taking sb_writers whilst holding mmap_lock isn't allowed and will result in a lockdep warning like that below. The problem comes from cachefiles needing to take the sb_writers lock in order to do a write to the cache, but being asked to do this by netfslib called from readpage, readahead or write_begin[1]. Fix this by always offloading the write to the cache off to a worker thread. The main thread doesn't need to wait for it, so deadlock can be avoided. This can be tested by running the quick xfstests on something like afs or ceph with lockdep enabled. WARNING: possible circular locking dependency detected 5.15.0-rc1-build2+ #292 Not tainted ------------------------------------------------------ holetest/65517 is trying to acquire lock: ffff88810c81d730 (mapping.invalidate_lock#3){.+.+}-{3:3}, at: filemap_fault+0x276/0x7a5 but task is already holding lock: ffff8881595b53e8 (&mm->mmap_lock#2){++++}-{3:3}, at: do_user_addr_fault+0x28d/0x59c which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&mm->mmap_lock#2){++++}-{3:3}: validate_chain+0x3c4/0x4a8 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b __might_fault+0x87/0xb1 strncpy_from_user+0x25/0x18c removexattr+0x7c/0xe5 __do_sys_fremovexattr+0x73/0x96 do_syscall_64+0x67/0x7a entry_SYSCALL_64_after_hwframe+0x44/0xae -> #1 (sb_writers#10){.+.+}-{0:0}: validate_chain+0x3c4/0x4a8 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b cachefiles_write+0x2b3/0x4bb netfs_rreq_do_write_to_cache+0x3b5/0x432 netfs_readpage+0x2de/0x39d filemap_read_page+0x51/0x94 filemap_get_pages+0x26f/0x413 filemap_read+0x182/0x427 new_sync_read+0xf0/0x161 vfs_read+0x118/0x16e ksys_read+0xb8/0x12e do_syscall_64+0x67/0x7a entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (mapping.invalidate_lock#3){.+.+}-{3:3}: check_noncircular+0xe4/0x129 check_prev_add+0x16b/0x3a4 validate_chain+0x3c4/0x4a8 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b down_read+0x40/0x4a filemap_fault+0x276/0x7a5 __do_fault+0x96/0xbf do_fault+0x262/0x35a __handle_mm_fault+0x171/0x1b5 handle_mm_fault+0x12a/0x233 do_user_addr_fault+0x3d2/0x59c exc_page_fault+0x85/0xa5 asm_exc_page_fault+0x1e/0x30 other info that might help us debug this: Chain exists of: mapping.invalidate_lock#3 --> sb_writers#10 --> &mm->mmap_lock#2 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_lock#2); lock(sb_writers#10); lock(&mm->mmap_lock#2); lock(mapping.invalidate_lock#3); *** DEADLOCK *** 1 lock held by holetest/65517: #0: ffff8881595b53e8 (&mm->mmap_lock#2){++++}-{3:3}, at: do_user_addr_fault+0x28d/0x59c stack backtrace: CPU: 0 PID: 65517 Comm: holetest Not tainted 5.15.0-rc1-build2+ #292 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: dump_stack_lvl+0x45/0x59 check_noncircular+0xe4/0x129 ? print_circular_bug+0x207/0x207 ? validate_chain+0x461/0x4a8 ? add_chain_block+0x88/0xd9 ? hlist_add_head_rcu+0x49/0x53 check_prev_add+0x16b/0x3a4 validate_chain+0x3c4/0x4a8 ? check_prev_add+0x3a4/0x3a4 ? mark_lock+0xa5/0x1c6 __lock_acquire+0x89d/0x949 lock_acquire+0x2dc/0x34b ? filemap_fault+0x276/0x7a5 ? rcu_read_unlock+0x59/0x59 ? add_to_page_cache_lru+0x13c/0x13c ? lock_is_held_type+0x7b/0xd3 down_read+0x40/0x4a ? filemap_fault+0x276/0x7a5 filemap_fault+0x276/0x7a5 ? pagecache_get_page+0x2dd/0x2dd ? __lock_acquire+0x8bc/0x949 ? pte_offset_kernel.isra.0+0x6d/0xc3 __do_fault+0x96/0xbf ? do_fault+0x124/0x35a do_fault+0x262/0x35a ? handle_pte_fault+0x1c1/0x20d __handle_mm_fault+0x171/0x1b5 ? handle_pte_fault+0x20d/0x20d ? __lock_release+0x151/0x254 ? mark_held_locks+0x1f/0x78 ? rcu_read_unlock+0x3a/0x59 handle_mm_fault+0x12a/0x233 do_user_addr_fault+0x3d2/0x59c ? pgtable_bad+0x70/0x70 ? rcu_read_lock_bh_held+0xab/0xab exc_page_fault+0x85/0xa5 ? asm_exc_page_fault+0x8/0x30 asm_exc_page_fault+0x1e/0x30 RIP: 0033:0x40192f Code: ff 48 89 c3 48 8b 05 50 28 00 00 48 85 ed 7e 23 31 d2 4b 8d 0c 2f eb 0a 0f 1f 00 48 8b 05 39 28 00 00 48 0f af c2 48 83 c2 01 <48> 89 1c 01 48 39 d5 7f e8 8b 0d f2 27 00 00 31 c0 85 c9 74 0e 8b RSP: 002b:00007f9931867eb0 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00007f9931868700 RCX: 00007f993206ac00 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00007ffc13e06ee0 RBP: 0000000000000100 R08: 0000000000000000 R09: 00007f9931868700 R10: 00007f99318689d0 R11: 0000000000000202 R12: 00007ffc13e06ee0 R13: 0000000000000c00 R14: 00007ffc13e06e00 R15: 00007f993206a000 Fixes: 726218fdc22c ("netfs: Define an interface to talk to a cache") Signed-off-by: David Howells Tested-by: Jeff Layton cc: Jan Kara cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20210922110420.GA21576@quack2.suse.cz/ [1] Link: https://lore.kernel.org/r/163887597541.1596626.2668163316598972956.stgit@warthog.procyon.org.uk/ # v1 --- fs/netfs/read_helper.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 7046f9bdd8dc..7c6e199618af 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -354,16 +354,11 @@ static void netfs_rreq_write_to_cache_work(struct work_struct *work) netfs_rreq_do_write_to_cache(rreq); } -static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq, - bool was_async) +static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq) { - if (was_async) { - rreq->work.func = netfs_rreq_write_to_cache_work; - if (!queue_work(system_unbound_wq, &rreq->work)) - BUG(); - } else { - netfs_rreq_do_write_to_cache(rreq); - } + rreq->work.func = netfs_rreq_write_to_cache_work; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); } /* @@ -558,7 +553,7 @@ again: wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags)) - return netfs_rreq_write_to_cache(rreq, was_async); + return netfs_rreq_write_to_cache(rreq); netfs_rreq_completed(rreq, was_async); } From 06d59d626a0a49c7bfeae52aa6f793538209f2fc Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 1 Dec 2021 15:39:52 +0200 Subject: [PATCH 240/868] MAINTAINERS: update Kalle Valo's email I switched to using kvalo@kernel.org, update MAINTAINERS file. Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211201133952.31744-1-kvalo@kernel.org --- MAINTAINERS | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7a2345ce8521..22bea147c7d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3056,7 +3056,7 @@ F: Documentation/devicetree/bindings/phy/phy-ath79-usb.txt F: drivers/phy/qualcomm/phy-ath79-usb.c ATHEROS ATH GENERIC UTILITIES -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Supported F: drivers/net/wireless/ath/* @@ -3071,7 +3071,7 @@ W: https://wireless.wiki.kernel.org/en/users/Drivers/ath5k F: drivers/net/wireless/ath/ath5k/ ATHEROS ATH6KL WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath6kl @@ -13238,7 +13238,7 @@ F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h NETWORKING DRIVERS (WIRELESS) -M: Kalle Valo +M: Kalle Valo L: linux-wireless@vger.kernel.org S: Maintained Q: http://patchwork.kernel.org/project/linux-wireless/list/ @@ -15694,7 +15694,7 @@ T: git git://linuxtv.org/anttip/media_tree.git F: drivers/media/tuners/qt1010* QUALCOMM ATHEROS ATH10K WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k @@ -15702,7 +15702,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath10k/ QUALCOMM ATHEROS ATH11K WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: ath11k@lists.infradead.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git @@ -15866,7 +15866,7 @@ F: Documentation/devicetree/bindings/media/*venus* F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER -M: Kalle Valo +M: Kalle Valo L: wcn36xx@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx From 3cfef1b612e15a0c2f5b1c9d3f3f31ad72d56fcd Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Tue, 7 Dec 2021 11:14:49 +0800 Subject: [PATCH 241/868] netfs: fix parameter of cleanup() The order of these two parameters is just reversed. gcc didn't warn on that, probably because 'void *' can be converted from or to other pointer types without warning. Cc: stable@vger.kernel.org Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Fixes: e1b1240c1ff5 ("netfs: Add write_begin helper") Signed-off-by: Jeffle Xu Signed-off-by: David Howells Reviewed-by: Jeff Layton Link: https://lore.kernel.org/r/20211207031449.100510-1-jefflexu@linux.alibaba.com/ # v1 --- fs/netfs/read_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 7c6e199618af..75c76cbb27cc 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -955,7 +955,7 @@ int netfs_readpage(struct file *file, rreq = netfs_alloc_read_request(ops, netfs_priv, file); if (!rreq) { if (netfs_priv) - ops->cleanup(netfs_priv, folio_file_mapping(folio)); + ops->cleanup(folio_file_mapping(folio), netfs_priv); folio_unlock(folio); return -ENOMEM; } @@ -1186,7 +1186,7 @@ have_folio: goto error; have_folio_no_wait: if (netfs_priv) - ops->cleanup(netfs_priv, mapping); + ops->cleanup(mapping, netfs_priv); *_folio = folio; _leave(" = 0"); return 0; @@ -1197,7 +1197,7 @@ error: folio_unlock(folio); folio_put(folio); if (netfs_priv) - ops->cleanup(netfs_priv, mapping); + ops->cleanup(mapping, netfs_priv); _leave(" = %d", ret); return ret; } From ee91cb570d9b12ae8cfcb96f7ea6fb80983b6a0a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Nov 2021 18:06:34 +0000 Subject: [PATCH 242/868] PCI: apple: Follow the PCIe specifications when resetting the port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the Apple PCIe driver works correctly when directly booted from the firmware, it fails to initialise when the kernel is booted from a bootloader using PCIe such as u-boot. That's because we're missing a proper reset of the port (we only clear the reset, but never assert it). The PCIe spec requirements are two-fold: - PERST# must be asserted before setting up the clocks and stay asserted for at least 100us (Tperst-clk) - Once PERST# is deasserted, the OS must wait for at least 100ms "from the end of a Conventional Reset" before we can start talking to the devices Implementing this results in a booting system. [bhelgaas: #PERST -> PERST#, update spec references to current] Fixes: 1e33888fbe44 ("PCI: apple: Add initial hardware bring-up") Link: https://lore.kernel.org/r/20211123180636.80558-2-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Bjorn Helgaas Reviewed-by: Luca Ceresoli Acked-by: Pali Rohár Cc: Alyssa Rosenzweig Cc: Lorenzo Pieralisi --- drivers/pci/controller/pcie-apple.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index 1bf4d75b61be..c384777b0374 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -516,7 +516,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, int ret, i; reset = gpiod_get_from_of_node(np, "reset-gpios", 0, - GPIOD_OUT_LOW, "#PERST"); + GPIOD_OUT_LOW, "PERST#"); if (IS_ERR(reset)) return PTR_ERR(reset); @@ -539,13 +539,23 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK); + /* Assert PERST# before setting up the clock */ + gpiod_set_value(reset, 0); + ret = apple_pcie_setup_refclk(pcie, port); if (ret < 0) return ret; + /* The minimal Tperst-clk value is 100us (PCIe CEM r5.0, 2.9.2) */ + usleep_range(100, 200); + + /* Deassert PERST# */ rmw_set(PORT_PERST_OFF, port->base + PORT_PERST); gpiod_set_value(reset, 1); + /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */ + msleep(100); + ret = readl_relaxed_poll_timeout(port->base + PORT_STATUS, stat, stat & PORT_STATUS_READY, 100, 250000); if (ret < 0) { From c7c15ae3dc50c0ab46c5cbbf8d2f3d3307e51f37 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 3 Dec 2021 19:47:15 +0800 Subject: [PATCH 243/868] nvme-multipath: set ana_log_size to 0 after free ana_log_buf Set ana_log_size to 0 when ana_log_buf is freed to make sure nvme_mpath_init_identify will do the right thing when retrying after an earlier failure. Signed-off-by: Hou Tao Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 7f2071f2460c..13e5d503ed07 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -866,7 +866,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) } if (ana_log_size > ctrl->ana_log_size) { nvme_mpath_stop(ctrl); - kfree(ctrl->ana_log_buf); + nvme_mpath_uninit(ctrl); ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL); if (!ctrl->ana_log_buf) return -ENOMEM; @@ -886,4 +886,5 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { kfree(ctrl->ana_log_buf); ctrl->ana_log_buf = NULL; + ctrl->ana_log_size = 0; } From 8b77fa6fdce0fc7147bab91b1011048758290ca4 Mon Sep 17 00:00:00 2001 From: Ruozhu Li Date: Thu, 4 Nov 2021 15:13:32 +0800 Subject: [PATCH 244/868] nvme: fix use after free when disconnecting a reconnecting ctrl A crash happens when trying to disconnect a reconnecting ctrl: 1) The network was cut off when the connection was just established, scan work hang there waiting for some IOs complete. Those I/Os were retried because we return BLK_STS_RESOURCE to blk in reconnecting. 2) After a while, I tried to disconnect this connection. This procedure also hangs because it tried to obtain ctrl->scan_lock. It should be noted that now we have switched the controller state to NVME_CTRL_DELETING. 3) In nvme_check_ready(), we always return true when ctrl->state is NVME_CTRL_DELETING, so those retrying I/Os were issued to the bottom device which was already freed. To fix this, when ctrl->state is NVME_CTRL_DELETING, issue cmd to bottom device only when queue state is live. If not, return host path error to the block layer Signed-off-by: Ruozhu Li Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/nvme.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4ee7d2f8b8d8..1af8a4513708 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -666,6 +666,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq) { if (ctrl->state != NVME_CTRL_DELETING_NOIO && + ctrl->state != NVME_CTRL_DELETING && ctrl->state != NVME_CTRL_DEAD && !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b334af8aa264..9b095ee01364 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -709,7 +709,7 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, return true; if (ctrl->ops->flags & NVME_F_FABRICS && ctrl->state == NVME_CTRL_DELETING) - return true; + return queue_live; return __nvme_check_ready(ctrl, rq, queue_live); } int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, From 9292f8f9a2ac42eb320bced7153aa2e63d8cc13a Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 29 Nov 2021 14:19:52 -0500 Subject: [PATCH 245/868] IB/hfi1: Correct guard on eager buffer deallocation The code tests the dma address which legitimately can be 0. The code should test the kernel logical address to avoid leaking eager buffer allocations that happen to map to a dma address of 0. Fixes: 60368186fd85 ("IB/hfi1: Fix user-space buffers mapping with IOMMU enabled") Link: https://lore.kernel.org/r/20211129191952.101968.17137.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index dbd1c31830b9..8e1236be46e1 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1120,7 +1120,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].dma) + if (rcd->egrbufs.buffers[e].addr) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, From b6d57e24ce6cc3df8a8845e1b193e88a65d501b1 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 29 Nov 2021 14:19:58 -0500 Subject: [PATCH 246/868] IB/hfi1: Insure use of smp_processor_id() is preempt disabled The following BUG has just surfaced with our 5.16 testing: BUG: using smp_processor_id() in preemptible [00000000] code: mpicheck/1581081 caller is sdma_select_user_engine+0x72/0x210 [hfi1] CPU: 0 PID: 1581081 Comm: mpicheck Tainted: G S 5.16.0-rc1+ #1 Hardware name: Intel Corporation S2600WT2R/S2600WT2R, BIOS SE5C610.86B.01.01.0016.033120161139 03/31/2016 Call Trace: dump_stack_lvl+0x33/0x42 check_preemption_disabled+0xbf/0xe0 sdma_select_user_engine+0x72/0x210 [hfi1] ? _raw_spin_unlock_irqrestore+0x1f/0x31 ? hfi1_mmu_rb_insert+0x6b/0x200 [hfi1] hfi1_user_sdma_process_request+0xa02/0x1120 [hfi1] ? hfi1_write_iter+0xb8/0x200 [hfi1] hfi1_write_iter+0xb8/0x200 [hfi1] do_iter_readv_writev+0x163/0x1c0 do_iter_write+0x80/0x1c0 vfs_writev+0x88/0x1a0 ? recalibrate_cpu_khz+0x10/0x10 ? ktime_get+0x3e/0xa0 ? __fget_files+0x66/0xa0 do_writev+0x65/0x100 do_syscall_64+0x3a/0x80 Fix this long standing bug by moving the smp_processor_id() to after the rcu_read_lock(). The rcu_read_lock() implicitly disables preemption. Link: https://lore.kernel.org/r/20211129191958.101968.87329.stgit@awfm-01.cornelisnetworks.com Cc: stable@vger.kernel.org Fixes: 0cb2aa690c7e ("IB/hfi1: Add sysfs interface for affinity setup") Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2b6c24b7b586..f07d328689d3 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -838,8 +838,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, if (current->nr_cpus_allowed != 1) goto out; - cpu_id = smp_processor_id(); rcu_read_lock(); + cpu_id = smp_processor_id(); rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id, sdma_rht_params); From f6a3cfec3c01f9983e961c3327cef0db129a3c43 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 29 Nov 2021 14:20:03 -0500 Subject: [PATCH 247/868] IB/hfi1: Fix early init panic The following trace can be observed with an init failure such as firmware load failures: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0010 [#1] SMP PTI CPU: 0 PID: 537 Comm: kworker/0:3 Tainted: G OE --------- - - 4.18.0-240.el8.x86_64 #1 Workqueue: events work_for_cpu_fn RIP: 0010:0x0 Code: Bad RIP value. RSP: 0000:ffffae5f878a3c98 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff95e48e025c00 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff95e48e025c00 RBP: ffff95e4bf3660a4 R08: 0000000000000000 R09: ffffffff86d5e100 R10: ffff95e49e1de600 R11: 0000000000000001 R12: ffff95e4bf366180 R13: ffff95e48e025c00 R14: ffff95e4bf366028 R15: ffff95e4bf366000 FS: 0000000000000000(0000) GS:ffff95e4df200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 0000000f86a0a003 CR4: 00000000001606f0 Call Trace: receive_context_interrupt+0x1f/0x40 [hfi1] __free_irq+0x201/0x300 free_irq+0x2e/0x60 pci_free_irq+0x18/0x30 msix_free_irq.part.2+0x46/0x80 [hfi1] msix_clean_up_interrupts+0x2b/0x70 [hfi1] hfi1_init_dd+0x640/0x1a90 [hfi1] do_init_one.isra.19+0x34d/0x680 [hfi1] local_pci_probe+0x41/0x90 work_for_cpu_fn+0x16/0x20 process_one_work+0x1a7/0x360 worker_thread+0x1cf/0x390 ? create_worker+0x1a0/0x1a0 kthread+0x112/0x130 ? kthread_flush_work_fn+0x10/0x10 ret_from_fork+0x35/0x40 The free_irq() results in a callback to the registered interrupt handler, and rcd->do_interrupt is NULL because the receive context data structures are not fully initialized. Fix by ensuring that the do_interrupt is always assigned and adding a guards in the slow path handler to detect and handle a partially initialized receive context and noop the receive. Link: https://lore.kernel.org/r/20211129192003.101968.33612.stgit@awfm-01.cornelisnetworks.com Cc: stable@vger.kernel.org Fixes: b0ba3c18d6bf ("IB/hfi1: Move normal functions from hfi1_devdata to const array") Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/chip.c | 2 ++ drivers/infiniband/hw/hfi1/driver.c | 2 ++ drivers/infiniband/hw/hfi1/init.c | 5 ++--- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index ec37f4fd8e96..f1245c94ae26 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8415,6 +8415,8 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) */ static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) { + if (!rcd->rcvhdrq) + return; clear_recv_intr(rcd); if (check_packet_present(rcd)) force_recv_intr(rcd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 61f341c3005c..e2c634af40e9 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1012,6 +1012,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) struct hfi1_packet packet; int skip_pkt = 0; + if (!rcd->rcvhdrq) + return RCV_PKT_OK; /* Control context will always use the slow path interrupt handler */ needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 8e1236be46e1..6422dd6cae60 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -113,7 +113,6 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd, rcd->fast_handler = get_dma_rtail_setting(rcd) ? handle_receive_interrupt_dma_rtail : handle_receive_interrupt_nodma_rtail; - rcd->slow_handler = handle_receive_interrupt; hfi1_set_seq_cnt(rcd, 1); @@ -334,6 +333,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; + rcd->slow_handler = handle_receive_interrupt; + rcd->do_interrupt = rcd->slow_handler; rcd->msix_intr = CCE_NUM_MSIX_VECTORS; mutex_init(&rcd->exp_mutex); @@ -898,8 +899,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (!rcd) continue; - rcd->do_interrupt = &handle_receive_interrupt; - lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); From 60a8b5a1611b4a26de4839ab9c1fc2a9cf3e17c1 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 29 Nov 2021 14:20:08 -0500 Subject: [PATCH 248/868] IB/hfi1: Fix leak of rcvhdrtail_dummy_kvaddr This buffer is currently allocated in hfi1_init(): if (reinit) ret = init_after_reset(dd); else ret = loadtime_init(dd); if (ret) goto done; /* allocate dummy tail memory for all receive contexts */ dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64), &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); if (!dd->rcvhdrtail_dummy_kvaddr) { dd_dev_err(dd, "cannot allocate dummy tail memory\n"); ret = -ENOMEM; goto done; } The reinit triggered path will overwrite the old allocation and leak it. Fix by moving the allocation to hfi1_alloc_devdata() and the deallocation to hfi1_free_devdata(). Link: https://lore.kernel.org/r/20211129192008.101968.91302.stgit@awfm-01.cornelisnetworks.com Cc: stable@vger.kernel.org Fixes: 46b010d3eeb8 ("staging/rdma/hfi1: Workaround to prevent corruption during packet delivery") Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/init.c | 33 +++++++++++++------------------ 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 6422dd6cae60..4436ed41547c 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -875,18 +875,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (ret) goto done; - /* allocate dummy tail memory for all receive contexts */ - dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev, - sizeof(u64), - &dd->rcvhdrtail_dummy_dma, - GFP_KERNEL); - - if (!dd->rcvhdrtail_dummy_kvaddr) { - dd_dev_err(dd, "cannot allocate dummy tail memory\n"); - ret = -ENOMEM; - goto done; - } - /* dd->rcd can be NULL if early initialization failed */ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { /* @@ -1200,6 +1188,11 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) dd->tx_opstats = NULL; kfree(dd->comp_vect); dd->comp_vect = NULL; + if (dd->rcvhdrtail_dummy_kvaddr) + dma_free_coherent(&dd->pcidev->dev, sizeof(u64), + (void *)dd->rcvhdrtail_dummy_kvaddr, + dd->rcvhdrtail_dummy_dma); + dd->rcvhdrtail_dummy_kvaddr = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1297,6 +1290,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, goto bail; } + /* allocate dummy tail memory for all receive contexts */ + dd->rcvhdrtail_dummy_kvaddr = + dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64), + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); + if (!dd->rcvhdrtail_dummy_kvaddr) { + ret = -ENOMEM; + goto bail; + } + atomic_set(&dd->ipoib_rsm_usr_num, 0); return dd; @@ -1504,13 +1506,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) free_credit_return(dd); - if (dd->rcvhdrtail_dummy_kvaddr) { - dma_free_coherent(&dd->pcidev->dev, sizeof(u64), - (void *)dd->rcvhdrtail_dummy_kvaddr, - dd->rcvhdrtail_dummy_dma); - dd->rcvhdrtail_dummy_kvaddr = NULL; - } - /* * Free any resources still in use (usually just kernel contexts) * at unload; we do for ctxtcnt, because that's what we allocate. From 1e11a39a82e95ce86f849f40dda0d9c0498cebd9 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 7 Dec 2021 09:21:36 -0600 Subject: [PATCH 249/868] RDMA/irdma: Fix a user-after-free in add_pble_prm When irdma_hmc_sd_one fails, 'chunk' is freed while its still on the PBLE info list. Add the chunk entry to the PBLE info list only after successful setting of the SD in irdma_hmc_sd_one. Fixes: e8c4dbc2fcac ("RDMA/irdma: Add PBLE resource manager") Link: https://lore.kernel.org/r/20211207152135.2192-1-shiraz.saleem@intel.com Reported-by: Dan Carpenter Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/pble.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index aeeb1c310965..da032b952755 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -283,7 +283,6 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) "PBLE: next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n", pble_rsrc->next_fpm_addr, chunk->size, chunk->size); pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3); - list_add(&chunk->list, &pble_rsrc->pinfo.clist); sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ? sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa; @@ -295,6 +294,7 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) goto error; } + list_add(&chunk->list, &pble_rsrc->pinfo.clist); sd_entry->valid = true; return 0; From 117697cc935b0ab04ec66274d8e64ccfebd7d0d2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 5 Dec 2021 09:17:24 +0100 Subject: [PATCH 250/868] RDMA/irdma: Fix a potential memory allocation issue in 'irdma_prm_add_pble_mem()' 'pchunk->bitmapbuf' is a bitmap. Its size (in number of bits) is stored in 'pchunk->sizeofbitmap'. When it is allocated, the size (in bytes) is computed by: size_in_bits >> 3 There are 2 issues (numbers bellow assume that longs are 64 bits): - there is no guarantee here that 'pchunk->bitmapmem.size' is modulo BITS_PER_LONG but bitmaps are stored as longs (sizeofbitmap=8 bits will only allocate 1 byte, instead of 8 (1 long)) - the number of bytes is computed with a shift, not a round up, so we may allocate less memory than needed (sizeofbitmap=65 bits will only allocate 8 bytes (i.e. 1 long), when 2 longs are needed = 16 bytes) Fix both issues by using 'bitmap_zalloc()' and remove the useless 'bitmapmem' from 'struct irdma_chunk'. While at it, remove some useless NULL test before calling kfree/bitmap_free. Fixes: 915cc7ac0f8e ("RDMA/irdma: Add miscellaneous utility definitions") Link: https://lore.kernel.org/r/5e670b640508e14b1869c3e8e4fb970d78cbe997.1638692171.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/pble.c | 6 ++---- drivers/infiniband/hw/irdma/pble.h | 1 - drivers/infiniband/hw/irdma/utils.c | 9 ++------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index da032b952755..fed49da770f3 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -25,8 +25,7 @@ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) list_del(&chunk->list); if (chunk->type == PBLE_SD_PAGED) irdma_pble_free_paged_mem(chunk); - if (chunk->bitmapbuf) - kfree(chunk->bitmapmem.va); + bitmap_free(chunk->bitmapbuf); kfree(chunk->chunkmem.va); } } @@ -299,8 +298,7 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) return 0; error: - if (chunk->bitmapbuf) - kfree(chunk->bitmapmem.va); + bitmap_free(chunk->bitmapbuf); kfree(chunk->chunkmem.va); return ret_code; diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h index e1b3b8118a2c..aa20827dcc9d 100644 --- a/drivers/infiniband/hw/irdma/pble.h +++ b/drivers/infiniband/hw/irdma/pble.h @@ -78,7 +78,6 @@ struct irdma_chunk { u32 pg_cnt; enum irdma_alloc_type type; struct irdma_sc_dev *dev; - struct irdma_virt_mem bitmapmem; struct irdma_virt_mem chunkmem; }; diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 8b42c43fc14f..981107b40c90 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2239,15 +2239,10 @@ enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift; - pchunk->bitmapmem.size = sizeofbitmap >> 3; - pchunk->bitmapmem.va = kzalloc(pchunk->bitmapmem.size, GFP_KERNEL); - - if (!pchunk->bitmapmem.va) + pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL); + if (!pchunk->bitmapbuf) return IRDMA_ERR_NO_MEMORY; - pchunk->bitmapbuf = pchunk->bitmapmem.va; - bitmap_zero(pchunk->bitmapbuf, sizeofbitmap); - pchunk->sizeofbitmap = sizeofbitmap; /* each pble is 8 bytes hence shift by 3 */ pprm->total_pble_alloc += pchunk->size >> 3; From 25b5d6fd6d13b2de3780a0ae247befc43c4576fe Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 1 Dec 2021 17:15:08 -0600 Subject: [PATCH 251/868] RDMA/irdma: Report correct WC errors Return IBV_WC_REM_OP_ERR for responder QP errors instead of IBV_WC_REM_ACCESS_ERR. Return IBV_WC_LOC_QP_OP_ERR for errors detected on the SQ with bad opcodes Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Link: https://lore.kernel.org/r/20211201231509.1930-1-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 4108dcabece2..1bae1dca6a62 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -146,6 +146,7 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, qp->flush_code = FLUSH_PROT_ERR; break; case IRDMA_AE_AMP_BAD_QP: + case IRDMA_AE_WQE_UNEXPECTED_OPCODE: qp->flush_code = FLUSH_LOC_QP_OP_ERR; break; case IRDMA_AE_AMP_BAD_STAG_KEY: @@ -156,7 +157,6 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, case IRDMA_AE_PRIV_OPERATION_DENIED: case IRDMA_AE_IB_INVALID_REQUEST: case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: - case IRDMA_AE_IB_REMOTE_OP_ERROR: qp->flush_code = FLUSH_REM_ACCESS_ERR; qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; @@ -184,6 +184,9 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: qp->flush_code = FLUSH_MW_BIND_ERR; break; + case IRDMA_AE_IB_REMOTE_OP_ERROR: + qp->flush_code = FLUSH_REM_OP_ERR; + break; default: qp->flush_code = FLUSH_FATAL_ERR; break; From 10467ce09fefa2e74359f5b2ab1efb8909402f19 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 1 Dec 2021 17:15:09 -0600 Subject: [PATCH 252/868] RDMA/irdma: Don't arm the CQ more than two times if no CE for this CQ Completion events (CEs) are lost if the application is allowed to arm the CQ more than two times when no new CE for this CQ has been generated by the HW. Check if arming has been done for the CQ and if not, arm the CQ for any event otherwise promote to arm the CQ for any event only when the last arm event was solicited. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20211201231509.1930-2-shiraz.saleem@intel.com Signed-off-by: Tatyana Nikolova Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 2 ++ drivers/infiniband/hw/irdma/main.h | 1 + drivers/infiniband/hw/irdma/utils.c | 15 +++++++++++++++ drivers/infiniband/hw/irdma/verbs.c | 23 ++++++++++++++++++----- drivers/infiniband/hw/irdma/verbs.h | 2 ++ 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 1bae1dca6a62..b4c657f5f2f9 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -60,6 +60,8 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq) { struct irdma_cq *cq = iwcq->back_cq; + if (!cq->user_mode) + cq->armed = false; if (cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 91a497139ba3..cb218cab79ac 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -542,6 +542,7 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, void (*callback_fcn)(struct irdma_cqp_request *cqp_request), void *cb_param); void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request); +bool irdma_cq_empty(struct irdma_cq *iwcq); int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event, void *ptr); int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 981107b40c90..398736d8c78a 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2486,3 +2486,18 @@ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event) ibevent.element.qp = &iwqp->ibqp; iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context); } + +bool irdma_cq_empty(struct irdma_cq *iwcq) +{ + struct irdma_cq_uk *ukcq; + u64 qword3; + __le64 *cqe; + u8 polarity; + + ukcq = &iwcq->sc_cq.cq_uk; + cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); + get_64bit_val(cqe, 24, &qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + + return polarity != ukcq->polarity; +} diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 0f66e809d418..8cd5f9261692 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3584,18 +3584,31 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq, struct irdma_cq *iwcq; struct irdma_cq_uk *ukcq; unsigned long flags; - enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; + enum irdma_cmpl_notify cq_notify; + bool promo_event = false; + int ret = 0; + cq_notify = notify_flags == IB_CQ_SOLICITED ? + IRDMA_CQ_COMPL_SOLICITED : IRDMA_CQ_COMPL_EVENT; iwcq = to_iwcq(ibcq); ukcq = &iwcq->sc_cq.cq_uk; - if (notify_flags == IB_CQ_SOLICITED) - cq_notify = IRDMA_CQ_COMPL_SOLICITED; spin_lock_irqsave(&iwcq->lock, flags); - irdma_uk_cq_request_notification(ukcq, cq_notify); + /* Only promote to arm the CQ for any event if the last arm event was solicited. */ + if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED) + promo_event = true; + + if (!iwcq->armed || promo_event) { + iwcq->armed = true; + iwcq->last_notify = cq_notify; + irdma_uk_cq_request_notification(ukcq, cq_notify); + } + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq)) + ret = 1; spin_unlock_irqrestore(&iwcq->lock, flags); - return 0; + return ret; } static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num, diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 5c244cd321a3..d0fdef8d09ea 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -110,6 +110,8 @@ struct irdma_cq { u16 cq_size; u16 cq_num; bool user_mode; + bool armed; + enum irdma_cmpl_notify last_notify; u32 polled_cmpls; u32 cq_mem_size; struct irdma_dma_mem kmem; From 089558bc7ba785c03815a49c89e28ad9b8de51f9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 6 Dec 2021 15:38:20 -0800 Subject: [PATCH 253/868] xfs: remove all COW fork extents when remounting readonly As part of multiple customer escalations due to file data corruption after copy on write operations, I wrote some fstests that use fsstress to hammer on COW to shake things loose. Regrettably, I caught some filesystem shutdowns due to incorrect rmap operations with the following loop: mount # (0) fsstress & # (1) while true; do fsstress mount -o remount,ro # (2) fsstress mount -o remount,rw # (3) done When (2) happens, notice that (1) is still running. xfs_remount_ro will call xfs_blockgc_stop to walk the inode cache to free all the COW extents, but the blockgc mechanism races with (1)'s reader threads to take IOLOCKs and loses, which means that it doesn't clean them all out. Call such a file (A). When (3) happens, xfs_remount_rw calls xfs_reflink_recover_cow, which walks the ondisk refcount btree and frees any COW extent that it finds. This function does not check the inode cache, which means that incore COW forks of inode (A) is now inconsistent with the ondisk metadata. If one of those former COW extents are allocated and mapped into another file (B) and someone triggers a COW to the stale reservation in (A), A's dirty data will be written into (B) and once that's done, those blocks will be transferred to (A)'s data fork without bumping the refcount. The results are catastrophic -- file (B) and the refcount btree are now corrupt. Solve this race by forcing the xfs_blockgc_free_space to run synchronously, which causes xfs_icwalk to return to inodes that were skipped because the blockgc code couldn't take the IOLOCK. This is safe to do here because the VFS has already prohibited new writer threads. Fixes: 10ddf64e420f ("xfs: remove leftover CoW reservations when remounting ro") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Chandan Babu R --- fs/xfs/xfs_super.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e21459f9923a..778b57b1f020 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1765,7 +1765,10 @@ static int xfs_remount_ro( struct xfs_mount *mp) { - int error; + struct xfs_icwalk icw = { + .icw_flags = XFS_ICWALK_FLAG_SYNC, + }; + int error; /* * Cancel background eofb scanning so it cannot race with the final @@ -1773,8 +1776,13 @@ xfs_remount_ro( */ xfs_blockgc_stop(mp); - /* Get rid of any leftover CoW reservations... */ - error = xfs_blockgc_free_space(mp, NULL); + /* + * Clear out all remaining COW staging extents and speculative post-EOF + * preallocations so that we don't leave inodes requiring inactivation + * cleanups during reclaim on a read-only mount. We must process every + * cached inode, so this requires a synchronous cache scan. + */ + error = xfs_blockgc_free_space(mp, &icw); if (error) { xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; From d17b9737c2bc09b4ac6caf469826e5a7ce3ffab7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Dec 2021 11:24:16 +0300 Subject: [PATCH 254/868] net/qla3xxx: fix an error code in ql_adapter_up() The ql_wait_for_drvr_lock() fails and returns false, then this function should return an error code instead of returning success. The other problem is that the success path prints an error message netdev_err(ndev, "Releasing driver lock\n"); Delete that and re-order the code a little to make it more clear. Fixes: 5a4faa873782 ("[PATCH] qla3xxx NIC driver") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211207082416.GA16110@kili Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qla3xxx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 1e6d72adfe43..71523d747e93 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3480,20 +3480,19 @@ static int ql_adapter_up(struct ql3_adapter *qdev) spin_lock_irqsave(&qdev->hw_lock, hw_flags); - err = ql_wait_for_drvr_lock(qdev); - if (err) { - err = ql_adapter_initialize(qdev); - if (err) { - netdev_err(ndev, "Unable to initialize adapter\n"); - goto err_init; - } - netdev_err(ndev, "Releasing driver lock\n"); - ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); - } else { + if (!ql_wait_for_drvr_lock(qdev)) { netdev_err(ndev, "Could not acquire driver lock\n"); + err = -ENODEV; goto err_lock; } + err = ql_adapter_initialize(qdev); + if (err) { + netdev_err(ndev, "Unable to initialize adapter\n"); + goto err_init; + } + ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); + spin_unlock_irqrestore(&qdev->hw_lock, hw_flags); set_bit(QL_ADAPTER_UP, &qdev->flags); From 2d4fcc5ab35fac2e995f497d62439dcbb416babc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Nov 2021 10:26:05 +0300 Subject: [PATCH 255/868] clk: versatile: clk-icst: use after free on error path This frees "name" and then tries to display in as part of the error message on the next line. Swap the order. Fixes: 1b2189f3aa50 ("clk: versatile: clk-icst: Ensure clock names are unique") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211117072604.GC5237@kili Reviewed-by: Linus Walleij Signed-off-by: Stephen Boyd --- drivers/clk/versatile/clk-icst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c index d52f976dc875..d5cb372f0901 100644 --- a/drivers/clk/versatile/clk-icst.c +++ b/drivers/clk/versatile/clk-icst.c @@ -543,8 +543,8 @@ static void __init of_syscon_icst_setup(struct device_node *np) regclk = icst_clk_setup(NULL, &icst_desc, name, parent_name, map, ctype); if (IS_ERR(regclk)) { - kfree(name); pr_err("error setting up syscon ICST clock %s\n", name); + kfree(name); return; } of_clk_add_provider(np, of_clk_src_simple_get, regclk); From 5b970dfcfee9e04e041c9eeac5dbd1ccc719c249 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Nov 2021 18:06:35 +0000 Subject: [PATCH 256/868] arm64: dts: apple: t8103: Mark PCIe PERST# polarity active low in DT As the name indicates, PERST# is active low. Fix the DT description to match the HW behaviour. Fixes: ff2a8d91d80c ("arm64: apple: Add PCIe node") Link: https://lore.kernel.org/r/20211123180636.80558-3-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Bjorn Helgaas Reviewed-by: Luca Ceresoli Reviewed-by: Mark Kettenis Acked-by: Arnd Bergmann --- arch/arm64/boot/dts/apple/t8103.dtsi | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index fc8b2bb06ffe..e22c9433d5e0 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -7,6 +7,7 @@ * Copyright The Asahi Linux Contributors */ +#include #include #include #include @@ -281,7 +282,7 @@ port00: pci@0,0 { device_type = "pci"; reg = <0x0 0x0 0x0 0x0 0x0>; - reset-gpios = <&pinctrl_ap 152 0>; + reset-gpios = <&pinctrl_ap 152 GPIO_ACTIVE_LOW>; max-link-speed = <2>; #address-cells = <3>; @@ -301,7 +302,7 @@ port01: pci@1,0 { device_type = "pci"; reg = <0x800 0x0 0x0 0x0 0x0>; - reset-gpios = <&pinctrl_ap 153 0>; + reset-gpios = <&pinctrl_ap 153 GPIO_ACTIVE_LOW>; max-link-speed = <2>; #address-cells = <3>; @@ -321,7 +322,7 @@ port02: pci@2,0 { device_type = "pci"; reg = <0x1000 0x0 0x0 0x0 0x0>; - reset-gpios = <&pinctrl_ap 33 0>; + reset-gpios = <&pinctrl_ap 33 GPIO_ACTIVE_LOW>; max-link-speed = <1>; #address-cells = <3>; From 87620512681a20ef24ece85ac21ff90c9efed37d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 23 Nov 2021 18:06:36 +0000 Subject: [PATCH 257/868] PCI: apple: Fix PERST# polarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that PERST# is properly defined as active-low in the device tree, fix the driver to correctly drive the line independently of the implied polarity. Suggested-by: Pali Rohár Fixes: 1e33888fbe44 ("PCI: apple: Add initial hardware bring-up") Link: https://lore.kernel.org/r/20211123180636.80558-4-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Bjorn Helgaas Reviewed-by: Luca Ceresoli --- drivers/pci/controller/pcie-apple.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-apple.c b/drivers/pci/controller/pcie-apple.c index c384777b0374..b090924b41fe 100644 --- a/drivers/pci/controller/pcie-apple.c +++ b/drivers/pci/controller/pcie-apple.c @@ -540,7 +540,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK); /* Assert PERST# before setting up the clock */ - gpiod_set_value(reset, 0); + gpiod_set_value(reset, 1); ret = apple_pcie_setup_refclk(pcie, port); if (ret < 0) @@ -551,7 +551,7 @@ static int apple_pcie_setup_port(struct apple_pcie *pcie, /* Deassert PERST# */ rmw_set(PORT_PERST_OFF, port->base + PORT_PERST); - gpiod_set_value(reset, 1); + gpiod_set_value(reset, 0); /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */ msleep(100); From f23ab04dd6f703e282bb2d51fe3ae14f4b88a628 Mon Sep 17 00:00:00 2001 From: Yahui Cao Date: Wed, 5 May 2021 14:18:00 -0700 Subject: [PATCH 258/868] ice: fix FDIR init missing when reset VF When VF is being reset, ice_reset_vf() will be called and FDIR resource should be released and initialized again. Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Signed-off-by: Yahui Cao Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 217ff5e9a6f1..c2431bc9d9ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1617,6 +1617,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_vc_set_default_allowlist(vf); ice_vf_fdir_exit(vf); + ice_vf_fdir_init(vf); /* clean VF control VSI when resetting VFs since it should be * setup only when VF creates its first FDIR rule. */ @@ -1747,6 +1748,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) } ice_vf_fdir_exit(vf); + ice_vf_fdir_init(vf); /* clean VF control VSI when resetting VF since it should be setup * only when VF creates its first FDIR rule. */ From 2657e16d8c52fb6ffc7250b0b6536f93886e32d6 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Mon, 12 Jul 2021 07:54:25 -0400 Subject: [PATCH 259/868] ice: rearm other interrupt cause register after enabling VFs The other interrupt cause register (OICR), global interrupt 0, is disabled when enabling VFs to prevent handling VFLR. If the OICR is not rearmed then the VF cannot communicate with the PF. Rearm the OICR after enabling VFs. Fixes: 916c7fdf5e93 ("ice: Separate VF VSI initialization/creation from reset flow") Signed-off-by: Paul Greenwalt Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index c2431bc9d9ce..6427e7ec93de 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2023,6 +2023,10 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) if (ret) goto err_unroll_sriov; + /* rearm global interrupts */ + if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) + ice_irq_dynamic_ena(hw, NULL, NULL); + return 0; err_unroll_sriov: From 6d39ea19b0fb6cc72427c862b32d39f5af468be3 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 12 Oct 2021 13:31:21 -0700 Subject: [PATCH 260/868] ice: Fix problems with DSCP QoS implementation The patch that implemented DSCP QoS implementation removed a bandwidth check that was used to check for a specific condition caused by some corner cases. This check should not of been removed. The same patch also added a check for when the DCBx state could be changed in relation to DSCP, but the check was erroneously added nested in a check for CEE mode, which made the check useless. Fix these problems by re-adding the bandwidth check and relocating the DSCP mode check earlier in the function that changes DCBx state in the driver. Fixes: 2a87bd73e50d ("ice: Add DSCP support") Reported-by: kernel test robot Signed-off-by: Dave Ertman Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dcb_nl.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 7fdeb411b6df..3eb01731e496 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -97,6 +97,9 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) new_cfg->etscfg.maxtcs = pf->hw.func_caps.common_cap.maxtc; + if (!bwcfg) + new_cfg->etscfg.tcbwtable[0] = 100; + if (!bwrec) new_cfg->etsrec.tcbwtable[0] = 100; @@ -167,15 +170,18 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) if (mode == pf->dcbx_cap) return ICE_DCB_NO_HW_CHG; - pf->dcbx_cap = mode; qos_cfg = &pf->hw.port_info->qos_cfg; - if (mode & DCB_CAP_DCBX_VER_CEE) { - if (qos_cfg->local_dcbx_cfg.pfc_mode == ICE_QOS_MODE_DSCP) - return ICE_DCB_NO_HW_CHG; + + /* DSCP configuration is not DCBx negotiated */ + if (qos_cfg->local_dcbx_cfg.pfc_mode == ICE_QOS_MODE_DSCP) + return ICE_DCB_NO_HW_CHG; + + pf->dcbx_cap = mode; + + if (mode & DCB_CAP_DCBX_VER_CEE) qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE; - } else { + else qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE; - } dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode); return ICE_DCB_HW_CHG_RST; From 28dc1b86f8ea9fd6f4c9e0b363db73ecabf84e22 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 22 Oct 2021 17:28:17 -0700 Subject: [PATCH 261/868] ice: ignore dropped packets during init If the hardware is constantly receiving unicast or broadcast packets during driver load, the device previously counted many GLV_RDPC (VSI dropped packets) events during init. This causes confusing dropped packet statistics during driver load. The dropped packets counter incrementing does stop once the driver finishes loading. Avoid this problem by baselining our statistics at the end of driver open instead of the end of probe. Fixes: cdedef59deb0 ("ice: Configure VSIs for Tx/Rx") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4d1fc48c9744..c6d6ce52e2ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5881,6 +5881,9 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } + /* clear this now, and the first stats read will be used as baseline */ + vsi->stat_offsets_loaded = false; + ice_service_task_schedule(pf); return 0; From 0e32ff024035b693a3304b3ffe30fba58e2ab48c Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 16 Nov 2021 11:24:26 +0100 Subject: [PATCH 262/868] ice: fix choosing UDP header type In tunnels packet there can be two UDP headers: - outer which for hw should be mark as ICE_UDP_OF - inner which for hw should be mark as ICE_UDP_ILOS or as ICE_TCP_IL if inner header is of TCP type In none tunnels packet header can be: - UDP, which for hw should be mark as ICE_UDP_ILOS - TCP, which for hw should be mark as ICE_TCP_IL Change incorrect ICE_UDP_OF for none tunnel packets to ICE_UDP_ILOS. ICE_UDP_OF is incorrect for none tunnel packets and setting it leads to error from hw while adding this kind of recipe. In summary, for tunnel outer port type should always be set to ICE_UDP_OF, for none tunnel outer and tunnel inner it should always be set to ICE_UDP_ILOS. Fixes: 9e300987d4a8 ("ice: VXLAN and Geneve TC support") Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 27 ++++++++------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index e5d23feb6701..384439a267ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -74,21 +74,13 @@ static enum ice_protocol_type ice_proto_type_from_ipv6(bool inner) return inner ? ICE_IPV6_IL : ICE_IPV6_OFOS; } -static enum ice_protocol_type -ice_proto_type_from_l4_port(bool inner, u16 ip_proto) +static enum ice_protocol_type ice_proto_type_from_l4_port(u16 ip_proto) { - if (inner) { - switch (ip_proto) { - case IPPROTO_UDP: - return ICE_UDP_ILOS; - } - } else { - switch (ip_proto) { - case IPPROTO_TCP: - return ICE_TCP_IL; - case IPPROTO_UDP: - return ICE_UDP_OF; - } + switch (ip_proto) { + case IPPROTO_TCP: + return ICE_TCP_IL; + case IPPROTO_UDP: + return ICE_UDP_ILOS; } return 0; @@ -191,8 +183,9 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, i++; } - if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) { - list[i].type = ice_proto_type_from_l4_port(false, hdr->l3_key.ip_proto); + if ((flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) && + hdr->l3_key.ip_proto == IPPROTO_UDP) { + list[i].type = ICE_UDP_OF; list[i].h_u.l4_hdr.dst_port = hdr->l4_key.dst_port; list[i].m_u.l4_hdr.dst_port = hdr->l4_mask.dst_port; i++; @@ -317,7 +310,7 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, ICE_TC_FLWR_FIELD_SRC_L4_PORT)) { struct ice_tc_l4_hdr *l4_key, *l4_mask; - list[i].type = ice_proto_type_from_l4_port(inner, headers->l3_key.ip_proto); + list[i].type = ice_proto_type_from_l4_port(headers->l3_key.ip_proto); l4_key = &headers->l4_key; l4_mask = &headers->l4_mask; From de6acd1cdd4d38823b7f4adae82e8a7d62993354 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Mon, 22 Nov 2021 16:39:25 +0100 Subject: [PATCH 263/868] ice: fix adding different tunnels Adding filters with the same values inside for VXLAN and Geneve causes HW error, because it looks exactly the same. To choose between different type of tunnels new recipe is needed. Add storing tunnel types in creating recipes function and start checking it in finding function. Change getting open tunnels function to return port on correct tunnel type. This is needed to copy correct port to dummy packet. Block user from adding enc_dst_port via tc flower, because VXLAN and Geneve filters can be created only with destination port which was previously opened. Fixes: 8b032a55c1bd5 ("ice: low level support for tunnels") Signed-off-by: Michal Swiatkowski Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_ethtool_fdir.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_fdir.c | 2 +- .../net/ethernet/intel/ice/ice_flex_pipe.c | 7 +++++-- .../net/ethernet/intel/ice/ice_flex_pipe.h | 3 ++- drivers/net/ethernet/intel/ice/ice_switch.c | 21 ++++++++++++------- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 3 ++- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 38960bcc384c..b6e7f47c8c78 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1268,7 +1268,7 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool is_tun = tun == ICE_FD_HW_SEG_TUN; int err; - if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num)) + if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num, TNL_ALL)) continue; err = ice_fdir_write_fltr(pf, input, add, is_tun); if (err) @@ -1652,7 +1652,7 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) } /* return error if not an update and no available filters */ - fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1; + fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port, TNL_ALL) ? 2 : 1; if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) && ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) { dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index cbd8424631e3..4dca009bdd50 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -924,7 +924,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len); loc = pkt; } else { - if (!ice_get_open_tunnel_port(hw, &tnl_port)) + if (!ice_get_open_tunnel_port(hw, &tnl_port, TNL_ALL)) return ICE_ERR_DOES_NOT_EXIST; if (!ice_fdir_pkt[idx].tun_pkt) return ICE_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 23cfcceb1536..6ad1c2559724 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1899,9 +1899,11 @@ static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) * ice_get_open_tunnel_port - retrieve an open tunnel port * @hw: pointer to the HW structure * @port: returns open port + * @type: type of tunnel, can be TNL_LAST if it doesn't matter */ bool -ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port) +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port, + enum ice_tunnel_type type) { bool res = false; u16 i; @@ -1909,7 +1911,8 @@ ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port) mutex_lock(&hw->tnl_lock); for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port) { + if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port && + (type == TNL_LAST || type == hw->tnl.tbl[i].type)) { *port = hw->tnl.tbl[i].port; res = true; break; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 344c2637facd..a2863f38fd1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -33,7 +33,8 @@ enum ice_status ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt, unsigned long *bm, struct list_head *fv_list); bool -ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port); +ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port, + enum ice_tunnel_type type); int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, unsigned int idx, struct udp_tunnel_info *ti); int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 793f4a9fc2cd..183d93033890 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -3796,10 +3796,13 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { * ice_find_recp - find a recipe * @hw: pointer to the hardware structure * @lkup_exts: extension sequence to match + * @tun_type: type of recipe tunnel * * Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found. */ -static u16 ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts) +static u16 +ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts, + enum ice_sw_tunnel_type tun_type) { bool refresh_required = true; struct ice_sw_recipe *recp; @@ -3860,8 +3863,9 @@ static u16 ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts) } /* If for "i"th recipe the found was never set to false * then it means we found our match + * Also tun type of recipe needs to be checked */ - if (found) + if (found && recp[i].tun_type == tun_type) return i; /* Return the recipe ID */ } } @@ -4651,11 +4655,12 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, } /* Look for a recipe which matches our requested fv / mask list */ - *rid = ice_find_recp(hw, lkup_exts); + *rid = ice_find_recp(hw, lkup_exts, rinfo->tun_type); if (*rid < ICE_MAX_NUM_RECIPES) /* Success if found a recipe that match the existing criteria */ goto err_unroll; + rm->tun_type = rinfo->tun_type; /* Recipe we need does not exist, add a recipe */ status = ice_add_sw_recipe(hw, rm, profiles); if (status) @@ -4958,11 +4963,13 @@ ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type, switch (tun_type) { case ICE_SW_TUN_VXLAN: - case ICE_SW_TUN_GENEVE: - if (!ice_get_open_tunnel_port(hw, &open_port)) + if (!ice_get_open_tunnel_port(hw, &open_port, TNL_VXLAN)) + return ICE_ERR_CFG; + break; + case ICE_SW_TUN_GENEVE: + if (!ice_get_open_tunnel_port(hw, &open_port, TNL_GENEVE)) return ICE_ERR_CFG; break; - default: /* Nothing needs to be done for this tunnel type */ return 0; @@ -5555,7 +5562,7 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) return status; - rid = ice_find_recp(hw, &lkup_exts); + rid = ice_find_recp(hw, &lkup_exts, rinfo->tun_type); /* If did not find a recipe that match the existing criteria */ if (rid == ICE_MAX_NUM_RECIPES) return ICE_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 384439a267ad..25cca5c4ae57 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -795,7 +795,8 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, headers->l3_mask.ttl = match.mask->ttl; } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) && + fltr->tunnel_type != TNL_VXLAN && fltr->tunnel_type != TNL_GENEVE) { struct flow_match_ports match; flow_rule_match_enc_ports(rule, &match); From 75feae73a28020e492fbad2323245455ef69d687 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 7 Dec 2021 20:16:36 +0000 Subject: [PATCH 264/868] block: fix single bio async DIO error handling BUG: KASAN: use-after-free in io_submit_one+0x496/0x2fe0 fs/aio.c:1882 CPU: 2 PID: 15100 Comm: syz-executor873 Not tainted 5.16.0-rc1-syzk #1 Hardware name: Red Hat KVM, BIOS 1.13.0-2.module+el8.3.0+7860+a7792d29 04/01/2014 Call Trace: [...] refcount_dec_and_test include/linux/refcount.h:333 [inline] iocb_put fs/aio.c:1161 [inline] io_submit_one+0x496/0x2fe0 fs/aio.c:1882 __do_sys_io_submit fs/aio.c:1938 [inline] __se_sys_io_submit fs/aio.c:1908 [inline] __x64_sys_io_submit+0x1c7/0x4a0 fs/aio.c:1908 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3a/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae __blkdev_direct_IO_async() returns errors from bio_iov_iter_get_pages() directly, in which case upper layers won't be expecting ->ki_complete to be called by the block layer and will terminate the request. However, there is also bio_endio() leading to a second ->ki_complete and a double free. Fixes: 54a88eb838d37 ("block: add single bio async direct IO helper") Reported-by: George Kennedy Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c9eb786f6cef041e159e6287de131bec0719ad5c.1638907997.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- block/fops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/fops.c b/block/fops.c index ad732a36f9b3..8d329ca56b0f 100644 --- a/block/fops.c +++ b/block/fops.c @@ -340,8 +340,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, } else { ret = bio_iov_iter_get_pages(bio, iter); if (unlikely(ret)) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); + bio_put(bio); return ret; } } From d43b75fbc23f0ac1ef9c14a5a166d3ccb761a451 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 26 Nov 2021 15:36:12 +0100 Subject: [PATCH 265/868] vrf: don't run conntrack on vrf with !dflt qdisc After the below patch, the conntrack attached to skb is set to "notrack" in the context of vrf device, for locally generated packets. But this is true only when the default qdisc is set to the vrf device. When changing the qdisc, notrack is not set anymore. In fact, there is a shortcut in the vrf driver, when the default qdisc is set, see commit dcdd43c41e60 ("net: vrf: performance improvements for IPv4") for more details. This patch ensures that the behavior is always the same, whatever the qdisc is. To demonstrate the difference, a new test is added in conntrack_vrf.sh. Fixes: 8c9c296adfae ("vrf: run conntrack only in context of lower/physdev for locally generated packets") Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Reviewed-by: David Ahern Signed-off-by: Pablo Neira Ayuso --- drivers/net/vrf.c | 8 ++--- .../selftests/netfilter/conntrack_vrf.sh | 30 ++++++++++++++++--- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index ccf677015d5b..38c2f0dbe795 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -768,8 +768,6 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; - vrf_nf_set_untracked(skb); - err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); @@ -790,6 +788,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); @@ -998,8 +998,6 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; - vrf_nf_set_untracked(skb); - err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip_out_direct_finish); @@ -1021,6 +1019,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh index 91f3ef0f1192..8b5ea9234588 100755 --- a/tools/testing/selftests/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -150,11 +150,27 @@ EOF # oifname is the vrf device. test_masquerade_vrf() { + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc add dev tvrf root $qdisc + fi + ip netns exec $ns0 conntrack -F 2>/dev/null ip netns exec $ns0 nft -f - < Date: Sat, 27 Nov 2021 11:33:37 +0100 Subject: [PATCH 266/868] nft_set_pipapo: Fix bucket load in AVX2 lookup routine for six 8-bit groups The sixth byte of packet data has to be looked up in the sixth group, not in the seventh one, even if we load the bucket data into ymm6 (and not ymm5, for convenience of tracking stalls). Without this fix, matching on a MAC address as first field of a set, if 8-bit groups are selected (due to a small set size) would fail, that is, the given MAC address would never match. Reported-by: Nikita Yushchenko Cc: # 5.6.x Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation") Signed-off-by: Stefano Brivio Tested-By: Nikita Yushchenko Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo_avx2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index e517663e0cd1..6f4116e72958 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -886,7 +886,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize); NFT_PIPAPO_AVX2_AND(5, 0, 1); - NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize); NFT_PIPAPO_AVX2_AND(7, 2, 3); /* Stall */ From 0de53b0ffb5b22b52c1e0bd4d9e18cbbce5801d0 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sat, 27 Nov 2021 11:33:38 +0100 Subject: [PATCH 267/868] selftests: netfilter: Add correctness test for mac,net set type The existing net,mac test didn't cover the issue recently reported by Nikita Yushchenko, where MAC addresses wouldn't match if given as first field of a concatenated set with AVX2 and 8-bit groups, because there's a different code path covering the lookup of six 8-bit groups (MAC addresses) if that's the first field. Add a similar mac,net test, with MAC address and IPv4 address swapped in the set specification. Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_concat_range.sh | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index 5a4938d6dcf2..ed61f6cab60f 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -23,8 +23,8 @@ TESTS="reported_issues correctness concurrency timeout" # Set types, defined by TYPE_ variables below TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto - net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port - net_port_mac_proto_net" + net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp + net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below BUGS="flush_remove_add" @@ -277,6 +277,23 @@ perf_entries 1000 perf_proto ipv4 " +TYPE_mac_net=" +display mac,net +type_spec ether_addr . ipv4_addr +chain_spec ether saddr . ip saddr +dst +src mac addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + TYPE_net_mac_icmp=" display net,mac - ICMP type_spec ipv4_addr . ether_addr @@ -984,7 +1001,8 @@ format() { fi done for f in ${src}; do - __expr="${__expr} . " + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + __start="$(eval format_"${f}" "${srcstart}")" __end="$(eval format_"${f}" "${srcend}")" From 962e5a40358787105f126ab1dc01604da3d169e9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 30 Nov 2021 11:34:04 +0100 Subject: [PATCH 268/868] netfilter: nft_exthdr: break evaluation if setting TCP option fails Break rule evaluation on malformed TCP options. Fixes: 99d1712bc41c ("netfilter: exthdr: tcp option set support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index af4ee874a067..dbe1f2e7dd9e 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -236,7 +236,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) - return; + goto err; opt = (u8 *)tcph; for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { @@ -251,16 +251,16 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, continue; if (i + optl > tcphdr_len || priv->len + priv->offset > optl) - return; + goto err; if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + i + priv->len)) - return; + goto err; tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) - return; + goto err; offset = i + priv->offset; @@ -303,6 +303,9 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, return; } + return; +err: + regs->verdict.code = NFT_BREAK; } static void nft_exthdr_sctp_eval(const struct nft_expr *expr, From d46cea0e6933da93c5373a46e3dc7e5d0e56bedb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Dec 2021 15:33:23 +0100 Subject: [PATCH 269/868] selftests: netfilter: switch zone stress to socat centos9 has nmap-ncat which doesn't like the '-q' option, use socat. While at it, mark test skipped if needed tools are missing. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_zones_many.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh index ac646376eb01..04633119b29a 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -18,11 +18,17 @@ cleanup() ip netns del $ns } -ip netns add $ns -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $gw" - exit $ksft_skip -fi +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "socat -V" "run test without socat tool" +checktool "ip netns add $ns" "create net namespace" trap cleanup EXIT @@ -71,7 +77,8 @@ EOF local start=$(date +%s%3N) i=$((i + 10000)) j=$((j + 1)) - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null + # nft rule in output places each packet in a different zone. + dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break From 802a7dc5cf1bef06f7b290ce76d478138408d6b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Dec 2021 10:03:23 -0800 Subject: [PATCH 270/868] netfilter: conntrack: annotate data-races around ct->timeout (struct nf_conn)->timeout can be read/written locklessly, add READ_ONCE()/WRITE_ONCE() to prevent load/store tearing. BUG: KCSAN: data-race in __nf_conntrack_alloc / __nf_conntrack_find_get write to 0xffff888132e78c08 of 4 bytes by task 6029 on cpu 0: __nf_conntrack_alloc+0x158/0x280 net/netfilter/nf_conntrack_core.c:1563 init_conntrack+0x1da/0xb30 net/netfilter/nf_conntrack_core.c:1635 resolve_normal_ct+0x502/0x610 net/netfilter/nf_conntrack_core.c:1746 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 nf_hook include/linux/netfilter.h:262 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline] tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680 __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864 tcp_push_pending_frames include/net/tcp.h:1897 [inline] tcp_data_snd_check+0x62/0x2e0 net/ipv4/tcp_input.c:5452 tcp_rcv_established+0x880/0x10e0 net/ipv4/tcp_input.c:5947 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0xf2/0x270 net/core/sock.c:2768 release_sock+0x40/0x110 net/core/sock.c:3300 sk_stream_wait_memory+0x435/0x700 net/core/stream.c:145 tcp_sendmsg_locked+0xb85/0x25a0 net/ipv4/tcp.c:1402 tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1440 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:644 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] __sys_sendto+0x21e/0x2c0 net/socket.c:2036 __do_sys_sendto net/socket.c:2048 [inline] __se_sys_sendto net/socket.c:2044 [inline] __x64_sys_sendto+0x74/0x90 net/socket.c:2044 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888132e78c08 of 4 bytes by task 17446 on cpu 1: nf_ct_is_expired include/net/netfilter/nf_conntrack.h:286 [inline] ____nf_conntrack_find net/netfilter/nf_conntrack_core.c:776 [inline] __nf_conntrack_find_get+0x1c7/0xac0 net/netfilter/nf_conntrack_core.c:807 resolve_normal_ct+0x273/0x610 net/netfilter/nf_conntrack_core.c:1734 nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 nf_hook include/linux/netfilter.h:262 [inline] NF_HOOK include/linux/netfilter.h:305 [inline] ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 __tcp_send_ack+0x1fd/0x300 net/ipv4/tcp_output.c:3956 tcp_send_ack+0x23/0x30 net/ipv4/tcp_output.c:3962 __tcp_ack_snd_check+0x2d8/0x510 net/ipv4/tcp_input.c:5478 tcp_ack_snd_check net/ipv4/tcp_input.c:5523 [inline] tcp_rcv_established+0x8c2/0x10e0 net/ipv4/tcp_input.c:5948 tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0xf2/0x270 net/core/sock.c:2768 release_sock+0x40/0x110 net/core/sock.c:3300 tcp_sendpage+0x94/0xb0 net/ipv4/tcp.c:1114 inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833 rds_tcp_xmit+0x376/0x5f0 net/rds/tcp_send.c:118 rds_send_xmit+0xbed/0x1500 net/rds/send.c:367 rds_send_worker+0x43/0x200 net/rds/threads.c:200 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 worker_thread+0x616/0xa70 kernel/workqueue.c:2445 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00027cc2 -> 0x00000000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 17446 Comm: kworker/u4:5 Tainted: G W 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: krdsd rds_send_worker Note: I chose an arbitrary commit for the Fixes: tag, because I do not think we need to backport this fix to very old kernels. Fixes: e37542ba111f ("netfilter: conntrack: avoid possible false sharing") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 6 +++--- net/netfilter/nf_conntrack_core.c | 6 +++--- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_flow_table_core.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cc663c68ddc4..d24b0a34c8f0 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -276,14 +276,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; return timeout > 0 ? timeout : 0; } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { - return (__s32)(ct->timeout - nfct_time_stamp) <= 0; + return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ @@ -302,7 +302,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) - ct->timeout = nfct_time_stamp + NF_CT_DAY; + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 770a63103c7a..4712a90a1820 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -684,7 +684,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) tstamp = nf_conn_tstamp_find(ct); if (tstamp) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; tstamp->stop = ktime_get_real_ns(); if (timeout < 0) @@ -1036,7 +1036,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) } /* We want the clashing entry to go away real soon: 1 second timeout. */ - loser_ct->timeout = nfct_time_stamp + HZ; + WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ); /* IPS_NAT_CLASH removes the entry automatically on the first * reply. Also prevents UDP tracker from moving the entry to @@ -1560,7 +1560,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - ct->timeout = 0; + WRITE_ONCE(ct->timeout, 0); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset, 0, offsetof(struct nf_conn, proto) - diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c7708bde057c..81d03acf68d4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1998,7 +1998,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, if (timeout > INT_MAX) timeout = INT_MAX; - ct->timeout = nfct_time_stamp + (u32)timeout; + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 87a7388b6c89..ed37bb9b4e58 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -201,8 +201,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (timeout < 0) timeout = 0; - if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) - ct->timeout = nfct_time_stamp + timeout; + if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) + WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } static void flow_offload_fixup_ct_state(struct nf_conn *ct) From 54baf56eaa40aa5cdcd02b3c20d593e4e1211220 Mon Sep 17 00:00:00 2001 From: Mike Tipton Date: Mon, 8 Nov 2021 20:34:38 -0800 Subject: [PATCH 271/868] clk: Don't parent clks until the parent is fully registered Before commit fc0c209c147f ("clk: Allow parents to be specified without string names") child clks couldn't find their parent until the parent clk was added to a list in __clk_core_init(). After that commit, child clks can reference their parent clks directly via a clk_hw pointer, or they can lookup that clk_hw pointer via DT if the parent clk is registered with an OF clk provider. The common clk framework treats hw->core being non-NULL as "the clk is registered" per the logic within clk_core_fill_parent_index(): parent = entry->hw->core; /* * We have a direct reference but it isn't registered yet? * Orphan it and let clk_reparent() update the orphan status * when the parent is registered. */ if (!parent) Therefore we need to be extra careful to not set hw->core until the clk is fully registered with the clk framework. Otherwise we can get into a situation where a child finds a parent clk and we move the child clk off the orphan list when the parent isn't actually registered, wrecking our enable accounting and breaking critical clks. Consider the following scenario: CPU0 CPU1 ---- ---- struct clk_hw clkBad; struct clk_hw clkA; clkA.init.parent_hws = { &clkBad }; clk_hw_register(&clkA) clk_hw_register(&clkBad) ... __clk_register() hw->core = core ... __clk_register() __clk_core_init() clk_prepare_lock() __clk_init_parent() clk_core_get_parent_by_index() clk_core_fill_parent_index() if (entry->hw) { parent = entry->hw->core; At this point, 'parent' points to clkBad even though clkBad hasn't been fully registered yet. Ouch! A similar problem can happen if a clk controller registers orphan clks that are referenced in the DT node of another clk controller. Let's fix all this by only setting the hw->core pointer underneath the clk prepare lock in __clk_core_init(). This way we know that clk_core_fill_parent_index() can't see hw->core be non-NULL until the clk is fully registered. Fixes: fc0c209c147f ("clk: Allow parents to be specified without string names") Signed-off-by: Mike Tipton Link: https://lore.kernel.org/r/20211109043438.4639-1-quic_mdtipton@quicinc.com [sboyd@kernel.org: Reword commit text, update comment] Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f467d63bbf1e..566ee2c78709 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3418,6 +3418,14 @@ static int __clk_core_init(struct clk_core *core) clk_prepare_lock(); + /* + * Set hw->core after grabbing the prepare_lock to synchronize with + * callers of clk_core_fill_parent_index() where we treat hw->core + * being NULL as the clk not being registered yet. This is crucial so + * that clks aren't parented until their parent is fully registered. + */ + core->hw->core = core; + ret = clk_pm_runtime_get(core); if (ret) goto unlock; @@ -3582,8 +3590,10 @@ static int __clk_core_init(struct clk_core *core) out: clk_pm_runtime_put(core); unlock: - if (ret) + if (ret) { hlist_del_init(&core->child_node); + core->hw->core = NULL; + } clk_prepare_unlock(); @@ -3847,7 +3857,6 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) core->num_parents = init->num_parents; core->min_rate = 0; core->max_rate = ULONG_MAX; - hw->core = core; ret = clk_core_populate_parent_map(core, init); if (ret) @@ -3865,7 +3874,7 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_create_clk; } - clk_core_link_consumer(hw->core, hw->clk); + clk_core_link_consumer(core, hw->clk); ret = __clk_core_init(core); if (!ret) From d76c51f976ed1095dcd8c5c85ec9d8fed77a3e05 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 7 Dec 2021 00:39:31 +0300 Subject: [PATCH 272/868] selftests: tls: add missing AES-CCM cipher tests Add tests for TLSv1.2 and TLSv1.3 with AES-CCM cipher. Signed-off-by: Vadim Fedorenko Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 8a22db0cca49..fb1bb402ee10 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -31,6 +31,7 @@ struct tls_crypto_info_keys { struct tls12_crypto_info_chacha20_poly1305 chacha20; struct tls12_crypto_info_sm4_gcm sm4gcm; struct tls12_crypto_info_sm4_ccm sm4ccm; + struct tls12_crypto_info_aes_ccm_128 aesccm128; }; size_t len; }; @@ -61,6 +62,11 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->sm4ccm.info.version = tls_version; tls12->sm4ccm.info.cipher_type = cipher_type; break; + case TLS_CIPHER_AES_CCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aes_ccm_128); + tls12->aesccm128.info.version = tls_version; + tls12->aesccm128.info.cipher_type = cipher_type; + break; default: break; } @@ -261,6 +267,18 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) .cipher_type = TLS_CIPHER_SM4_CCM, }; +FIXTURE_VARIANT_ADD(tls, 12_aes_ccm) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; + +FIXTURE_VARIANT_ADD(tls, 13_aes_ccm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; From 13bf99ab2130783e2b1988ef415585e3af7df97b Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 7 Dec 2021 00:39:32 +0300 Subject: [PATCH 273/868] selftests: tls: add missing AES256-GCM cipher Add tests for TLSv1.2 and TLSv1.3 with AES256-GCM cipher Signed-off-by: Vadim Fedorenko Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index fb1bb402ee10..6e468e0f42f7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -32,6 +32,7 @@ struct tls_crypto_info_keys { struct tls12_crypto_info_sm4_gcm sm4gcm; struct tls12_crypto_info_sm4_ccm sm4ccm; struct tls12_crypto_info_aes_ccm_128 aesccm128; + struct tls12_crypto_info_aes_gcm_256 aesgcm256; }; size_t len; }; @@ -67,6 +68,11 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->aesccm128.info.version = tls_version; tls12->aesccm128.info.cipher_type = cipher_type; break; + case TLS_CIPHER_AES_GCM_256: + tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_256); + tls12->aesgcm256.info.version = tls_version; + tls12->aesgcm256.info.cipher_type = cipher_type; + break; default: break; } @@ -279,6 +285,18 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_ccm) .cipher_type = TLS_CIPHER_AES_CCM_128, }; +FIXTURE_VARIANT_ADD(tls, 12_aes_gcm_256) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; + +FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; From 6ebe4b350833a535d1df02591911b5f5fba3b275 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 6 Dec 2021 17:17:23 +0100 Subject: [PATCH 274/868] MAINTAINERS: net: mlxsw: Remove Jiri as a maintainer, add myself Jiri has moved on and will not carry out the mlxsw maintainership duty any longer. Add myself as a co-maintainer instead. Signed-off-by: Petr Machata Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/45b54312cdebaf65c5d110b15a5dd2df795bf2be.1638807297.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index faa9c34d837d..7e51081b6708 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12180,8 +12180,8 @@ F: drivers/net/ethernet/mellanox/mlx5/core/fpga/* F: include/linux/mlx5/mlx5_ifc_fpga.h MELLANOX ETHERNET SWITCH DRIVERS -M: Jiri Pirko M: Ido Schimmel +M: Petr Machata L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com From 51a08bdeca27988a17c87b87d8e64ffecbd2a172 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 7 Dec 2021 12:54:19 +0100 Subject: [PATCH 275/868] cifs: Fix crash on unload of cifs_arc4.ko The exit function is wrongly placed in the __init section and this leads to a crash when the module is unloaded. Just remove both the init and exit functions since this module does not need them. Fixes: 71c02863246167b3d ("cifs: fork arc4 and create a separate module...") Signed-off-by: Vincent Whitchurch Acked-by: Ronnie Sahlberg Acked-by: Paulo Alcantara (SUSE) Cc: stable@vger.kernel.org # 5.15 Signed-off-by: Steve French --- fs/smbfs_common/cifs_arc4.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/smbfs_common/cifs_arc4.c b/fs/smbfs_common/cifs_arc4.c index 85ba15a60b13..043e4cb839fa 100644 --- a/fs/smbfs_common/cifs_arc4.c +++ b/fs/smbfs_common/cifs_arc4.c @@ -72,16 +72,3 @@ void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int l ctx->y = y; } EXPORT_SYMBOL_GPL(cifs_arc4_crypt); - -static int __init -init_smbfs_common(void) -{ - return 0; -} -static void __init -exit_smbfs_common(void) -{ -} - -module_init(init_smbfs_common) -module_exit(exit_smbfs_common) From e6f60c51f0435862020bcd2d1e3257caaafe5650 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Sun, 5 Dec 2021 23:38:10 +0500 Subject: [PATCH 276/868] gve: fix for null pointer dereference. Avoid passing NULL skb to __skb_put() function call if napi_alloc_skb() returns NULL. Fixes: 37149e9374bf ("gve: Implement packet continuation for RX.") Signed-off-by: Ameer Hamza Link: https://lore.kernel.org/r/20211205183810.8299-1-amhamza.mgc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_utils.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_utils.c b/drivers/net/ethernet/google/gve/gve_utils.c index 88ca49cbc1e2..d57508bc4307 100644 --- a/drivers/net/ethernet/google/gve/gve_utils.c +++ b/drivers/net/ethernet/google/gve/gve_utils.c @@ -68,6 +68,9 @@ struct sk_buff *gve_rx_copy(struct net_device *dev, struct napi_struct *napi, set_protocol = ctx->curr_frag_cnt == ctx->expected_frag_cnt - 1; } else { skb = napi_alloc_skb(napi, len); + + if (unlikely(!skb)) + return NULL; set_protocol = true; } __skb_put(skb, len); From a97770cc4016c2733bcef9dbe3d5b1ad02d13356 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 6 Dec 2021 16:12:27 +0800 Subject: [PATCH 277/868] net: phy: Remove unnecessary indentation in the comments of phy_device Fix warning as: linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:543: WARNING: Unexpected indentation. linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:544: WARNING: Block quote ends without a blank line; unexpected unindent. linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:546: WARNING: Unexpected indentation. Suggested-by: Akira Yokosawa Signed-off-by: Yanteng Si Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/phy.h b/include/linux/phy.h index 96e43fbb2dd8..cbf03a5f9cf5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -538,11 +538,12 @@ struct macsec_ops; * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. - * Bits [15:0] are free to use by the PHY driver to communicate - * driver specific behavior. - * Bits [23:16] are currently reserved for future use. - * Bits [31:24] are reserved for defining generic - * PHY driver behavior. + * + * - Bits [15:0] are free to use by the PHY driver to communicate + * driver specific behavior. + * - Bits [23:16] are currently reserved for future use. + * - Bits [31:24] are reserved for defining generic + * PHY driver behavior. * @irq: IRQ number of the PHY's interrupt (-1 if none) * @phy_timer: The timer for handling the state machine * @phylink: Pointer to phylink instance for this PHY From c35e8de704560846dab964a2df2e548818a424d3 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Mon, 6 Dec 2021 16:12:28 +0800 Subject: [PATCH 278/868] net: phy: Add the missing blank line in the phylink_suspend comment Fix warning as: Documentation/networking/kapi:147: ./drivers/net/phy/phylink.c:1657: WARNING: Unexpected indentation. Documentation/networking/kapi:147: ./drivers/net/phy/phylink.c:1658: WARNING: Block quote ends without a blank line; unexpected unindent. Signed-off-by: Yanteng Si Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 5904546acae6..ea82ea5660e7 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1388,6 +1388,7 @@ EXPORT_SYMBOL_GPL(phylink_stop); * @mac_wol: true if the MAC needs to receive packets for Wake-on-Lan * * Handle a network device suspend event. There are several cases: + * * - If Wake-on-Lan is not active, we can bring down the link between * the MAC and PHY by calling phylink_stop(). * - If Wake-on-Lan is active, and being handled only by the PHY, we From b5bd95d17102b6719e3531d627875b9690371383 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 6 Dec 2021 21:54:57 +0800 Subject: [PATCH 279/868] net: fec: only clear interrupt of handling queue in fec_enet_rx_queue() Background: We have a customer is running a Profinet stack on the 8MM which receives and responds PNIO packets every 4ms and PNIO-CM packets every 40ms. However, from time to time the received PNIO-CM package is "stock" and is only handled when receiving a new PNIO-CM or DCERPC-Ping packet (tcpdump shows the PNIO-CM and the DCERPC-Ping packet at the same time but the PNIO-CM HW timestamp is from the expected 40 ms and not the 2s delay of the DCERPC-Ping). After debugging, we noticed PNIO, PNIO-CM and DCERPC-Ping packets would be handled by different RX queues. The root cause should be driver ack all queues' interrupt when handle a specific queue in fec_enet_rx_queue(). The blamed patch is introduced to receive as much packets as possible once to avoid interrupt flooding. But it's unreasonable to clear other queues'interrupt when handling one queue, this patch tries to fix it. Fixes: ed63f1dcd578 (net: fec: clear receive interrupts before processing a packet) Cc: Russell King Reported-by: Nicolas Diaz Signed-off-by: Joakim Zhang Link: https://lore.kernel.org/r/20211206135457.15946-1-qiangqing.zhang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec.h | 3 +++ drivers/net/ethernet/freescale/fec_main.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 7b4961daa254..ed7301b69169 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -377,6 +377,9 @@ struct bufdesc_ex { #define FEC_ENET_WAKEUP ((uint)0x00020000) /* Wakeup request */ #define FEC_ENET_TXF (FEC_ENET_TXF_0 | FEC_ENET_TXF_1 | FEC_ENET_TXF_2) #define FEC_ENET_RXF (FEC_ENET_RXF_0 | FEC_ENET_RXF_1 | FEC_ENET_RXF_2) +#define FEC_ENET_RXF_GET(X) (((X) == 0) ? FEC_ENET_RXF_0 : \ + (((X) == 1) ? FEC_ENET_RXF_1 : \ + FEC_ENET_RXF_2)) #define FEC_ENET_TS_AVAIL ((uint)0x00010000) #define FEC_ENET_TS_TIMER ((uint)0x00008000) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index bc418b910999..1b1f7f2a6130 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1480,7 +1480,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) break; pkt_received++; - writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); + writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT); /* Check for errors. */ status ^= BD_ENET_RX_LAST; From a0793fdad9a11a32bc6d21317c93c83f4aa82ebc Mon Sep 17 00:00:00 2001 From: Kelly Devilliv Date: Mon, 1 Nov 2021 23:05:02 +0800 Subject: [PATCH 280/868] csky: fix typo of fpu config macro Fix typo which will cause fpe and privilege exception error. Signed-off-by: Kelly Devilliv Cc: stable Signed-off-by: Guo Ren --- arch/csky/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index e5fbf8653a21..2020af88b636 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -209,7 +209,7 @@ asmlinkage void do_trap_illinsn(struct pt_regs *regs) asmlinkage void do_trap_fpe(struct pt_regs *regs) { -#ifdef CONFIG_CPU_HAS_FP +#ifdef CONFIG_CPU_HAS_FPU return fpu_fpe(regs); #else do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->pc, @@ -219,7 +219,7 @@ asmlinkage void do_trap_fpe(struct pt_regs *regs) asmlinkage void do_trap_priv(struct pt_regs *regs) { -#ifdef CONFIG_CPU_HAS_FP +#ifdef CONFIG_CPU_HAS_FPU if (user_mode(regs) && fpu_libc_helper(regs)) return; #endif From b383a42ca523ce54bcbd63f7c8f3cf974abc9b9a Mon Sep 17 00:00:00 2001 From: Wudi Wang Date: Wed, 8 Dec 2021 09:54:29 +0800 Subject: [PATCH 281/868] irqchip/irq-gic-v3-its.c: Force synchronisation when issuing INVALL INVALL CMD specifies that the ITS must ensure any caching associated with the interrupt collection defined by ICID is consistent with the LPI configuration tables held in memory for all Redistributors. SYNC is required to ensure that INVALL is executed. Currently, LPI configuration data may be inconsistent with that in the memory within a short period of time after the INVALL command is executed. Signed-off-by: Wudi Wang Signed-off-by: Shaokun Zhang Signed-off-by: Marc Zyngier Fixes: cc2d3216f53c ("irqchip: GICv3: ITS command queue") Link: https://lore.kernel.org/r/20211208015429.5007-1-zhangshaokun@hisilicon.com --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index eb0882d15366..0cb584d9815b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -742,7 +742,7 @@ static struct its_collection *its_build_invall_cmd(struct its_node *its, its_fixup_cmd(cmd); - return NULL; + return desc->its_invall_cmd.col; } static struct its_vpe *its_build_vinvall_cmd(struct its_node *its, From 4ebd29f91629e69da7d57390cdc953772eee03ab Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 6 Dec 2021 12:38:28 +0100 Subject: [PATCH 282/868] soc: imx: Register SoC device only on i.MX boards At the moment, using the ARM32 multi_v7_defconfig always results in two SoCs being exposed in sysfs. This is wrong, as far as I'm aware the Qualcomm DragonBoard 410c does not actually make use of a i.MX SoC. :) qcom-db410c:/sys/devices/soc0$ grep . * family:Freescale i.MX machine:Qualcomm Technologies, Inc. APQ 8016 SBC revision:0.0 serial_number:0000000000000000 soc_id:Unknown qcom-db410c:/sys/devices/soc1$ grep . * family:Snapdragon machine:APQ8016 ... This happens because imx_soc_device_init() registers the soc device unconditionally, even when running on devices that do not make use of i.MX. Arnd already reported this more than a year ago and even suggested a fix similar to this commit, but for some reason it was never submitted. Fix it by checking if the "__mxc_cpu_type" variable was actually initialized by earlier platform code. On devices without i.MX it will simply stay 0. Cc: Peng Fan Fixes: d2199b34871b ("ARM: imx: use device_initcall for imx_soc_device_init") Reported-by: Arnd Bergmann Link: https://lore.kernel.org/r/CAK8P3a0hxO1TmK6oOMQ70AHSWJnP_CAq57YMOutrxkSYNjFeuw@mail.gmail.com/ Signed-off-by: Stephan Gerhold Reviewed-by: Fabio Estevam Reviewed-by: Peng Fan Signed-off-by: Shawn Guo --- drivers/soc/imx/soc-imx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/soc/imx/soc-imx.c b/drivers/soc/imx/soc-imx.c index ac6d856ba228..77bc12039c3d 100644 --- a/drivers/soc/imx/soc-imx.c +++ b/drivers/soc/imx/soc-imx.c @@ -36,6 +36,10 @@ static int __init imx_soc_device_init(void) int ret; int i; + /* Return early if this is running on devices with different SoCs */ + if (!__mxc_cpu_type) + return 0; + if (of_machine_is_compatible("fsl,ls1021a")) return 0; From ee7f3666995d8537dec17b1d35425f28877671a9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 8 Dec 2021 07:57:20 -0500 Subject: [PATCH 283/868] tracefs: Have new files inherit the ownership of their parent If directories in tracefs have their ownership changed, then any new files and directories that are created under those directories should inherit the ownership of the director they are created in. Link: https://lkml.kernel.org/r/20211208075720.4855d180@gandalf.local.home Cc: Kees Cook Cc: Ingo Molnar Cc: Andrew Morton Cc: Linus Torvalds Cc: Al Viro Cc: Greg Kroah-Hartman Cc: Yabin Cui Cc: Christian Brauner Cc: stable@vger.kernel.org Fixes: 4282d60689d4f ("tracefs: Add new tracefs file system") Reported-by: Kalesh Singh Reported: https://lore.kernel.org/all/CAC_TJve8MMAv+H_NdLSJXZUSoxOEq2zB_pVaJ9p=7H6Bu3X76g@mail.gmail.com/ Signed-off-by: Steven Rostedt (VMware) --- fs/tracefs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 925a621b432e..06cf0534cc60 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -414,6 +414,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, inode->i_mode = mode; inode->i_fop = fops ? fops : &tracefs_file_operations; inode->i_private = data; + inode->i_uid = d_inode(dentry->d_parent)->i_uid; + inode->i_gid = d_inode(dentry->d_parent)->i_gid; d_instantiate(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); return end_creating(dentry); @@ -436,6 +438,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; inode->i_op = ops; inode->i_fop = &simple_dir_operations; + inode->i_uid = d_inode(dentry->d_parent)->i_uid; + inode->i_gid = d_inode(dentry->d_parent)->i_gid; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); From 48b27b6b5191e2e1f2798cd80877b6e4ef47c351 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 7 Dec 2021 17:17:29 -0500 Subject: [PATCH 284/868] tracefs: Set all files to the same group ownership as the mount option As people have been asking to allow non-root processes to have access to the tracefs directory, it was considered best to only allow groups to have access to the directory, where it is easier to just set the tracefs file system to a specific group (as other would be too dangerous), and that way the admins could pick which processes would have access to tracefs. Unfortunately, this broke tooling on Android that expected the other bit to be set. For some special cases, for non-root tools to trace the system, tracefs would be mounted and change the permissions of the top level directory which gave access to all running tasks permission to the tracing directory. Even though this would be dangerous to do in a production environment, for testing environments this can be useful. Now with the new changes to not allow other (which is still the proper thing to do), it breaks the testing tooling. Now more code needs to be loaded on the system to change ownership of the tracing directory. The real solution is to have tracefs honor the gid=xxx option when mounting. That is, (tracing group tracing has value 1003) mount -t tracefs -o gid=1003 tracefs /sys/kernel/tracing should have it that all files in the tracing directory should be of the given group. Copy the logic from d_walk() from dcache.c and simplify it for the mount case of tracefs if gid is set. All the files in tracefs will be walked and their group will be set to the value passed in. Link: https://lkml.kernel.org/r/20211207171729.2a54e1b3@gandalf.local.home Cc: Ingo Molnar Cc: Kees Cook Cc: Andrew Morton Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: Al Viro Cc: Greg Kroah-Hartman Reported-by: Kalesh Singh Reported-by: Yabin Cui Fixes: 49d67e445742 ("tracefs: Have tracefs directories not set OTH permission bits by default") Signed-off-by: Steven Rostedt (VMware) --- fs/tracefs/inode.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 06cf0534cc60..3616839c5c4b 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -161,6 +161,77 @@ struct tracefs_fs_info { struct tracefs_mount_opts mount_opts; }; +static void change_gid(struct dentry *dentry, kgid_t gid) +{ + if (!dentry->d_inode) + return; + dentry->d_inode->i_gid = gid; +} + +/* + * Taken from d_walk, but without he need for handling renames. + * Nothing can be renamed while walking the list, as tracefs + * does not support renames. This is only called when mounting + * or remounting the file system, to set all the files to + * the given gid. + */ +static void set_gid(struct dentry *parent, kgid_t gid) +{ + struct dentry *this_parent; + struct list_head *next; + + this_parent = parent; + spin_lock(&this_parent->d_lock); + + change_gid(this_parent, gid); +repeat: + next = this_parent->d_subdirs.next; +resume: + while (next != &this_parent->d_subdirs) { + struct list_head *tmp = next; + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + change_gid(dentry, gid); + + if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, _RET_IP_); + this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); + goto repeat; + } + spin_unlock(&dentry->d_lock); + } + /* + * All done at this level ... ascend and resume the search. + */ + rcu_read_lock(); +ascend: + if (this_parent != parent) { + struct dentry *child = this_parent; + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* go into the first sibling still alive */ + do { + next = child->d_child.next; + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); + rcu_read_unlock(); + goto resume; + } + rcu_read_unlock(); + spin_unlock(&this_parent->d_lock); + return; +} + static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) { substring_t args[MAX_OPT_ARGS]; @@ -193,6 +264,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) if (!gid_valid(gid)) return -EINVAL; opts->gid = gid; + set_gid(tracefs_mount->mnt_root, gid); break; case Opt_mode: if (match_octal(&args[0], &option)) From 34f35f8f14bc406efc06ee4ff73202c6fd245d15 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Wed, 8 Dec 2021 10:32:39 +0100 Subject: [PATCH 285/868] ipmi: ssif: initialize ssif_info->client early During probe ssif_info->client is dereferenced in error path. However, it is set when some of the error checking has already been done. This causes following kernel crash if an error path is taken: [ 30.645593][ T674] ipmi_ssif 0-000e: ipmi_ssif: Not probing, Interface already present [ 30.657616][ T674] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000088 ... [ 30.657723][ T674] pc : __dev_printk+0x28/0xa0 [ 30.657732][ T674] lr : _dev_err+0x7c/0xa0 ... [ 30.657772][ T674] Call trace: [ 30.657775][ T674] __dev_printk+0x28/0xa0 [ 30.657778][ T674] _dev_err+0x7c/0xa0 [ 30.657781][ T674] ssif_probe+0x548/0x900 [ipmi_ssif 62ce4b08badc1458fd896206d9ef69a3c31f3d3e] [ 30.657791][ T674] i2c_device_probe+0x37c/0x3c0 ... Initialize ssif_info->client before any error path can be taken. Clear i2c_client data in the error path to prevent the dangling pointer from leaking. Fixes: c4436c9149c5 ("ipmi_ssif: avoid registering duplicate ssif interface") Cc: stable@vger.kernel.org # 5.4.x Suggested-by: Takashi Iwai Signed-off-by: Mian Yousaf Kaukab Message-Id: <20211208093239.4432-1-ykaukab@suse.de> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_ssif.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 0c62e578749e..48aab77abebf 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1659,6 +1659,9 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) } } + ssif_info->client = client; + i2c_set_clientdata(client, ssif_info); + rv = ssif_check_and_remove(client, ssif_info); /* If rv is 0 and addr source is not SI_ACPI, continue probing */ if (!rv && ssif_info->addr_source == SI_ACPI) { @@ -1679,9 +1682,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) ipmi_addr_src_to_str(ssif_info->addr_source), client->addr, client->adapter->name, slave_addr); - ssif_info->client = client; - i2c_set_clientdata(client, ssif_info); - /* Now check for system interface capabilities */ msg[0] = IPMI_NETFN_APP_REQUEST << 2; msg[1] = IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD; @@ -1881,6 +1881,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_err(&ssif_info->client->dev, "Unable to start IPMI SSIF: %d\n", rv); + i2c_set_clientdata(client, NULL); kfree(ssif_info); } kfree(resp); From 11f8cb8903ba4e8ba900fa4e4ab29d0fb4c9ef5d Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 23 Nov 2021 21:23:30 +0800 Subject: [PATCH 286/868] ACPI: tools: Fix compilation when output directory is not present Compiling the ACPI tools when output directory parameter is specified, but the output directory is not present, triggers the following error: make O=/data/test/tmp/ -C tools/power/acpi/ make: Entering directory '/data/src/kernel/linux/tools/power/acpi' DESCEND tools/acpidbg make[1]: Entering directory '/data/src/kernel/linux/tools/power/acpi/tools/acpidbg' MKDIR include CP include CC tools/acpidbg/acpidbg.o Assembler messages: Fatal error: can't create /data/test/tmp/tools/power/acpi/tools/acpidbg/acpidbg.o: No such file or directory make[1]: *** [../../Makefile.rules:24: /data/test/tmp/tools/power/acpi/tools/acpidbg/acpidbg.o] Error 1 make[1]: Leaving directory '/data/src/kernel/linux/tools/power/acpi/tools/acpidbg' make: *** [Makefile:18: acpidbg] Error 2 make: Leaving directory '/data/src/kernel/linux/tools/power/acpi' which occurs because the output directory has not been created yet. Fix this issue by creating the output directory before compiling. Reported-by: Andy Shevchenko Signed-off-by: Chen Yu Reviewed-by: Andy Shevchenko [ rjw: New subject, changelog edits ] Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/Makefile.config | 1 + tools/power/acpi/Makefile.rules | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 331f6d30f472..cd7106876a5f 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -69,6 +69,7 @@ KERNEL_INCLUDE := $(OUTPUT)include ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) CFLAGS += $(WARNINGS) +MKDIR = mkdir ifeq ($(strip $(V)),false) QUIET=@ diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index 2a6c170b57cd..1d7616f5d0ae 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -21,6 +21,7 @@ $(KERNEL_INCLUDE): $(objdir)%.o: %.c $(KERNEL_INCLUDE) $(ECHO) " CC " $(subst $(OUTPUT),,$@) + $(QUIET) $(MKDIR) -p $(objdir) 2>/dev/null $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< all: $(OUTPUT)$(TOOL) From 444dd878e85fb33fcfb2682cfdab4c236f33ea3e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Dec 2021 17:19:47 +0100 Subject: [PATCH 287/868] PM: runtime: Fix pm_runtime_active() kerneldoc comment The kerneldoc comment of pm_runtime_active() does not reflect the behavior of the function, so update it accordingly. Fixes: 403d2d116ec0 ("PM: runtime: Add kerneldoc comments to multiple helpers") Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- include/linux/pm_runtime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 222da43b7096..eddd66d426ca 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -129,7 +129,7 @@ static inline bool pm_runtime_suspended(struct device *dev) * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * - * Return %true if runtime PM is enabled for @dev and its runtime PM status is + * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is From f872f73601b92c86f3da8bdf3e19abd0f1780eb9 Mon Sep 17 00:00:00 2001 From: Sumeet Pawnikar Date: Tue, 7 Dec 2021 18:05:39 +0530 Subject: [PATCH 288/868] thermal: int340x: Fix VCoRefLow MMIO bit offset for TGL The VCoRefLow CPU FIVR register definition for Tiger Lake is incorrect. Current implementation reads it from MMIO offset 0x5A18 and bit offset [12:14], but the actual correct register definition is from bit offset [11:13]. Update to fix the bit offset. Fixes: 473be51142ad ("thermal: int340x: processor_thermal: Add RFIM driver") Signed-off-by: Sumeet Pawnikar Cc: 5.14+ # 5.14+ [ rjw: New subject, changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c index b25b54d4bac1..e693ec8234fb 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -29,7 +29,7 @@ static const char * const fivr_strings[] = { }; static const struct mmio_reg tgl_fivr_mmio_regs[] = { - { 0, 0x5A18, 3, 0x7, 12}, /* vco_ref_code_lo */ + { 0, 0x5A18, 3, 0x7, 11}, /* vco_ref_code_lo */ { 0, 0x5A18, 8, 0xFF, 16}, /* vco_ref_code_hi */ { 0, 0x5A08, 8, 0xFF, 0}, /* spread_spectrum_pct */ { 0, 0x5A08, 1, 0x1, 8}, /* spread_spectrum_clk_enable */ From d815b3f2f273537cb8afaf5ab11a46851f6c03e5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 Nov 2021 14:50:25 +0300 Subject: [PATCH 289/868] btrfs: fix error pointer dereference in btrfs_ioctl_rm_dev_v2() If memdup_user() fails the error handing will crash when it tries to kfree() an error pointer. Just return directly because there is no cleanup required. Fixes: 1a15eb724aae ("btrfs: use btrfs_get_dev_args_from_path in dev removal ioctls") Reviewed-by: Josef Bacik Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 012fbfdfbebf..1b85d98df66b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3188,10 +3188,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) return -EPERM; vol_args = memdup_user(arg, sizeof(*vol_args)); - if (IS_ERR(vol_args)) { - ret = PTR_ERR(vol_args); - goto out; - } + if (IS_ERR(vol_args)) + return PTR_ERR(vol_args); if (vol_args->flags & ~BTRFS_DEVICE_REMOVE_ARGS_MASK) { ret = -EOPNOTSUPP; From f981fec12cc5d2c07942301744b9ea61228bf246 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 22 Nov 2021 17:04:19 -0500 Subject: [PATCH 290/868] btrfs: fail if fstrim_range->start == U64_MAX We've always been failing generic/260 because it's testing things we actually don't care about and thus won't fail for. However we probably should fail for fstrim_range->start == U64_MAX since we clearly can't trim anything past that. This in combination with an update to generic/260 will allow us to pass this test properly. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3fd736a02c1e..fc4895e6a62c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6051,6 +6051,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) int dev_ret = 0; int ret = 0; + if (range->start == U64_MAX) + return -EINVAL; + /* * Check range overflow if range->len is set. * The default range->len is U64_MAX. From b560b21f71eb4ef9dfc7c8ec1d0e4d7f9aa54b51 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 7 Dec 2021 10:15:21 +0200 Subject: [PATCH 291/868] bpf: Add selftests to cover packet access corner cases This commit adds BPF verifier selftests that cover all corner cases by packet boundary checks. Specifically, 8-byte packet reads are tested at the beginning of data and at the beginning of data_meta, using all kinds of boundary checks (all comparison operators: <, >, <=, >=; both permutations of operands: data + length compared to end, end compared to data + length). For each case there are three tests: 1. Length is just enough for an 8-byte read. Length is either 7 or 8, depending on the comparison. 2. Length is increased by 1 - should still pass the verifier. These cases are useful, because they failed before commit 2fa7d94afc1a ("bpf: Fix the off-by-two error in range markings"). 3. Length is decreased by 1 - should be rejected by the verifier. Some existing tests are just renamed to avoid duplication. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211207081521.41923-1-maximmi@nvidia.com --- .../bpf/verifier/xdp_direct_packet_access.c | 600 +++++++++++++++++- 1 file changed, 584 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c index de172a5b8754..b4ec228eb95d 100644 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -35,7 +35,7 @@ .prog_type = BPF_PROG_TYPE_XDP, }, { - "XDP pkt read, pkt_data' > pkt_end, good access", + "XDP pkt read, pkt_data' > pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -87,6 +87,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end > pkt_data', good access", .insns = { @@ -106,7 +141,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end > pkt_data', bad access 1", + "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -142,6 +177,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end > pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' < pkt_end, good access", .insns = { @@ -161,7 +232,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' < pkt_end, bad access 1", + "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -198,7 +269,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end < pkt_data', good access", + "XDP pkt read, pkt_data' < pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -250,6 +357,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end < pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' >= pkt_end, good access", .insns = { @@ -268,7 +410,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -304,7 +446,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end >= pkt_data', good access", + "XDP pkt read, pkt_data' >= pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -359,7 +535,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' <= pkt_end, good access", + "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -413,6 +626,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end <= pkt_data', good access", .insns = { @@ -431,7 +681,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end <= pkt_data', bad access 1", + "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -467,7 +717,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' > pkt_data, good access", + "XDP pkt read, pkt_end <= pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -519,6 +803,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data > pkt_meta', good access", .insns = { @@ -538,7 +857,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data > pkt_meta', bad access 1", + "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -574,6 +893,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' < pkt_data, good access", .insns = { @@ -593,7 +948,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' < pkt_data, bad access 1", + "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -630,7 +985,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data < pkt_meta', good access", + "XDP pkt read, pkt_meta' < pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -682,6 +1073,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' >= pkt_data, good access", .insns = { @@ -700,7 +1126,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", + "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -736,7 +1162,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data >= pkt_meta', good access", + "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -791,7 +1251,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' <= pkt_data, good access", + "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -845,6 +1342,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data <= pkt_meta', good access", .insns = { @@ -863,7 +1397,7 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data <= pkt_meta', bad access 1", + "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -898,3 +1432,37 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, From c2e39305299f0118298c2201f6d6cc7d3485f29e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Nov 2021 14:14:23 -0500 Subject: [PATCH 292/868] btrfs: clear extent buffer uptodate when we fail to write it I got dmesg errors on generic/281 on our overnight fstests. Looking at the history this happens occasionally, with errors like this WARNING: CPU: 0 PID: 673217 at fs/btrfs/extent_io.c:6848 assert_eb_page_uptodate+0x3f/0x50 CPU: 0 PID: 673217 Comm: kworker/u4:13 Tainted: G W 5.16.0-rc2+ #469 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Workqueue: btrfs-cache btrfs_work_helper RIP: 0010:assert_eb_page_uptodate+0x3f/0x50 RSP: 0018:ffffae598230bc60 EFLAGS: 00010246 RAX: 0017ffffc0002112 RBX: ffffebaec4100900 RCX: 0000000000001000 RDX: ffffebaec45733c7 RSI: ffffebaec4100900 RDI: ffff9fd98919f340 RBP: 0000000000000d56 R08: ffff9fd98e300000 R09: 0000000000000000 R10: 0001207370a91c50 R11: 0000000000000000 R12: 00000000000007b0 R13: ffff9fd98919f340 R14: 0000000001500000 R15: 0000000001cb0000 FS: 0000000000000000(0000) GS:ffff9fd9fbc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f549fcf8940 CR3: 0000000114908004 CR4: 0000000000370ef0 Call Trace: extent_buffer_test_bit+0x3f/0x70 free_space_test_bit+0xa6/0xc0 load_free_space_tree+0x1d6/0x430 caching_thread+0x454/0x630 ? rcu_read_lock_sched_held+0x12/0x60 ? rcu_read_lock_sched_held+0x12/0x60 ? rcu_read_lock_sched_held+0x12/0x60 ? lock_release+0x1f0/0x2d0 btrfs_work_helper+0xf2/0x3e0 ? lock_release+0x1f0/0x2d0 ? finish_task_switch.isra.0+0xf9/0x3a0 process_one_work+0x270/0x5a0 worker_thread+0x55/0x3c0 ? process_one_work+0x5a0/0x5a0 kthread+0x174/0x1a0 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 This happens because we're trying to read from a extent buffer page that is !PageUptodate. This happens because we will clear the page uptodate when we have an IO error, but we don't clear the extent buffer uptodate. If we do a read later and find this extent buffer we'll think its valid and not return an error, and then trip over this warning. Fix this by also clearing uptodate on the extent buffer when this happens, so that we get an error when we do a btrfs_search_slot() and find this block later. CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4e03a6d3aa32..dcdb97d9205d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4313,6 +4313,12 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb) if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) return; + /* + * A read may stumble upon this buffer later, make sure that it gets an + * error and knows there was an error. + */ + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + /* * If we error out, we should add back the dirty_metadata_bytes * to make it consistent. From 68b85589ba8114514d83ae87dd6f3fe9b315cae0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Nov 2021 14:14:25 -0500 Subject: [PATCH 293/868] btrfs: call mapping_set_error() on btree inode with a write error generic/484 fails sometimes with compression on because the write ends up small enough that it goes into the btree. This means that we never call mapping_set_error() on the inode itself, because the page gets marked as fine when we inline it into the metadata. When the metadata writeback happens we see it and abort the transaction properly and mark the fs as readonly, however we don't do the mapping_set_error() on anything. In syncfs() we will simply return 0 if the sb is marked read-only, so we can't check for this in our syncfs callback. The only way the error gets returned if we called mapping_set_error() on something. Fix this by calling mapping_set_error() on the btree inode mapping. This allows us to properly return an error on syncfs and pass generic/484 with compression on. Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dcdb97d9205d..3258b6f01e85 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4319,6 +4319,14 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb) */ clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + /* + * We need to set the mapping with the io error as well because a write + * error will flip the file system readonly, and then syncfs() will + * return a 0 because we are readonly if we don't modify the err seq for + * the superblock. + */ + mapping_set_error(page->mapping, -EIO); + /* * If we error out, we should add back the dirty_metadata_bytes * to make it consistent. From 84c25448929942edacba905cecc0474e91114e7a Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 30 Nov 2021 12:40:21 +0900 Subject: [PATCH 294/868] btrfs: fix re-dirty process of tree-log nodes There is a report of a transaction abort of -EAGAIN with the following script. #!/bin/sh for d in sda sdb; do mkfs.btrfs -d single -m single -f /dev/\${d} done mount /dev/sda /mnt/test mount /dev/sdb /mnt/scratch for dir in test scratch; do echo 3 >/proc/sys/vm/drop_caches fio --directory=/mnt/\${dir} --name=fio.\${dir} --rw=read --size=50G --bs=64m \ --numjobs=$(nproc) --time_based --ramp_time=5 --runtime=480 \ --group_reporting |& tee /dev/shm/fio.\${dir} echo 3 >/proc/sys/vm/drop_caches done for d in sda sdb; do umount /dev/\${d} done The stack trace is shown in below. [3310.967991] BTRFS: error (device sda) in btrfs_commit_transaction:2341: errno=-11 unknown (Error while writing out transaction) [3310.968060] BTRFS info (device sda): forced readonly [3310.968064] BTRFS warning (device sda): Skipping commit of aborted transaction. [3310.968065] ------------[ cut here ]------------ [3310.968066] BTRFS: Transaction aborted (error -11) [3310.968074] WARNING: CPU: 14 PID: 1684 at fs/btrfs/transaction.c:1946 btrfs_commit_transaction.cold+0x209/0x2c8 [3310.968131] CPU: 14 PID: 1684 Comm: fio Not tainted 5.14.10-300.fc35.x86_64 #1 [3310.968135] Hardware name: DIAWAY Tartu/Tartu, BIOS V2.01.B10 04/08/2021 [3310.968137] RIP: 0010:btrfs_commit_transaction.cold+0x209/0x2c8 [3310.968144] RSP: 0018:ffffb284ce393e10 EFLAGS: 00010282 [3310.968147] RAX: 0000000000000026 RBX: ffff973f147b0f60 RCX: 0000000000000027 [3310.968149] RDX: ffff974ecf098a08 RSI: 0000000000000001 RDI: ffff974ecf098a00 [3310.968150] RBP: ffff973f147b0f08 R08: 0000000000000000 R09: ffffb284ce393c48 [3310.968151] R10: ffffb284ce393c40 R11: ffffffff84f47468 R12: ffff973f101bfc00 [3310.968153] R13: ffff971f20cf2000 R14: 00000000fffffff5 R15: ffff973f147b0e58 [3310.968154] FS: 00007efe65468740(0000) GS:ffff974ecf080000(0000) knlGS:0000000000000000 [3310.968157] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [3310.968158] CR2: 000055691bcbe260 CR3: 000000105cfa4001 CR4: 0000000000770ee0 [3310.968160] PKRU: 55555554 [3310.968161] Call Trace: [3310.968167] ? dput+0xd4/0x300 [3310.968174] btrfs_sync_file+0x3f1/0x490 [3310.968180] __x64_sys_fsync+0x33/0x60 [3310.968185] do_syscall_64+0x3b/0x90 [3310.968190] entry_SYSCALL_64_after_hwframe+0x44/0xae [3310.968194] RIP: 0033:0x7efe6557329b [3310.968200] RSP: 002b:00007ffe0236ebc0 EFLAGS: 00000293 ORIG_RAX: 000000000000004a [3310.968203] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007efe6557329b [3310.968204] RDX: 0000000000000000 RSI: 00007efe58d77010 RDI: 0000000000000006 [3310.968205] RBP: 0000000004000000 R08: 0000000000000000 R09: 00007efe58d77010 [3310.968207] R10: 0000000016cacc0c R11: 0000000000000293 R12: 00007efe5ce95980 [3310.968208] R13: 0000000000000000 R14: 00007efe6447c790 R15: 0000000c80000000 [3310.968212] ---[ end trace 1a346f4d3c0d96ba ]--- [3310.968214] BTRFS: error (device sda) in cleanup_transaction:1946: errno=-11 unknown The abort occurs because of a write hole while writing out freeing tree nodes of a tree-log tree. For zoned btrfs, we re-dirty a freed tree node to ensure btrfs can write the region and does not leave a hole on write on a zoned device. The current code fails to re-dirty a node when the tree-log tree's depth is greater or equal to 2. That leads to a transaction abort with -EAGAIN. Fix the issue by properly re-dirtying a node on walking up the tree. Fixes: d3575156f662 ("btrfs: zoned: redirty released extent buffers") CC: stable@vger.kernel.org # 5.12+ Link: https://github.com/kdave/btrfs-progs/issues/415 Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8ab33caf016f..3e6f14e13918 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2908,6 +2908,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, path->nodes[*level]->len); if (ret) return ret; + btrfs_redirty_list_add(trans->transaction, + next); } else { if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) clear_extent_buffer_dirty(next); @@ -2988,6 +2990,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, next->start, next->len); if (ret) goto out; + btrfs_redirty_list_add(trans->transaction, next); } else { if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags)) clear_extent_buffer_dirty(next); @@ -3438,8 +3441,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans, EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT); extent_io_tree_release(&log->log_csum_range); - if (trans && log->node) - btrfs_redirty_list_add(trans->transaction, log->node); btrfs_put_root(log); } From da5e817d9d75422eaaa05490d0b9a5e328fc1a51 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 3 Dec 2021 02:55:33 -0800 Subject: [PATCH 295/868] btrfs: free exchange changeset on failures Fstests runs on my VMs have show several kmemleak reports like the following. unreferenced object 0xffff88811ae59080 (size 64): comm "xfs_io", pid 12124, jiffies 4294987392 (age 6.368s) hex dump (first 32 bytes): 00 c0 1c 00 00 00 00 00 ff cf 1c 00 00 00 00 00 ................ 90 97 e5 1a 81 88 ff ff 90 97 e5 1a 81 88 ff ff ................ backtrace: [<00000000ac0176d2>] ulist_add_merge+0x60/0x150 [btrfs] [<0000000076e9f312>] set_state_bits+0x86/0xc0 [btrfs] [<0000000014fe73d6>] set_extent_bit+0x270/0x690 [btrfs] [<000000004f675208>] set_record_extent_bits+0x19/0x20 [btrfs] [<00000000b96137b1>] qgroup_reserve_data+0x274/0x310 [btrfs] [<0000000057e9dcbb>] btrfs_check_data_free_space+0x5c/0xa0 [btrfs] [<0000000019c4511d>] btrfs_delalloc_reserve_space+0x1b/0xa0 [btrfs] [<000000006d37e007>] btrfs_dio_iomap_begin+0x415/0x970 [btrfs] [<00000000fb8a74b8>] iomap_iter+0x161/0x1e0 [<0000000071dff6ff>] __iomap_dio_rw+0x1df/0x700 [<000000002567ba53>] iomap_dio_rw+0x5/0x20 [<0000000072e555f8>] btrfs_file_write_iter+0x290/0x530 [btrfs] [<000000005eb3d845>] new_sync_write+0x106/0x180 [<000000003fb505bf>] vfs_write+0x24d/0x2f0 [<000000009bb57d37>] __x64_sys_pwrite64+0x69/0xa0 [<000000003eba3fdf>] do_syscall_64+0x43/0x90 In case brtfs_qgroup_reserve_data() or btrfs_delalloc_reserve_metadata() fail the allocated extent_changeset will not be freed. So in btrfs_check_data_free_space() and btrfs_delalloc_reserve_space() free the allocated extent_changeset to get rid of the allocated memory. The issue currently only happens in the direct IO write path, but only after 65b3c08606e5 ("btrfs: fix ENOSPC failure when attempting direct IO write into NOCOW range"), and also at defrag_one_locked_target(). Every other place is always calling extent_changeset_free() even if its call to btrfs_delalloc_reserve_space() or btrfs_check_data_free_space() has failed. CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/delalloc-space.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 2059d1504149..40c4d6ba3fb9 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -143,10 +143,13 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode, /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); - if (ret < 0) + if (ret < 0) { btrfs_free_reserved_data_space_noquota(fs_info, len); - else + extent_changeset_free(*reserved); + *reserved = NULL; + } else { ret = 0; + } return ret; } @@ -452,8 +455,11 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode, if (ret < 0) return ret; ret = btrfs_delalloc_reserve_metadata(inode, len); - if (ret < 0) + if (ret < 0) { btrfs_free_reserved_data_space(inode, *reserved, start, len); + extent_changeset_free(*reserved); + *reserved = NULL; + } return ret; } From 5911f5382022aff2b817cb88f276756af229664d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 2 Dec 2021 00:47:14 -0800 Subject: [PATCH 296/868] btrfs: zoned: clear data relocation bg on zone finish When finishing a zone that is used by a dedicated data relocation block group, also remove its reference from fs_info, so we're not trying to use a full block group for allocations during data relocation, which will always fail. The result is we're not making any forward progress and end up in a deadlock situation. Fixes: c2707a255623 ("btrfs: zoned: add a dedicated data relocation block group") Reviewed-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 67d932d70798..678a29469511 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1860,6 +1860,7 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group) block_group->alloc_offset = block_group->zone_capacity; block_group->free_space_ctl->free_space = 0; btrfs_clear_treelog_bg(block_group); + btrfs_clear_data_reloc_bg(block_group); spin_unlock(&block_group->lock); ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, @@ -1942,6 +1943,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len ASSERT(block_group->alloc_offset == block_group->zone_capacity); ASSERT(block_group->free_space_ctl->free_space == 0); btrfs_clear_treelog_bg(block_group); + btrfs_clear_data_reloc_bg(block_group); spin_unlock(&block_group->lock); map = block_group->physical_map; From 8289ed9f93bef2762f9184e136d994734b16d997 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 1 Dec 2021 19:56:17 +0800 Subject: [PATCH 297/868] btrfs: replace the BUG_ON in btrfs_del_root_ref with proper error handling I hit the BUG_ON() with generic/475 test case, and to my surprise, all callers of btrfs_del_root_ref() are already aborting transaction, thus there is not need for such BUG_ON(), just go to @out label and caller will properly handle the error. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/root-tree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 702dc5441f03..db37a3799649 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -336,7 +336,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, key.offset = ref_id; again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - BUG_ON(ret < 0); + if (ret < 0) + goto out; if (ret == 0) { leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], From 30e32f300be6d0160fd1b3fc6d0f62917acd9be2 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Dec 2021 15:35:06 +0200 Subject: [PATCH 298/868] nvmet-tcp: fix possible list corruption for unexpected command failure nvmet_tcp_handle_req_failure needs to understand weather to prepare for incoming data or the next pdu. However if we misidentify this, we will wait for 0-length data, and queue the response although nvmet_req_init already did that. The particular command was namespace management command with no data, which was incorrectly categorized as a command with incapsule data. Also, add a code comment of what we are trying to do here. Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cb6a473c3eaf..7c1c43ce466b 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -922,7 +922,14 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); int ret; - if (!nvme_is_write(cmd->req.cmd) || + /* + * This command has not been processed yet, hence we are trying to + * figure out if there is still pending data left to receive. If + * we don't, we can simply prepare for the next pdu and bail out, + * otherwise we will need to prepare a buffer and receive the + * stale data before continuing forward. + */ + if (!nvme_is_write(cmd->req.cmd) || !data_len || data_len > cmd->req.port->inline_data_size) { nvmet_prepare_receive_pdu(queue); return; From b19926d4f3a660a8b76e5d989ffd1168e619a5c4 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 8 Dec 2021 03:39:35 +0100 Subject: [PATCH 299/868] drm/syncobj: Deal with signalled fences in drm_syncobj_find_fence. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dma_fence_chain_find_seqno only ever returns the top fence in the chain or an unsignalled fence. Hence if we request a seqno that is already signalled it returns a NULL fence. Some callers are not prepared to handle this, like the syncobj transfer functions for example. This behavior is "new" with timeline syncobj and it looks like not all callers were updated. To fix this behavior make sure that a successful drm_sync_find_fence always returns a non-NULL fence. v2: Move the fix to drm_syncobj_find_fence from the transfer functions. Fixes: ea569910cbab ("drm/syncobj: add transition iotcls between binary and timeline v2") Cc: stable@vger.kernel.org Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Christian König Acked-by: Lionel Landwerlin Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20211208023935.17018-1-bas@basnieuwenhuizen.nl --- drivers/gpu/drm/drm_syncobj.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index c9a9d74f338c..c313a5b4549c 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -404,8 +404,17 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (*fence) { ret = dma_fence_chain_find_seqno(fence, point); - if (!ret) + if (!ret) { + /* If the requested seqno is already signaled + * drm_syncobj_find_fence may return a NULL + * fence. To make sure the recipient gets + * signalled, use a new fence instead. + */ + if (!*fence) + *fence = dma_fence_get_stub(); + goto out; + } dma_fence_put(*fence); } else { ret = -EINVAL; From 7d5b7cad79da76f3dad4a9f6040e524217814e5a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Dec 2021 19:20:30 +0100 Subject: [PATCH 300/868] ftrace: Use direct_ops hash in unregister_ftrace_direct Now when we have *direct_multi interface the direct_functions hash is no longer owned just by direct_ops. It's also used by any other ftrace_ops passed to *direct_multi interface. Thus to find out that we are unregistering the last function from direct_ops, we need to check directly direct_ops's hash. Link: https://lkml.kernel.org/r/20211206182032.87248-2-jolsa@kernel.org Cc: Ingo Molnar Cc: Heiko Carstens Fixes: f64dd4627ec6 ("ftrace: Add multi direct register/unregister interface") Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 30bc880c3849..7f0594e28226 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5217,6 +5217,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) { struct ftrace_direct_func *direct; struct ftrace_func_entry *entry; + struct ftrace_hash *hash; int ret = -ENODEV; mutex_lock(&direct_mutex); @@ -5225,7 +5226,8 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) if (!entry) goto out_unlock; - if (direct_functions->count == 1) + hash = direct_ops.func_hash->filter_hash; + if (hash->count == 1) unregister_ftrace_function(&direct_ops); ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0); From fea3ffa48c6d42a11dca766c89284d22eaf5603f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Dec 2021 19:20:31 +0100 Subject: [PATCH 301/868] ftrace: Add cleanup to unregister_ftrace_direct_multi Adding ops cleanup to unregister_ftrace_direct_multi, so it can be reused in another register call. Link: https://lkml.kernel.org/r/20211206182032.87248-3-jolsa@kernel.org Cc: Ingo Molnar Cc: Heiko Carstens Fixes: f64dd4627ec6 ("ftrace: Add multi direct register/unregister interface") Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7f0594e28226..be5f6b32a012 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5542,6 +5542,10 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) err = unregister_ftrace_function(ops); remove_direct_functions_hash(hash, addr); mutex_unlock(&direct_mutex); + + /* cleanup for possible another register call */ + ops->func = NULL; + ops->trampoline = 0; return err; } EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); From 14902f8961dca9c66bf190f7b1583767c97a4197 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 7 Dec 2021 13:10:53 +0100 Subject: [PATCH 302/868] HID: Ignore battery for Elan touchscreen on Asus UX550VE Battery status is reported for the Asus UX550VE touchscreen even though it does not have a battery. Prevent it from always reporting the battery as low. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1897823 Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ca418bffd3b2..19da07777d62 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -399,6 +399,7 @@ #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 #define I2C_DEVICE_ID_HP_ENVY_X360_15 0x2d05 #define I2C_DEVICE_ID_HP_SPECTRE_X360_15 0x2817 +#define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 217f2d1b91c5..03f994541981 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -325,6 +325,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15), HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15), From 9cdb54be3e463f5c0607fcac045d5a9c67575775 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 1 Dec 2021 20:48:31 -0800 Subject: [PATCH 303/868] drm/i915: Fix error pointer dereference in i915_gem_do_execbuffer() Originally "out_fence" was set using out_fence = sync_file_create() but which returns NULL, but now it is set with out_fence = eb_requests_create() which returns error pointers. The error path needs to be modified to avoid an Oops in the "goto err_request;" path. Fixes: 544460c33821 ("drm/i915: Multi-BB execbuf") Signed-off-by: Dan Carpenter Signed-off-by: Matthew Brost Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211202044831.29583-1-matthew.brost@intel.com (cherry picked from commit 8722ded49ce8a0c706b373e8087eb810684962ff) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4d7da07442f2..9b24d9b5ade1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3277,6 +3277,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); if (IS_ERR(out_fence)) { err = PTR_ERR(out_fence); + out_fence = NULL; if (eb.requests[0]) goto err_request; else From d599f714b73e4177dfdfe64fce09175568288ee9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 Dec 2021 14:04:24 +0200 Subject: [PATCH 304/868] iwlwifi: mvm: don't crash on invalid rate w/o STA If we get to the WARN_ONCE(..., "Got a HT rate (...)", ...) here with a NULL sta, then we crash because mvmsta is bad and we try to dereference it. Fix that by printing -1 as the state if no station was given. Signed-off-by: Johannes Berg Fixes: 6761a718263a ("iwlwifi: mvm: add explicit check for non-data frames in get Tx rate") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20211203140410.1a1541d7dcb5.I606c746e11447fe168cf046376b70b04e278c3b4@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index bdd4ee432548..76e0b7b45980 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -269,17 +269,18 @@ static u32 iwl_mvm_get_tx_rate(struct iwl_mvm *mvm, u8 rate_plcp; u32 rate_flags = 0; bool is_cck; - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); /* info->control is only relevant for non HW rate control */ if (!ieee80211_hw_check(mvm->hw, HAS_RATE_CONTROL)) { + struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); + /* HT rate doesn't make sense for a non data frame */ WARN_ONCE(info->control.rates[0].flags & IEEE80211_TX_RC_MCS && !ieee80211_is_data(fc), "Got a HT rate (flags:0x%x/mcs:%d/fc:0x%x/state:%d) for a non data frame\n", info->control.rates[0].flags, info->control.rates[0].idx, - le16_to_cpu(fc), mvmsta->sta_state); + le16_to_cpu(fc), sta ? mvmsta->sta_state : -1); rate_idx = info->control.rates[0].idx; } From efdbfa0ad03e764419378485d1b8f6e7706fb1a3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:33 +0100 Subject: [PATCH 305/868] iwlwifi: fix LED dependencies The dependencies for LED configuration are highly inconsistent and too complicated at the moment. One of the results is a randconfig failure I get very rarely when LEDS_CLASS is in a loadable module, but the wireless core is built-in: WARNING: unmet direct dependencies detected for MAC80211_LEDS Depends on [n]: NET [=y] && WIRELESS [=y] && MAC80211 [=y] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=MAC80211 [=y]) Selected by [m]: - IWLEGACY [=m] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_INTEL [=y] - IWLWIFI_LEDS [=y] && NETDEVICES [=y] && WLAN [=y] && WLAN_VENDOR_INTEL [=y] && IWLWIFI [=m] && (LEDS_CLASS [=m]=y || LEDS_CLASS [=m]=IWLWIFI [=m]) && (IWLMVM [=m] || IWLDVM [=m]) aarch64-linux-ld: drivers/net/wireless/ath/ath5k/led.o: in function `ath5k_register_led': led.c:(.text+0x60): undefined reference to `led_classdev_register_ext' aarch64-linux-ld: drivers/net/wireless/ath/ath5k/led.o: in function `ath5k_unregister_leds': led.c:(.text+0x200): undefined reference to `led_classdev_unregister' For iwlwifi, the dependency is wrong, since this config prevents the MAC80211_LEDS code from being part of a built-in MAC80211 driver. For iwlegacy, this is worse because the driver tries to force-enable the other subsystems, which is both a layering violation and a bug because it will still fail with MAC80211=y and IWLEGACY=m, leading to LEDS_CLASS being a module as well. The actual link failure in the ath5k driver is a result of MAC80211_LEDS being enabled but not usable. With the Kconfig logic fixed in the Intel drivers, the ath5k driver works as expected again. Signed-off-by: Arnd Bergmann Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-1-arnd@kernel.org --- drivers/net/wireless/intel/iwlegacy/Kconfig | 4 ++-- drivers/net/wireless/intel/iwlwifi/Kconfig | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/Kconfig b/drivers/net/wireless/intel/iwlegacy/Kconfig index 24fe3f63c321..7eacc8e58ee1 100644 --- a/drivers/net/wireless/intel/iwlegacy/Kconfig +++ b/drivers/net/wireless/intel/iwlegacy/Kconfig @@ -2,14 +2,13 @@ config IWLEGACY tristate select FW_LOADER - select NEW_LEDS - select LEDS_CLASS select LEDS_TRIGGERS select MAC80211_LEDS config IWL4965 tristate "Intel Wireless WiFi 4965AGN (iwl4965)" depends on PCI && MAC80211 + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select IWLEGACY help This option enables support for @@ -38,6 +37,7 @@ config IWL4965 config IWL3945 tristate "Intel PRO/Wireless 3945ABG/BG Network Connection (iwl3945)" depends on PCI && MAC80211 + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 select IWLEGACY help Select to build the driver supporting the: diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig index 1085afbefba8..418ae4f870ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@ -47,7 +47,7 @@ if IWLWIFI config IWLWIFI_LEDS bool - depends on LEDS_CLASS=y || LEDS_CLASS=IWLWIFI + depends on LEDS_CLASS=y || LEDS_CLASS=MAC80211 depends on IWLMVM || IWLDVM select LEDS_TRIGGERS select MAC80211_LEDS From c68115fc537518b8ff2c54b3e2984a60977affed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:34 +0100 Subject: [PATCH 306/868] brcmsmac: rework LED dependencies This is now the only driver that selects the LEDS_CLASS framework, which is normally user-selectable. While it doesn't strictly cause a bug, rework the Kconfig logic to be more consistent with what other drivers do, and only enable LED support in brcmsmac if the dependencies are all there, rather than using 'select' to enable what it needs. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-2-arnd@kernel.org --- drivers/net/wireless/broadcom/brcm80211/Kconfig | 14 +++++++++----- .../wireless/broadcom/brcm80211/brcmsmac/Makefile | 2 +- .../net/wireless/broadcom/brcm80211/brcmsmac/led.h | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/Kconfig b/drivers/net/wireless/broadcom/brcm80211/Kconfig index 5bf2318763c5..3a1a35b5672f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/Kconfig +++ b/drivers/net/wireless/broadcom/brcm80211/Kconfig @@ -7,16 +7,20 @@ config BRCMSMAC depends on MAC80211 depends on BCMA_POSSIBLE select BCMA - select NEW_LEDS if BCMA_DRIVER_GPIO - select LEDS_CLASS if BCMA_DRIVER_GPIO select BRCMUTIL select FW_LOADER select CORDIC help This module adds support for PCIe wireless adapters based on Broadcom - IEEE802.11n SoftMAC chipsets. It also has WLAN led support, which will - be available if you select BCMA_DRIVER_GPIO. If you choose to build a - module, the driver will be called brcmsmac.ko. + IEEE802.11n SoftMAC chipsets. If you choose to build a module, the + driver will be called brcmsmac.ko. + +config BRCMSMAC_LEDS + def_bool BRCMSMAC && BCMA_DRIVER_GPIO && MAC80211_LEDS + help + The brcmsmac LED support depends on the presence of the + BCMA_DRIVER_GPIO driver, and it only works if LED support + is enabled and reachable from the driver module. source "drivers/net/wireless/broadcom/brcm80211/brcmfmac/Kconfig" diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile index 482d7737764d..090757730ba6 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/Makefile @@ -42,6 +42,6 @@ brcmsmac-y := \ brcms_trace_events.o \ debug.o -brcmsmac-$(CONFIG_BCMA_DRIVER_GPIO) += led.o +brcmsmac-$(CONFIG_BRCMSMAC_LEDS) += led.o obj-$(CONFIG_BRCMSMAC) += brcmsmac.o diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h index d65f5c268fd7..2a5cbeb9e783 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/led.h @@ -24,7 +24,7 @@ struct brcms_led { struct gpio_desc *gpiod; }; -#ifdef CONFIG_BCMA_DRIVER_GPIO +#ifdef CONFIG_BRCMSMAC_LEDS void brcms_led_unregister(struct brcms_info *wl); int brcms_led_register(struct brcms_info *wl); #else From f7d55d2e439fa4430755d69a5d7ad16d43a5ebe6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 4 Dec 2021 18:38:35 +0100 Subject: [PATCH 307/868] mt76: mt7921: fix build regression After mt7921s got added, there are two possible build problems: a) mt7921s does not get built at all if mt7921e is not also enabled b) there is a link error when mt7921e is a loadable module, but mt7921s configured as built-in: ERROR: modpost: "mt7921_mac_sta_add" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_sta_assoc" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_sta_remove" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mac_write_txwi" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_drv_pmctrl" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_fill_message" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_mcu_parse_response" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_ops" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_queue_rx_skb" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! ERROR: modpost: "mt7921_update_channel" [drivers/net/wireless/mediatek/mt76/mt7921/mt7921e.ko] undefined! Fix both by making sure that Kbuild enters the subdirectory when either one is enabled. Fixes: 48fab5bbef40 ("mt76: mt7921: introduce mt7921s support") Signed-off-by: Arnd Bergmann Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211204173848.873293-3-arnd@kernel.org --- drivers/net/wireless/mediatek/mt76/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile index 79ab850a45a2..c78ae4b89761 100644 --- a/drivers/net/wireless/mediatek/mt76/Makefile +++ b/drivers/net/wireless/mediatek/mt76/Makefile @@ -34,4 +34,4 @@ obj-$(CONFIG_MT76x2_COMMON) += mt76x2/ obj-$(CONFIG_MT7603E) += mt7603/ obj-$(CONFIG_MT7615_COMMON) += mt7615/ obj-$(CONFIG_MT7915E) += mt7915/ -obj-$(CONFIG_MT7921E) += mt7921/ +obj-$(CONFIG_MT7921_COMMON) += mt7921/ From 1a0f25a52e08b1f67510cabbb44888d2b3c46359 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Fri, 12 Nov 2021 17:06:02 -0800 Subject: [PATCH 308/868] ice: safer stats processing The driver was zeroing live stats that could be fetched by ndo_get_stats64 at any time. This could result in inconsistent statistics, and the telltale sign was when reading stats frequently from /proc/net/dev, the stats would go backwards. Fix by collecting stats into a local, and delaying when we write to the structure so it's not incremental. Fixes: fcea6f3da546 ("ice: Add stats and ethtool support") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 29 ++++++++++++++--------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c6d6ce52e2ca..73c61cdb036f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5930,14 +5930,15 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, struct ice_q_stats st /** * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters * @vsi: the VSI to be updated + * @vsi_stats: the stats struct to be updated * @rings: rings to work on * @count: number of rings */ static void -ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_tx_ring **rings, - u16 count) +ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, + struct rtnl_link_stats64 *vsi_stats, + struct ice_tx_ring **rings, u16 count) { - struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; u16 i; for (i = 0; i < count; i++) { @@ -5961,15 +5962,13 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_tx_ring **rings, */ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { - struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; + struct rtnl_link_stats64 *vsi_stats; u64 pkts, bytes; int i; - /* reset netdev stats */ - vsi_stats->tx_packets = 0; - vsi_stats->tx_bytes = 0; - vsi_stats->rx_packets = 0; - vsi_stats->rx_bytes = 0; + vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC); + if (!vsi_stats) + return; /* reset non-netdev (extended) stats */ vsi->tx_restart = 0; @@ -5981,7 +5980,8 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) rcu_read_lock(); /* update Tx rings counters */ - ice_update_vsi_tx_ring_stats(vsi, vsi->tx_rings, vsi->num_txq); + ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings, + vsi->num_txq); /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { @@ -5996,10 +5996,17 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update XDP Tx rings counters */ if (ice_is_xdp_ena_vsi(vsi)) - ice_update_vsi_tx_ring_stats(vsi, vsi->xdp_rings, + ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings, vsi->num_xdp_txq); rcu_read_unlock(); + + vsi->net_stats.tx_packets = vsi_stats->tx_packets; + vsi->net_stats.tx_bytes = vsi_stats->tx_bytes; + vsi->net_stats.rx_packets = vsi_stats->rx_packets; + vsi->net_stats.rx_bytes = vsi_stats->rx_bytes; + + kfree(vsi_stats); } /** From ff9f9c6e74848170fcb45c8403c80d661484c8c9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Dec 2021 13:33:07 +0300 Subject: [PATCH 309/868] vduse: fix memory corruption in vduse_dev_ioctl() The "config.offset" comes from the user. There needs to a check to prevent it being out of bounds. The "config.offset" and "dev->config_size" variables are both type u32. So if the offset if out of bounds then the "dev->config_size - config.offset" subtraction results in a very high u32 value. The out of bounds offset can result in memory corruption. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211208103307.GA3778@kili Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index c9204c62f339..1a206f95d73a 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -975,7 +975,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; ret = -EINVAL; - if (config.length == 0 || + if (config.offset > dev->config_size || + config.length == 0 || config.length > dev->config_size - config.offset) break; From 3ed21c1451a14d139e1ceb18f2fa70865ce3195a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Dec 2021 13:33:37 +0300 Subject: [PATCH 310/868] vdpa: check that offsets are within bounds In this function "c->off" is a u32 and "size" is a long. On 64bit systems if "c->off" is greater than "size" then "size - c->off" is a negative and we always return -E2BIG. But on 32bit systems the subtraction is type promoted to a high positive u32 value and basically any "c->len" is accepted. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Reported-by: Xie Yongji Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211208103337.GA4047@kili Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org --- drivers/vhost/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 29cced1cd277..e3c4f059b21a 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -197,7 +197,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v, struct vdpa_device *vdpa = v->vdpa; long size = vdpa->config->get_config_size(vdpa); - if (c->len == 0) + if (c->len == 0 || c->off > size) return -EINVAL; if (c->len > size - c->off) From dc1db0060c02d119fd4196924eff2d1129e9a442 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Dec 2021 18:09:56 +0300 Subject: [PATCH 311/868] vduse: check that offset is within bounds in get_config() This condition checks "len" but it does not check "offset" and that could result in an out of bounds read if "offset > dev->config_size". The problem is that since both variables are unsigned the "dev->config_size - offset" subtraction would result in a very high unsigned value. I think these checks might not be necessary because "len" and "offset" are supposed to already have been validated using the vhost_vdpa_config_validate() function. But I do not know the code perfectly, and I like to be safe. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20211208150956.GA29160@kili Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 1a206f95d73a..eddcb64a910a 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -655,7 +655,8 @@ static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vduse_dev *dev = vdpa_to_vduse(vdpa); - if (len > dev->config_size - offset) + if (offset > dev->config_size || + len > dev->config_size - offset) return; memcpy(buf, dev->config + offset, len); From 27d9839f17940e8edc475df616bbd9cf7ede8d05 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Dec 2021 09:50:18 +0100 Subject: [PATCH 312/868] virtio: always enter drivers/virtio/ When neither VIRTIO_PCI_LIB nor VIRTIO are enabled, but the alibaba vdpa driver is, the kernel runs into a link error because the legacy virtio module never gets built: x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_features': eni_vdpa.c:(.text+0x23f): undefined reference to `vp_legacy_set_features' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_vq_state': eni_vdpa.c:(.text+0x2fe): undefined reference to `vp_legacy_get_queue_enable' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_vq_address': eni_vdpa.c:(.text+0x376): undefined reference to `vp_legacy_set_queue_address' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_set_vq_ready': eni_vdpa.c:(.text+0x3b4): undefined reference to `vp_legacy_set_queue_address' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_free_irq': eni_vdpa.c:(.text+0x460): undefined reference to `vp_legacy_queue_vector' x86_64-linux-ld: eni_vdpa.c:(.text+0x4b7): undefined reference to `vp_legacy_config_vector' x86_64-linux-ld: drivers/vdpa/alibaba/eni_vdpa.o: in function `eni_vdpa_reset': When VIRTIO_PCI_LIB was added, it was correctly added to drivers/Makefile as well, but for the legacy module, this is missing. Solve this by always entering drivers/virtio during the build and letting its Makefile take care of the individual options, rather than having a separate line for each sub-option. Fixes: 64b9f64f80a6 ("vdpa: introduce virtio pci driver") Fixes: e85087beedca ("eni_vdpa: add vDPA driver for Alibaba ENI") Fixes: d89c8169bd70 ("virtio-pci: introduce legacy device module") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211206085034.2836099-1-arnd@kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index be5d40ae1488..a110338c860c 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -41,8 +41,7 @@ obj-$(CONFIG_DMADEVICES) += dma/ # SOC specific infrastructure drivers. obj-y += soc/ -obj-$(CONFIG_VIRTIO) += virtio/ -obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio/ +obj-y += virtio/ obj-$(CONFIG_VDPA) += vdpa/ obj-$(CONFIG_XEN) += xen/ From 817fc978b5a29b039db0418a91072b31c9aab152 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Dec 2021 11:20:18 +0000 Subject: [PATCH 313/868] virtio_ring: Fix querying of maximum DMA mapping size for virtio device virtio_max_dma_size() returns the maximum DMA mapping size of the virtio device by querying dma_max_mapping_size() for the device when the DMA API is in use for the vring. Unfortunately, the device passed is initialised by register_virtio_device() and does not inherit the DMA configuration from its parent, resulting in SWIOTLB errors when bouncing is enabled and the default 256K mapping limit (IO_TLB_SEGSIZE) is not respected: | virtio-pci 0000:00:01.0: swiotlb buffer is full (sz: 294912 bytes), total 1024 (slots), used 725 (slots) Follow the pattern used elsewhere in the virtio_ring code when calling into the DMA layer and pass the parent device to dma_max_mapping_size() instead. Cc: Marc Zyngier Cc: Quentin Perret Cc: "Michael S. Tsirkin" Cc: Jason Wang Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20211201112018.25276-1-will@kernel.org Acked-by: Jason Wang Tested-by: Suzuki K Poulose Fixes: e6d6dd6c875e ("virtio: Introduce virtio_max_dma_size()") Cc: Joerg Roedel Cc: Konrad Rzeszutek Wilk Cc: Christoph Hellwig Cc: Robin Murphy Signed-off-by: Steven Price Signed-off-by: Suzuki K Poulose Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6d2614e34470..028b05d44546 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -268,7 +268,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev) size_t max_segment_size = SIZE_MAX; if (vring_use_dma_api(vdev)) - max_segment_size = dma_max_mapping_size(&vdev->dev); + max_segment_size = dma_max_mapping_size(vdev->dev.parent); return max_segment_size; } From 1db8f5fc2e5c66a5c51e1f6488e0ba7d45c29ae4 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 25 Nov 2021 20:18:23 -0500 Subject: [PATCH 314/868] virtio/vsock: fix the transport to work with VMADDR_CID_ANY The VMADDR_CID_ANY flag used by a socket means that the socket isn't bound to any specific CID. For example, a host vsock server may want to be bound with VMADDR_CID_ANY, so that a guest vsock client can connect to the host server with CID=VMADDR_CID_HOST (i.e. 2), and meanwhile, a host vsock client can connect to the same local server with CID=VMADDR_CID_LOCAL (i.e. 1). The current implementation sets the destination socket's svm_cid to a fixed CID value after the first client's connection, which isn't an expected operation. For example, if the guest client first connects to the host server, the server's svm_cid gets set to VMADDR_CID_HOST, then other host clients won't be able to connect to the server anymore. Reproduce steps: 1. Run the host server: socat VSOCK-LISTEN:1234,fork - 2. Run a guest client to connect to the host server: socat - VSOCK-CONNECT:2:1234 3. Run a host client to connect to the host server: socat - VSOCK-CONNECT:1:1234 Without this patch, step 3. above fails to connect, and socat complains "socat[1720] E connect(5, AF=40 cid:1 port:1234, 16): Connection reset by peer". With this patch, the above works well. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Signed-off-by: Wei Wang Link: https://lore.kernel.org/r/20211126011823.1760-1-wei.w.wang@intel.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- net/vmw_vsock/virtio_transport_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 59ee1be5a6dd..ec2c2afbf0d0 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1299,7 +1299,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, space_available = virtio_transport_space_update(sk, pkt); /* Update CID in case it has changed after a transport reset event */ - vsk->local_addr.svm_cid = dst.svm_cid; + if (vsk->local_addr.svm_cid != VMADDR_CID_ANY) + vsk->local_addr.svm_cid = dst.svm_cid; if (space_available) sk->sk_write_space(sk); From bb47620be322c5e9e372536cb6b54e17b3a00258 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 30 Nov 2021 06:29:49 +0200 Subject: [PATCH 315/868] vdpa: Consider device id larger than 31 virtio device id value can be more than 31. Hence, use BIT_ULL in assignment. Fixes: 33b347503f01 ("vdpa: Define vdpa mgmt device, ops and a netlink interface") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Parav Pandit Acked-by: Jason Wang Link: https://lore.kernel.org/r/20211130042949.88958-1-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 7332a74a4b00..09bbe53c3ac4 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -404,7 +404,8 @@ static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *m goto msg_err; while (mdev->id_table[i].device) { - supported_classes |= BIT(mdev->id_table[i].device); + if (mdev->id_table[i].device <= 63) + supported_classes |= BIT_ULL(mdev->id_table[i].device); i++; } From cabdc3a8475b918e55744f43719b26a82dc8fa6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Dec 2021 10:14:02 +0100 Subject: [PATCH 316/868] sched,x86: Don't use cluster topology for x86 hybrid CPUs For x86 hybrid CPUs like Alder Lake, the order of CPU selection should be based strictly on CPU priority. Don't include cluster topology for hybrid CPUs to avoid interference with such CPU selection order. On Alder Lake, the Atom CPU cluster has more capacity (4 Atom CPUs) vs Big core cluster (2 hyperthread CPUs). This could potentially bias CPU selection towards Atom over Big Core, when Big core CPU has higher priority. Fixes: 66558b730f25 ("sched: Add cluster scheduler level for x86") Suggested-by: Tim Chen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tim Chen Tested-by: Ricardo Neri Link: https://lkml.kernel.org/r/20211204091402.GM16608@worktop.programming.kicks-ass.net --- arch/x86/kernel/smpboot.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ac2909f0cab3..617012f4619f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -579,6 +579,17 @@ static struct sched_domain_topology_level x86_numa_in_package_topology[] = { { NULL, }, }; +static struct sched_domain_topology_level x86_hybrid_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + static struct sched_domain_topology_level x86_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, @@ -1469,8 +1480,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus) calculate_max_logical_packages(); + /* XXX for now assume numa-in-package and hybrid don't overlap */ if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + set_sched_topology(x86_hybrid_topology); nmi_selftest(); impress_friends(); From 75e895343d5a2fcbdf4cb3d31ab7492bd65925f0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 8 Dec 2021 15:39:16 -0600 Subject: [PATCH 317/868] Revert "kbuild: Enable DT schema checks for %.dtb targets" This reverts commit 53182e81f47d4ea0c727c49ad23cb782173ab849. This added tool dependencies on various build systems using %.dtb targets. Validation will need to be controlled by a kconfig or make variable instead, but for now let's just revert it. Signed-off-by: Rob Herring --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 9e12c14ea0fb..fa5070e53979 100644 --- a/Makefile +++ b/Makefile @@ -1374,17 +1374,17 @@ endif ifneq ($(dtstree),) -%.dtb: dt_binding_check include/config/kernel.release scripts_dtc - $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ $(dtstree)/$*.dt.yaml +%.dtb: include/config/kernel.release scripts_dtc + $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ -%.dtbo: dt_binding_check include/config/kernel.release scripts_dtc - $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ $(dtstree)/$*.dt.yaml +%.dtbo: include/config/kernel.release scripts_dtc + $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ PHONY += dtbs dtbs_install dtbs_check dtbs: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) -ifneq ($(filter dtbs_check %.dtb %.dtbo, $(MAKECMDGOALS)),) +ifneq ($(filter dtbs_check, $(MAKECMDGOALS)),) export CHECK_DTBS=y dtbs: dt_binding_check endif From 2b29cb9e3f7f038c7f50ad2583b47caf5cb1eaf2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 7 Dec 2021 10:32:43 +0000 Subject: [PATCH 318/868] net: dsa: mv88e6xxx: fix "don't use PHY_DETECT on internal PHY's" This commit fixes a misunderstanding in commit 4a3e0aeddf09 ("net: dsa: mv88e6xxx: don't use PHY_DETECT on internal PHY's"). For Marvell DSA switches with the PHY_DETECT bit (for non-6250 family devices), controls whether the PPU polls the PHY to retrieve the link, speed, duplex and pause status to update the port configuration. This applies for both internal and external PHYs. For some switches such as 88E6352 and 88E6390X, PHY_DETECT has an additional function of enabling auto-media mode between the internal PHY and SERDES blocks depending on which first gains link. The original intention of commit 5d5b231da7ac (net: dsa: mv88e6xxx: use PHY_DETECT in mac_link_up/mac_link_down) was to allow this bit to be used to detect when this propagation is enabled, and allow software to update the port configuration. This has found to be necessary for some switches which do not automatically propagate status from the SERDES to the port, which includes the 88E6390. However, commit 4a3e0aeddf09 ("net: dsa: mv88e6xxx: don't use PHY_DETECT on internal PHY's") breaks this assumption. Maarten Zanders has confirmed that the issue he was addressing was for an 88E6250 switch, which does not have a PHY_DETECT bit in bit 12, but instead a link status bit. Therefore, mv88e6xxx_port_ppu_updates() does not report correctly. This patch resolves the above issues by reverting Maarten's change and instead making mv88e6xxx_port_ppu_updates() indicate whether the port is internal for the 88E6250 family of switches. Yes, you're right, I'm targeting the 6250 family. And yes, your suggestion would solve my case and is a better implementation for the other devices (as far as I can see). Fixes: 4a3e0aeddf09 ("net: dsa: mv88e6xxx: don't use PHY_DETECT on internal PHY's") Signed-off-by: Russell King Tested-by: Maarten Zanders Link: https://lore.kernel.org/r/E1muXm7-00EwJB-7n@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index f00cbf5753b9..9f675464efc3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -471,6 +471,12 @@ static int mv88e6xxx_port_ppu_updates(struct mv88e6xxx_chip *chip, int port) u16 reg; int err; + /* The 88e6250 family does not have the PHY detect bit. Instead, + * report whether the port is internal. + */ + if (chip->info->family == MV88E6XXX_FAMILY_6250) + return port < chip->info->num_internal_phys; + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_STS, ®); if (err) { dev_err(chip->dev, @@ -752,11 +758,10 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - /* Internal PHYs propagate their configuration directly to the MAC. - * External PHYs depend on whether the PPU is enabled for this port. + /* Force the link down if we know the port may not be automatically + * updated by the switch or if we are using fixed-link mode. */ - if (((!mv88e6xxx_phy_is_internal(ds, port) && - !mv88e6xxx_port_ppu_updates(chip, port)) || + if ((!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) && ops->port_sync_link) err = ops->port_sync_link(chip, port, mode, false); mv88e6xxx_reg_unlock(chip); @@ -779,11 +784,11 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - /* Internal PHYs propagate their configuration directly to the MAC. - * External PHYs depend on whether the PPU is enabled for this port. + /* Configure and force the link up if we know that the port may not + * automatically updated by the switch or if we are using fixed-link + * mode. */ - if ((!mv88e6xxx_phy_is_internal(ds, port) && - !mv88e6xxx_port_ppu_updates(chip, port)) || + if (!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) { /* FIXME: for an automedia port, should we force the link * down here - what if the link comes up due to "other" media From 9de0737d5ba0425c3154d5d83da12a8fa8595c0f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 7 Dec 2021 22:51:04 -0300 Subject: [PATCH 319/868] cifs: fix ntlmssp auth when there is no key exchange Warn on the lack of key exchange during NTLMSSP authentication rather than aborting it as there are some servers that do not set it in CHALLENGE message. Signed-off-by: Paulo Alcantara (SUSE) Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/sess.c | 54 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index af63548eaf26..035dc3e245dc 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -590,8 +590,8 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, { unsigned int tioffset; /* challenge message target info area */ unsigned int tilen; /* challenge message target info area length */ - CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; + __u32 server_flags; if (blob_len < sizeof(CHALLENGE_MESSAGE)) { cifs_dbg(VFS, "challenge blob len %d too small\n", blob_len); @@ -609,12 +609,37 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, return -EINVAL; } + server_flags = le32_to_cpu(pblob->NegotiateFlags); + cifs_dbg(FYI, "%s: negotiate=0x%08x challenge=0x%08x\n", __func__, + ses->ntlmssp->client_flags, server_flags); + + if ((ses->ntlmssp->client_flags & (NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_SIGN)) && + (!(server_flags & NTLMSSP_NEGOTIATE_56) && !(server_flags & NTLMSSP_NEGOTIATE_128))) { + cifs_dbg(VFS, "%s: requested signing/encryption but server did not return either 56-bit or 128-bit session key size\n", + __func__); + return -EINVAL; + } + if (!(server_flags & NTLMSSP_NEGOTIATE_NTLM) && !(server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) { + cifs_dbg(VFS, "%s: server does not seem to support either NTLMv1 or NTLMv2\n", __func__); + return -EINVAL; + } + if (ses->server->sign && !(server_flags & NTLMSSP_NEGOTIATE_SIGN)) { + cifs_dbg(VFS, "%s: forced packet signing but server does not seem to support it\n", + __func__); + return -EOPNOTSUPP; + } + if ((ses->ntlmssp->client_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && + !(server_flags & NTLMSSP_NEGOTIATE_KEY_XCH)) + pr_warn_once("%s: authentication has been weakened as server does not support key exchange\n", + __func__); + + ses->ntlmssp->server_flags = server_flags; + memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); - /* BB we could decode pblob->NegotiateFlags; some may be useful */ /* In particular we can examine sign flags */ /* BB spec says that if AvId field of MsvAvTimestamp is populated then we must set the MIC field of the AUTHENTICATE_MESSAGE */ - ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); + tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); if (tioffset > blob_len || tioffset + tilen > blob_len) { @@ -721,13 +746,13 @@ int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC | - NTLMSSP_NEGOTIATE_SEAL; - if (server->sign) - flags |= NTLMSSP_NEGOTIATE_SIGN; + NTLMSSP_NEGOTIATE_ALWAYS_SIGN | NTLMSSP_NEGOTIATE_SEAL | + NTLMSSP_NEGOTIATE_SIGN; if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess) flags |= NTLMSSP_NEGOTIATE_KEY_XCH; tmp = *pbuffer + sizeof(NEGOTIATE_MESSAGE); + ses->ntlmssp->client_flags = flags; sec_blob->NegotiateFlags = cpu_to_le32(flags); /* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */ @@ -779,15 +804,8 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; - flags = NTLMSSP_NEGOTIATE_56 | - NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | - NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC | - NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED; - if (ses->server->sign) - flags |= NTLMSSP_NEGOTIATE_SIGN; - if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess) - flags |= NTLMSSP_NEGOTIATE_KEY_XCH; + flags = ses->ntlmssp->server_flags | NTLMSSP_REQUEST_TARGET | + NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED; tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); @@ -834,9 +852,9 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, *pbuffer, &tmp, nls_cp); - if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) || - (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) - && !calc_seckey(ses)) { + if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && + (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess) && + !calc_seckey(ses)) { memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer); sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); From e195e9b5dee6459d8c8e6a314cc71a644a0537fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2021 08:53:29 -0800 Subject: [PATCH 320/868] net, neigh: clear whole pneigh_entry at alloc time Commit 2c611ad97a82 ("net, neigh: Extend neigh->flags to 32 bit to allow for extensions") enables a new KMSAM warning [1] I think the bug is actually older, because the following intruction only occurred if ndm->ndm_flags had NTF_PROXY set. pn->flags = ndm->ndm_flags; Let's clear all pneigh_entry fields at alloc time. [1] BUG: KMSAN: uninit-value in pneigh_fill_info+0x986/0xb30 net/core/neighbour.c:2593 pneigh_fill_info+0x986/0xb30 net/core/neighbour.c:2593 pneigh_dump_table net/core/neighbour.c:2715 [inline] neigh_dump_info+0x1e3f/0x2c60 net/core/neighbour.c:2832 netlink_dump+0xaca/0x16a0 net/netlink/af_netlink.c:2265 __netlink_dump_start+0xd1c/0xee0 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:254 [inline] rtnetlink_rcv_msg+0x181b/0x18c0 net/core/rtnetlink.c:5534 netlink_rcv_skb+0x447/0x800 net/netlink/af_netlink.c:2491 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5589 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x1095/0x1360 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x16f3/0x1870 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] sock_write_iter+0x594/0x690 net/socket.c:1057 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write fs/read_write.c:503 [inline] vfs_write+0x1318/0x2030 fs/read_write.c:590 ksys_write+0x28c/0x520 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __x64_sys_write+0xdb/0x120 fs/read_write.c:652 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: slab_post_alloc_hook mm/slab.h:524 [inline] slab_alloc_node mm/slub.c:3251 [inline] slab_alloc mm/slub.c:3259 [inline] __kmalloc+0xc3c/0x12d0 mm/slub.c:4437 kmalloc include/linux/slab.h:595 [inline] pneigh_lookup+0x60f/0xd70 net/core/neighbour.c:766 arp_req_set_public net/ipv4/arp.c:1016 [inline] arp_req_set+0x430/0x10a0 net/ipv4/arp.c:1032 arp_ioctl+0x8d4/0xb60 net/ipv4/arp.c:1232 inet_ioctl+0x4ef/0x820 net/ipv4/af_inet.c:947 sock_do_ioctl net/socket.c:1118 [inline] sock_ioctl+0xa3f/0x13e0 net/socket.c:1235 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl+0x2df/0x4a0 fs/ioctl.c:860 __x64_sys_ioctl+0xd8/0x110 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae CPU: 1 PID: 20001 Comm: syz-executor.0 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 62dd93181aaa ("[IPV6] NDISC: Set per-entry is_router flag in Proxy NA.") Signed-off-by: Eric Dumazet Cc: Roopa Prabhu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20211206165329.1049835-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 72ba027c34cf..dda12fbd177b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -763,11 +763,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, ASSERT_RTNL(); - n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; - n->protocol = 0; write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; From f71ef02f1a4a3c49962fa341ad8de19071f0f9bf Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 7 Dec 2021 00:17:37 -0800 Subject: [PATCH 321/868] vmxnet3: fix minimum vectors alloc issue 'Commit 39f9895a00f4 ("vmxnet3: add support for 32 Tx/Rx queues")' added support for 32Tx/Rx queues. Within that patch, value of VMXNET3_LINUX_MIN_MSIX_VECT was updated. However, there is a case (numvcpus = 2) which actually requires 3 intrs which matches VMXNET3_LINUX_MIN_MSIX_VECT which then is treated as failure by stack to allocate more vectors. This patch fixes this issue. Fixes: 39f9895a00f4 ("vmxnet3: add support for 32 Tx/Rx queues") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Link: https://lore.kernel.org/r/20211207081737.14000-1-doshir@vmware.com Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 14fae317bc70..fd407c0e2856 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3261,7 +3261,7 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) #ifdef CONFIG_PCI_MSI if (adapter->intr.type == VMXNET3_IT_MSIX) { - int i, nvec; + int i, nvec, nvec_allocated; nvec = adapter->share_intr == VMXNET3_INTR_TXSHARE ? 1 : adapter->num_tx_queues; @@ -3274,14 +3274,15 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) for (i = 0; i < nvec; i++) adapter->intr.msix_entries[i].entry = i; - nvec = vmxnet3_acquire_msix_vectors(adapter, nvec); - if (nvec < 0) + nvec_allocated = vmxnet3_acquire_msix_vectors(adapter, nvec); + if (nvec_allocated < 0) goto msix_err; /* If we cannot allocate one MSIx vector per queue * then limit the number of rx queues to 1 */ - if (nvec == VMXNET3_LINUX_MIN_MSIX_VECT) { + if (nvec_allocated == VMXNET3_LINUX_MIN_MSIX_VECT && + nvec != VMXNET3_LINUX_MIN_MSIX_VECT) { if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE || adapter->num_rx_queues != 1) { adapter->share_intr = VMXNET3_INTR_TXSHARE; @@ -3291,14 +3292,14 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) } } - adapter->intr.num_intrs = nvec; + adapter->intr.num_intrs = nvec_allocated; return; msix_err: /* If we cannot allocate MSIx vectors use only one rx queue */ dev_info(&adapter->pdev->dev, "Failed to enable MSI-X, error %d. " - "Limiting #rx queues to 1, try MSI.\n", nvec); + "Limiting #rx queues to 1, try MSI.\n", nvec_allocated); adapter->intr.type = VMXNET3_IT_MSI; } From a66307d473077b7aeba74e9b09c841ab3d399c2d Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 8 Dec 2021 07:58:53 +0100 Subject: [PATCH 322/868] libata: add horkage for ASMedia 1092 The ASMedia 1092 has a configuration mode which will present a dummy device; sadly the implementation falsely claims to provide a device with 100M which doesn't actually exist. So disable this device to avoid errors during boot. Cc: stable@vger.kernel.org Signed-off-by: Hannes Reinecke Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 59ad8c979cb3..aba0c67d1bd6 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3920,6 +3920,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "VRFDFC22048UCHC-TE*", NULL, ATA_HORKAGE_NODMA }, /* Odd clown on sil3726/4726 PMPs */ { "Config Disk", NULL, ATA_HORKAGE_DISABLE }, + /* Similar story with ASMedia 1092 */ + { "ASMT109x- Config", NULL, ATA_HORKAGE_DISABLE }, /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, From a50e659b2a1be14784e80f8492aab177e67c53a2 Mon Sep 17 00:00:00 2001 From: Louis Amas Date: Tue, 7 Dec 2021 15:34:22 +0100 Subject: [PATCH 323/868] net: mvpp2: fix XDP rx queues registering The registration of XDP queue information is incorrect because the RX queue id we use is invalid. When port->id == 0 it appears to works as expected yet it's no longer the case when port->id != 0. The problem arised while using a recent kernel version on the MACCHIATOBin. This board has several ports: * eth0 and eth1 are 10Gbps interfaces ; both ports has port->id == 0; * eth2 is a 1Gbps interface with port->id != 0. Code from xdp-tutorial (more specifically advanced03-AF_XDP) was used to test packet capture and injection on all these interfaces. The XDP kernel was simplified to: SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) { int index = ctx->rx_queue_index; /* A set entry here means that the correspnding queue_id * has an active AF_XDP socket bound to it. */ if (bpf_map_lookup_elem(&xsks_map, &index)) return bpf_redirect_map(&xsks_map, index, 0); return XDP_PASS; } Starting the program using: ./af_xdp_user -d DEV Gives the following result: * eth0 : ok * eth1 : ok * eth2 : no capture, no injection Investigating the issue shows that XDP rx queues for eth2 are wrong: XDP expects their id to be in the range [0..3] but we found them to be in the range [32..35]. Trying to force rx queue ids using: ./af_xdp_user -d eth2 -Q 32 fails as expected (we shall not have more than 4 queues). When we register the XDP rx queue information (using xdp_rxq_info_reg() in function mvpp2_rxq_init()) we tell it to use rxq->id as the queue id. This value is computed as: rxq->id = port->id * max_rxq_count + queue_id where max_rxq_count depends on the device version. In the MACCHIATOBin case, this value is 32, meaning that rx queues on eth2 are numbered from 32 to 35 - there are four of them. Clearly, this is not the per-port queue id that XDP is expecting: it wants a value in the range [0..3]. It shall directly use queue_id which is stored in rxq->logic_rxq -- so let's use that value instead. rxq->id is left untouched ; its value is indeed valid but it should not be used in this context. This is consistent with the remaining part of the code in mvpp2_rxq_init(). With this change, packet capture is working as expected on all the MACCHIATOBin ports. Fixes: b27db2274ba8 ("mvpp2: use page_pool allocator") Signed-off-by: Louis Amas Signed-off-by: Emmanuel Deloget Reviewed-by: Marcin Wojtas Acked-by: John Fastabend Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20211207143423.916334-1-louis.amas@eho.link Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 6480696c979b..6da8a595026b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2960,11 +2960,11 @@ static int mvpp2_rxq_init(struct mvpp2_port *port, mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size); if (priv->percpu_pools) { - err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->id, 0); + err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->logic_rxq, 0); if (err < 0) goto err_free_dma; - err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->id, 0); + err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->logic_rxq, 0); if (err < 0) goto err_unregister_rxq_short; From af6902ec415655236adea91826bd96ed0ab16f42 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 23 Nov 2021 11:56:38 -0500 Subject: [PATCH 324/868] drm/amd/display: Fix DPIA outbox timeout after S3/S4/reset [Why] The HW interrupt gets disabled after S3/S4/reset so we don't receive notifications for HPD or AUX from DMUB - leading to timeout and black screen with (or without) DPIA links connected. [How] Re-enable the interrupt after S3/S4/reset like we do for the other DC interrupts. Guard both instances of the outbox interrupt enable or we'll hang during restore on ASIC that don't support it. Fixes: 6eff272dbee7ad ("drm/amd/display: Fix DPIA outbox timeout after GPU reset") Reviewed-by: Jude Shih Acked-by: Pavle Kotarac Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1cd6b9f4a568..122dae1a1813 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2576,7 +2576,8 @@ static int dm_resume(void *handle) */ link_enc_cfg_init(dm->dc, dc_state); - amdgpu_dm_outbox_init(adev); + if (dc_enable_dmub_notifications(adev->dm.dc)) + amdgpu_dm_outbox_init(adev); r = dm_dmub_hw_init(adev); if (r) @@ -2625,6 +2626,10 @@ static int dm_resume(void *handle) /* TODO: Remove dc_state->dccg, use dc->dccg directly. */ dc_resource_state_construct(dm->dc, dm_state->context); + /* Re-enable outbox interrupts for DPIA. */ + if (dc_enable_dmub_notifications(adev->dm.dc)) + amdgpu_dm_outbox_init(adev); + /* Before powering on DC we need to re-initialize DMUB. */ r = dm_dmub_hw_init(adev); if (r) From 0755c38eb007196a5f779298b4a5f46c4eec41d2 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Mon, 15 Nov 2021 16:07:38 -0500 Subject: [PATCH 325/868] drm/amd/display: prevent reading unitialized links [why/how] The function can be called on boot or after suspend when links are not initialized, to prevent it guard it with NULL pointer check Reviewed-by: Nicholas Kazlauskas Acked-by: Pavle Kotarac Signed-off-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_link.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index b01077a6af0e..fad3d883ed89 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -226,6 +226,8 @@ static inline void get_edp_links(const struct dc *dc, *edp_num = 0; for (i = 0; i < dc->link_count; i++) { // report any eDP links, even unconnected DDI's + if (!dc->links[i]) + continue; if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP) { edp_links[*edp_num] = dc->links[i]; if (++(*edp_num) == MAX_NUM_EDP) From 36aea60fc892ce73f96d45dc7eb239c7c4c1fa69 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Wed, 8 Dec 2021 16:21:21 +0100 Subject: [PATCH 326/868] can: kvaser_pciefd: kvaser_pciefd_rx_error_frame(): increase correct stats->{rx,tx}_errors counter Check the direction bit in the error frame packet (EPACK) to determine which net_device_stats {rx,tx}_errors counter to increase. Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Link: https://lore.kernel.org/all/20211208152122.250852-1-extja@kvaser.com Cc: stable@vger.kernel.org Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 74d9899fc904..eb74cdf26b88 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -248,6 +248,9 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); #define KVASER_PCIEFD_SPACK_EWLR BIT(23) #define KVASER_PCIEFD_SPACK_EPLR BIT(24) +/* Kvaser KCAN_EPACK second word */ +#define KVASER_PCIEFD_EPACK_DIR_TX BIT(0) + struct kvaser_pciefd; struct kvaser_pciefd_can { @@ -1285,7 +1288,10 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can, can->err_rep_cnt++; can->can.can_stats.bus_error++; - stats->rx_errors++; + if (p->header[1] & KVASER_PCIEFD_EPACK_DIR_TX) + stats->tx_errors++; + else + stats->rx_errors++; can->bec.txerr = bec.txerr; can->bec.rxerr = bec.rxerr; From fb12797ab1fef480ad8a32a30984844444eeb00d Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Wed, 8 Dec 2021 16:21:22 +0100 Subject: [PATCH 327/868] can: kvaser_usb: get CAN clock frequency from device The CAN clock frequency is used when calculating the CAN bittiming parameters. When wrong clock frequency is used, the device may end up with wrong bittiming parameters, depending on user requested bittiming parameters. To avoid this, get the CAN clock frequency from the device. Various existing Kvaser Leaf products use different CAN clocks. Fixes: 080f40a6fa28 ("can: kvaser_usb: Add support for Kvaser CAN/USB devices") Link: https://lore.kernel.org/all/20211208152122.250852-2-extja@kvaser.com Cc: stable@vger.kernel.org Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde --- .../net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 101 +++++++++++++----- 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 59ba7c7beec0..f7af1bf5ab46 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -28,10 +28,6 @@ #include "kvaser_usb.h" -/* Forward declaration */ -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; - -#define CAN_USB_CLOCK 8000000 #define MAX_USBCAN_NET_DEVICES 2 /* Command header size */ @@ -80,6 +76,12 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; #define CMD_LEAF_LOG_MESSAGE 106 +/* Leaf frequency options */ +#define KVASER_USB_LEAF_SWOPTION_FREQ_MASK 0x60 +#define KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK 0 +#define KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK BIT(5) +#define KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK BIT(6) + /* error factors */ #define M16C_EF_ACKE BIT(0) #define M16C_EF_CRCE BIT(1) @@ -340,6 +342,50 @@ struct kvaser_usb_err_summary { }; }; +static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { + .name = "kvaser_usb", + .tseg1_min = KVASER_USB_TSEG1_MIN, + .tseg1_max = KVASER_USB_TSEG1_MAX, + .tseg2_min = KVASER_USB_TSEG2_MIN, + .tseg2_max = KVASER_USB_TSEG2_MAX, + .sjw_max = KVASER_USB_SJW_MAX, + .brp_min = KVASER_USB_BRP_MIN, + .brp_max = KVASER_USB_BRP_MAX, + .brp_inc = KVASER_USB_BRP_INC, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_8mhz = { + .clock = { + .freq = 8000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_16mhz = { + .clock = { + .freq = 16000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_24mhz = { + .clock = { + .freq = 24000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_32mhz = { + .clock = { + .freq = 32000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + static void * kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *frame_len, @@ -471,6 +517,27 @@ static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev, return rc; } +static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev, + const struct leaf_cmd_softinfo *softinfo) +{ + u32 sw_options = le32_to_cpu(softinfo->sw_options); + + dev->fw_version = le32_to_cpu(softinfo->fw_version); + dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx); + + switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { + case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz; + break; + } +} + static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) { struct kvaser_cmd cmd; @@ -486,14 +553,13 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) switch (dev->card_data.leaf.family) { case KVASER_LEAF: - dev->fw_version = le32_to_cpu(cmd.u.leaf.softinfo.fw_version); - dev->max_tx_urbs = - le16_to_cpu(cmd.u.leaf.softinfo.max_outstanding_tx); + kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo); break; case KVASER_USBCAN: dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version); dev->max_tx_urbs = le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx); + dev->cfg = &kvaser_usb_leaf_dev_cfg_8mhz; break; } @@ -1225,24 +1291,11 @@ static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev) { struct kvaser_usb_dev_card_data *card_data = &dev->card_data; - dev->cfg = &kvaser_usb_leaf_dev_cfg; card_data->ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; return 0; } -static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { - .name = "kvaser_usb", - .tseg1_min = KVASER_USB_TSEG1_MIN, - .tseg1_max = KVASER_USB_TSEG1_MAX, - .tseg2_min = KVASER_USB_TSEG2_MIN, - .tseg2_max = KVASER_USB_TSEG2_MAX, - .sjw_max = KVASER_USB_SJW_MAX, - .brp_min = KVASER_USB_BRP_MIN, - .brp_max = KVASER_USB_BRP_MAX, - .brp_inc = KVASER_USB_BRP_INC, -}; - static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); @@ -1348,11 +1401,3 @@ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { .dev_read_bulk_callback = kvaser_usb_leaf_read_bulk_callback, .dev_frame_to_cmd = kvaser_usb_leaf_frame_to_cmd, }; - -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg = { - .clock = { - .freq = CAN_USB_CLOCK, - }, - .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, -}; From b503de239f62eca898cfb7e820d9a35499137d22 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 2 Dec 2021 16:32:14 +0100 Subject: [PATCH 328/868] i2c: virtio: fix completion handling The driver currently assumes that the notify callback is only received when the device is done with all the queued buffers. However, this is not true, since the notify callback could be called without any of the queued buffers being completed (for example, with virtio-pci and shared interrupts) or with only some of the buffers being completed (since the driver makes them available to the device in multiple separate virtqueue_add_sgs() calls). This can lead to incorrect data on the I2C bus or memory corruption in the guest if the device operates on buffers which are have been freed by the driver. (The WARN_ON in the driver is also triggered.) BUG kmalloc-128 (Tainted: G W ): Poison overwritten First byte 0x0 instead of 0x6b Allocated in i2cdev_ioctl_rdwr+0x9d/0x1de age=243 cpu=0 pid=28 memdup_user+0x2e/0xbd i2cdev_ioctl_rdwr+0x9d/0x1de i2cdev_ioctl+0x247/0x2ed vfs_ioctl+0x21/0x30 sys_ioctl+0xb18/0xb41 Freed in i2cdev_ioctl_rdwr+0x1bb/0x1de age=68 cpu=0 pid=28 kfree+0x1bd/0x1cc i2cdev_ioctl_rdwr+0x1bb/0x1de i2cdev_ioctl+0x247/0x2ed vfs_ioctl+0x21/0x30 sys_ioctl+0xb18/0xb41 Fix this by calling virtio_get_buf() from the notify handler like other virtio drivers and by actually waiting for all the buffers to be completed. Fixes: 3cfc88380413d20f ("i2c: virtio: add a virtio i2c frontend driver") Acked-by: Viresh Kumar Signed-off-by: Vincent Whitchurch Acked-by: Michael S. Tsirkin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-virtio.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c index 95378780da6d..41eb0dcc3204 100644 --- a/drivers/i2c/busses/i2c-virtio.c +++ b/drivers/i2c/busses/i2c-virtio.c @@ -22,24 +22,24 @@ /** * struct virtio_i2c - virtio I2C data * @vdev: virtio device for this controller - * @completion: completion of virtio I2C message * @adap: I2C adapter for this controller * @vq: the virtio virtqueue for communication */ struct virtio_i2c { struct virtio_device *vdev; - struct completion completion; struct i2c_adapter adap; struct virtqueue *vq; }; /** * struct virtio_i2c_req - the virtio I2C request structure + * @completion: completion of virtio I2C message * @out_hdr: the OUT header of the virtio I2C message * @buf: the buffer into which data is read, or from which it's written * @in_hdr: the IN header of the virtio I2C message */ struct virtio_i2c_req { + struct completion completion; struct virtio_i2c_out_hdr out_hdr ____cacheline_aligned; uint8_t *buf ____cacheline_aligned; struct virtio_i2c_in_hdr in_hdr ____cacheline_aligned; @@ -47,9 +47,11 @@ struct virtio_i2c_req { static void virtio_i2c_msg_done(struct virtqueue *vq) { - struct virtio_i2c *vi = vq->vdev->priv; + struct virtio_i2c_req *req; + unsigned int len; - complete(&vi->completion); + while ((req = virtqueue_get_buf(vq, &len))) + complete(&req->completion); } static int virtio_i2c_prepare_reqs(struct virtqueue *vq, @@ -62,6 +64,8 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq, for (i = 0; i < num; i++) { int outcnt = 0, incnt = 0; + init_completion(&reqs[i].completion); + /* * Only 7-bit mode supported for this moment. For the address * format, Please check the Virtio I2C Specification. @@ -106,21 +110,15 @@ static int virtio_i2c_complete_reqs(struct virtqueue *vq, struct virtio_i2c_req *reqs, struct i2c_msg *msgs, int num) { - struct virtio_i2c_req *req; bool failed = false; - unsigned int len; int i, j = 0; for (i = 0; i < num; i++) { - /* Detach the ith request from the vq */ - req = virtqueue_get_buf(vq, &len); + struct virtio_i2c_req *req = &reqs[i]; - /* - * Condition req == &reqs[i] should always meet since we have - * total num requests in the vq. reqs[i] can never be NULL here. - */ - if (!failed && (WARN_ON(req != &reqs[i]) || - req->in_hdr.status != VIRTIO_I2C_MSG_OK)) + wait_for_completion(&req->completion); + + if (!failed && req->in_hdr.status != VIRTIO_I2C_MSG_OK) failed = true; i2c_put_dma_safe_msg_buf(reqs[i].buf, &msgs[i], !failed); @@ -156,12 +154,8 @@ static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, * remote here to clear the virtqueue, so we can try another set of * messages later on. */ - - reinit_completion(&vi->completion); virtqueue_kick(vq); - wait_for_completion(&vi->completion); - count = virtio_i2c_complete_reqs(vq, reqs, msgs, count); err_free: @@ -210,8 +204,6 @@ static int virtio_i2c_probe(struct virtio_device *vdev) vdev->priv = vi; vi->vdev = vdev; - init_completion(&vi->completion); - ret = virtio_i2c_setup_vqs(vi); if (ret) return ret; From 09d97da660ff77df20984496aa0abcd6b88819f2 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 8 Dec 2021 17:27:19 +0800 Subject: [PATCH 329/868] MIPS: Only define pci_remap_iospace() for Ralink After commit 9f76779f2418 ("MIPS: implement architecture-specific 'pci_remap_iospace()'"), there exists the following warning on the Loongson64 platform: loongson-pci 1a000000.pci: IO 0x0018020000..0x001803ffff -> 0x0000020000 loongson-pci 1a000000.pci: MEM 0x0040000000..0x007fffffff -> 0x0040000000 ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1 at arch/mips/pci/pci-generic.c:55 pci_remap_iospace+0x84/0x90 resource start address is not zero ... Call Trace: [] show_stack+0x40/0x120 [] dump_stack_lvl+0x58/0x74 [] __warn+0xe0/0x110 [] warn_slowpath_fmt+0xa4/0xd0 [] pci_remap_iospace+0x84/0x90 [] devm_pci_remap_iospace+0x5c/0xb8 [] devm_of_pci_bridge_init+0x178/0x1f8 [] devm_pci_alloc_host_bridge+0x78/0x98 [] loongson_pci_probe+0x34/0x160 [] platform_probe+0x6c/0xe0 [] really_probe+0xbc/0x340 [] __driver_probe_device+0x98/0x110 [] driver_probe_device+0x50/0x118 [] __driver_attach+0x80/0x118 [] bus_for_each_dev+0x80/0xc8 [] bus_add_driver+0x130/0x210 [] driver_register+0x8c/0x150 [] do_one_initcall+0x54/0x288 [] kernel_init_freeable+0x27c/0x2e4 [] kernel_init+0x2c/0x134 [] ret_from_kernel_thread+0x14/0x1c ---[ end trace e4a0efe10aa5cce6 ]--- loongson-pci 1a000000.pci: error -19: failed to map resource [io 0x20000-0x3ffff] We can see that the resource start address is 0x0000020000, because the ISA Bridge used the zero address which is defined in the dts file arch/mips/boot/dts/loongson/ls7a-pch.dtsi: ISA Bridge: /bus@10000000/isa@18000000 IO 0x0000000018000000..0x000000001801ffff -> 0x0000000000000000 Based on the above analysis, the architecture-specific pci_remap_iospace() is not suitable for Loongson64, we should only define pci_remap_iospace() for Ralink on MIPS based on the commit background. Fixes: 9f76779f2418 ("MIPS: implement architecture-specific 'pci_remap_iospace()'") Suggested-by: Thomas Bogendoerfer Signed-off-by: Tiezhu Yang Tested-by: Sergio Paracuellos Acked-by: Sergio Paracuellos Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-ralink/spaces.h | 2 ++ arch/mips/include/asm/pci.h | 4 ---- arch/mips/pci/pci-generic.c | 2 ++ 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/mach-ralink/spaces.h b/arch/mips/include/asm/mach-ralink/spaces.h index 05d14c21c417..f7af11ea2d61 100644 --- a/arch/mips/include/asm/mach-ralink/spaces.h +++ b/arch/mips/include/asm/mach-ralink/spaces.h @@ -6,5 +6,7 @@ #define PCI_IOSIZE SZ_64K #define IO_SPACE_LIMIT (PCI_IOSIZE - 1) +#define pci_remap_iospace pci_remap_iospace + #include #endif diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 421231f55935..9ffc8192adae 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -20,10 +20,6 @@ #include #include -#ifdef CONFIG_PCI_DRIVERS_GENERIC -#define pci_remap_iospace pci_remap_iospace -#endif - #ifdef CONFIG_PCI_DRIVERS_LEGACY /* diff --git a/arch/mips/pci/pci-generic.c b/arch/mips/pci/pci-generic.c index 18eb8a453a86..d2d68bac3d25 100644 --- a/arch/mips/pci/pci-generic.c +++ b/arch/mips/pci/pci-generic.c @@ -47,6 +47,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } +#ifdef pci_remap_iospace int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) { unsigned long vaddr; @@ -60,3 +61,4 @@ int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) set_io_port_base(vaddr); return 0; } +#endif From d594b35d3b31bc04b6ef36589f38135d3acb8df5 Mon Sep 17 00:00:00 2001 From: Wenbin Mei Date: Tue, 7 Dec 2021 15:50:13 +0800 Subject: [PATCH 330/868] mmc: mediatek: free the ext_csd when mmc_get_ext_csd success If mmc_get_ext_csd success, the ext_csd are not freed. Add the missing kfree() calls. Signed-off-by: Wenbin Mei Fixes: c4ac38c6539b ("mmc: mtk-sd: Add HS400 online tuning support") Link: https://lore.kernel.org/r/20211207075013.22911-1-wenbin.mei@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 943940b44e83..632775217d35 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2291,8 +2291,10 @@ static int msdc_execute_hs400_tuning(struct mmc_host *mmc, struct mmc_card *card sdr_set_field(host->base + PAD_DS_TUNE, PAD_DS_TUNE_DLY1, i); ret = mmc_get_ext_csd(card, &ext_csd); - if (!ret) + if (!ret) { result_dly1 |= (1 << i); + kfree(ext_csd); + } } host->hs400_tuning = false; From 842470c4e211f284a224842849b1fa81b130c154 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 20 Oct 2021 18:57:40 +0200 Subject: [PATCH 331/868] Revert "drm/fb-helper: improve DRM fbdev emulation device names" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b3484d2b03e4c940a9598aa841a52d69729c582a. That change attempted to improve the DRM drivers fbdev emulation device names to avoid having confusing names like "simpledrmdrmfb" in /proc/fb. But unfortunately, there are user-space programs such as pm-utils that match against the fbdev names and so broke after the mentioned commit. Since the names in /proc/fb are used by tools that consider it an uAPI, let's restore the old names even when this lead to silly names like the one mentioned above. Fixes: b3484d2b03e4 ("drm/fb-helper: improve DRM fbdev emulation device names") Reported-by: Johannes Stezenbach Signed-off-by: Javier Martinez Canillas Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20211020165740.3011927-1-javierm@redhat.com --- drivers/gpu/drm/drm_fb_helper.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 8e7a124d6c5a..22bf690910b2 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1743,7 +1743,13 @@ void drm_fb_helper_fill_info(struct fb_info *info, sizes->fb_width, sizes->fb_height); info->par = fb_helper; - snprintf(info->fix.id, sizeof(info->fix.id), "%s", + /* + * The DRM drivers fbdev emulation device name can be confusing if the + * driver name also has a "drm" suffix on it. Leading to names such as + * "simpledrmdrmfb" in /proc/fb. Unfortunately, it's an uAPI and can't + * be changed due user-space tools (e.g: pm-utils) matching against it. + */ + snprintf(info->fix.id, sizeof(info->fix.id), "%sdrmfb", fb_helper->dev->driver->name); } From 52255ef662a5d490678fbad64a735f88fcba564d Mon Sep 17 00:00:00 2001 From: Raviteja Goud Talla Date: Fri, 3 Dec 2021 20:26:03 +0530 Subject: [PATCH 332/868] drm/i915/gen11: Moving WAs to icl_gt_workarounds_init() Bspec page says "Reset: BUS", Accordingly moving w/a's: Wa_1407352427,Wa_1406680159 to proper function icl_gt_workarounds_init() Which will resolve guc enabling error v2: - Previous patch rev2 was created by email client which caused the Build failure, This v2 is to resolve the previous broken series Reviewed-by: John Harrison Signed-off-by: Raviteja Goud Talla Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211203145603.4006937-1-ravitejax.goud.talla@intel.com (cherry picked from commit 67b858dd89932086ae0ee2d0ce4dd070a2c88bb3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ed73d9bc9d40..2400d6423ba5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1127,6 +1127,15 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + /* Wa_1407352427:icl,ehl */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, + PSDUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl,ehl */ + wa_write_or(wal, + SUBSLICE_UNIT_LEVEL_CLKGATE, + GWUNIT_CLKGATE_DIS); + /* Wa_1607087056:icl,ehl,jsl */ if (IS_ICELAKE(i915) || IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0)) @@ -1852,15 +1861,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); - /* Wa_1407352427:icl,ehl */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, - PSDUNIT_CLKGATE_DIS); - - /* Wa_1406680159:icl,ehl */ - wa_write_or(wal, - SUBSLICE_UNIT_LEVEL_CLKGATE, - GWUNIT_CLKGATE_DIS); - /* * Wa_1408767742:icl[a2..forever],ehl[all] * Wa_1605460711:icl[a0..c0] From 0416e7af2369b0d12a28dea8d30b104df9a6953d Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 9 Dec 2021 09:15:52 +0500 Subject: [PATCH 333/868] net: dsa: mv88e6xxx: error handling for serdes_power functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added default case to handle undefined cmode scenario in mv88e6393x_serdes_power() and mv88e6393x_serdes_power() methods. Addresses-Coverity: 1494644 ("Uninitialized scalar variable") Fixes: 21635d9203e1c (net: dsa: mv88e6xxx: Fix application of erratum 4.8 for 88E6393X) Reviewed-by: Marek Behún Signed-off-by: Ameer Hamza Link: https://lore.kernel.org/r/20211209041552.9810-1-amhamza.mgc@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/serdes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c index 55273013bfb5..2b05ead515cd 100644 --- a/drivers/net/dsa/mv88e6xxx/serdes.c +++ b/drivers/net/dsa/mv88e6xxx/serdes.c @@ -830,7 +830,7 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, bool up) { u8 cmode = chip->ports[port].cmode; - int err = 0; + int err; switch (cmode) { case MV88E6XXX_PORT_STS_CMODE_SGMII: @@ -842,6 +842,9 @@ int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, case MV88E6XXX_PORT_STS_CMODE_RXAUI: err = mv88e6390_serdes_power_10g(chip, lane, up); break; + default: + err = -EINVAL; + break; } if (!err && up) @@ -1541,6 +1544,9 @@ int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane, case MV88E6393X_PORT_STS_CMODE_10GBASER: err = mv88e6390_serdes_power_10g(chip, lane, on); break; + default: + err = -EINVAL; + break; } if (err) From 158390e45612ef0fde160af0826f1740c36daf21 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 8 Dec 2021 18:03:33 +0800 Subject: [PATCH 334/868] udp: using datalen to cap max gso segments The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, this is checked in udp_send_skb(): if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } skb->len contains network and transport header len here, we should use only data len instead. Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT") Signed-off-by: Jianguo Wu Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/900742e5-81fb-30dc-6e0b-375c6cdd7982@163.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8bcecdd6aeda..23b05e28490b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -916,7 +916,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } From fd79a0cbf0b2e34bcc45b13acf962e2032a82203 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Wed, 8 Dec 2021 10:27:42 -0800 Subject: [PATCH 335/868] nfc: fix segfault in nfc_genl_dump_devices_done When kmalloc in nfc_genl_dump_devices() fails then nfc_genl_dump_devices_done() segfaults as below KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 0 PID: 25 Comm: kworker/0:1 Not tainted 5.16.0-rc4-01180-g2a987e65025e-dirty #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-6.fc35 04/01/2014 Workqueue: events netlink_sock_destruct_work RIP: 0010:klist_iter_exit+0x26/0x80 Call Trace: class_dev_iter_exit+0x15/0x20 nfc_genl_dump_devices_done+0x3b/0x50 genl_lock_done+0x84/0xd0 netlink_sock_destruct+0x8f/0x270 __sk_destruct+0x64/0x3b0 sk_destruct+0xa8/0xd0 __sk_free+0x2e8/0x3d0 sk_free+0x51/0x90 netlink_sock_destruct_work+0x1c/0x20 process_one_work+0x411/0x710 worker_thread+0x6fd/0xa80 Link: https://syzkaller.appspot.com/bug?id=fc0fa5a53db9edd261d56e74325419faf18bd0df Reported-by: syzbot+f9f76f4a0766420b4a02@syzkaller.appspotmail.com Signed-off-by: Tadeusz Struk Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211208182742.340542-1-tadeusz.struk@linaro.org Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 334f63c9529e..0b4fae183a4b 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -636,8 +636,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } From 4cd8371a234d051f9c9557fcbb1f8c523b1c0d10 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 9 Dec 2021 09:13:07 +0100 Subject: [PATCH 336/868] nfc: fix potential NULL pointer deref in nfc_genl_dump_ses_done The done() netlink callback nfc_genl_dump_ses_done() should check if received argument is non-NULL, because its allocation could fail earlier in dumpit() (nfc_genl_dump_ses()). Fixes: ac22ac466a65 ("NFC: Add a GET_SE netlink API") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20211209081307.57337-1-krzysztof.kozlowski@canonical.com Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 0b4fae183a4b..f184b0db79d4 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1394,8 +1394,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } From c56c96303e9289cc34716b1179597b6f470833de Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Thu, 9 Dec 2021 14:15:11 +0800 Subject: [PATCH 337/868] nfp: Fix memory leak in nfp_cpp_area_cache_add() In line 800 (#1), nfp_cpp_area_alloc() allocates and initializes a CPP area structure. But in line 807 (#2), when the cache is allocated failed, this CPP area structure is not freed, which will result in memory leak. We can fix it by freeing the CPP area when the cache is allocated failed (#2). 792 int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) 793 { 794 struct nfp_cpp_area_cache *cache; 795 struct nfp_cpp_area *area; 800 area = nfp_cpp_area_alloc(cpp, NFP_CPP_ID(7, NFP_CPP_ACTION_RW, 0), 801 0, size); // #1: allocates and initializes 802 if (!area) 803 return -ENOMEM; 805 cache = kzalloc(sizeof(*cache), GFP_KERNEL); 806 if (!cache) 807 return -ENOMEM; // #2: missing free 817 return 0; 818 } Fixes: 4cb584e0ee7d ("nfp: add CPP access core") Signed-off-by: Jianglei Nie Acked-by: Simon Horman Link: https://lore.kernel.org/r/20211209061511.122535-1-niejianglei2021@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index d7ac0307797f..34c0d2ddf9ef 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) return -ENOMEM; cache = kzalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) + if (!cache) { + nfp_cpp_area_free(area); return -ENOMEM; + } cache->id = 0; cache->addr = 0; From ae68d93354e5bf5191ee673982251864ea24dd5c Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 8 Dec 2021 20:54:09 +0100 Subject: [PATCH 338/868] seg6: fix the iif in the IPv6 socket control block When an IPv4 packet is received, the ip_rcv_core(...) sets the receiving interface index into the IPv4 socket control block (v5.16-rc4, net/ipv4/ip_input.c line 510): IPCB(skb)->iif = skb->skb_iif; If that IPv4 packet is meant to be encapsulated in an outer IPv6+SRH header, the seg6_do_srh_encap(...) performs the required encapsulation. In this case, the seg6_do_srh_encap function clears the IPv6 socket control block (v5.16-rc4 net/ipv6/seg6_iptunnel.c line 163): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); The memset(...) was introduced in commit ef489749aae5 ("ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation") a long time ago (2019-01-29). Since the IPv6 socket control block and the IPv4 socket control block share the same memory area (skb->cb), the receiving interface index info is lost (IP6CB(skb)->iif is set to zero). As a side effect, that condition triggers a NULL pointer dereference if commit 0857d6f8c759 ("ipv6: When forwarding count rx stats on the orig netdev") is applied. To fix that issue, we set the IP6CB(skb)->iif with the index of the receiving interface once again. Fixes: ef489749aae5 ("ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation") Signed-off-by: Andrea Mayer Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20211208195409.12169-1-andrea.mayer@uniroma2.it Signed-off-by: Jakub Kicinski --- net/ipv6/seg6_iptunnel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 3adc5d9211ad..d64855010948 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -161,6 +161,14 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + + /* the control block has been erased, so we have to set the + * iif once again. + * We read the receiving interface index directly from the + * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: + * ip_rcv_core(...)). + */ + IP6CB(skb)->iif = skb->skb_iif; } hdr->nexthdr = NEXTHDR_ROUTING; From 9acfc57fa2b8944ed079cedbf846823ea32b8a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Wed, 8 Dec 2021 23:37:23 +0100 Subject: [PATCH 339/868] net: mana: Fix memory leak in mana_hwc_create_wq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If allocating the DMA buffer fails, mana_hwc_destroy_wq was called without previously storing the pointer to the queue. In order to avoid leaking the pointer to the queue, store it as soon as it is allocated. Addresses-Coverity-ID: 1484720 ("Resource leak") Signed-off-by: José Expósito Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/20211208223723.18520-1-jose.exposito89@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 34b971ff8ef8..078d6a5a0768 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -480,16 +480,16 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, if (err) goto out; - err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, - &hwc_wq->msg_buf); - if (err) - goto out; - hwc_wq->hwc = hwc; hwc_wq->gdma_wq = queue; hwc_wq->queue_depth = q_depth; hwc_wq->hwc_cq = hwc_cq; + err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, + &hwc_wq->msg_buf); + if (err) + goto out; + *hwc_wq_ptr = hwc_wq; return 0; out: From 61c2402665f1e10c5742033fce18392e369931d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 00:49:37 -0800 Subject: [PATCH 340/868] net/sched: fq_pie: prevent dismantle issue For some reason, fq_pie_destroy() did not copy working code from pie_destroy() and other qdiscs, thus causing elusive bug. Before calling del_timer_sync(&q->adapt_timer), we need to ensure timer will not rearm itself. rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 0-....: (4416 ticks this GP) idle=60d/1/0x4000000000000000 softirq=10433/10434 fqs=2579 (t=10501 jiffies g=13085 q=3989) NMI backtrace for cpu 0 CPU: 0 PID: 13 Comm: ksoftirqd/0 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111 nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343 print_cpu_stall kernel/rcu/tree_stall.h:627 [inline] check_cpu_stall kernel/rcu/tree_stall.h:711 [inline] rcu_pending kernel/rcu/tree.c:3878 [inline] rcu_sched_clock_irq.cold+0x9d/0x746 kernel/rcu/tree.c:2597 update_process_times+0x16d/0x200 kernel/time/timer.c:1785 tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226 tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428 __run_hrtimer kernel/time/hrtimer.c:1685 [inline] __hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749 hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline] __sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103 sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 RIP: 0010:write_comp_data kernel/kcov.c:221 [inline] RIP: 0010:__sanitizer_cov_trace_const_cmp1+0x1d/0x80 kernel/kcov.c:273 Code: 54 c8 20 48 89 10 c3 66 0f 1f 44 00 00 53 41 89 fb 41 89 f1 bf 03 00 00 00 65 48 8b 0c 25 40 70 02 00 48 89 ce 4c 8b 54 24 08 4e f7 ff ff 84 c0 74 51 48 8b 81 88 15 00 00 44 8b 81 84 15 00 RSP: 0018:ffffc90000d27b28 EFLAGS: 00000246 RAX: 0000000000000000 RBX: ffff888064bf1bf0 RCX: ffff888011928000 RDX: ffff888011928000 RSI: ffff888011928000 RDI: 0000000000000003 RBP: ffff888064bf1c28 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff875d8295 R11: 0000000000000000 R12: 0000000000000000 R13: ffff8880783dd300 R14: 0000000000000000 R15: 0000000000000000 pie_calculate_probability+0x405/0x7c0 net/sched/sch_pie.c:418 fq_pie_timer+0x170/0x2a0 net/sched/sch_fq_pie.c:383 call_timer_fn+0x1a5/0x6b0 kernel/time/timer.c:1421 expire_timers kernel/time/timer.c:1466 [inline] __run_timers.part.0+0x675/0xa20 kernel/time/timer.c:1734 __run_timers kernel/time/timer.c:1715 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1747 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 run_ksoftirqd kernel/softirq.c:921 [inline] run_ksoftirqd+0x2d/0x60 kernel/softirq.c:913 smpboot_thread_fn+0x645/0x9c0 kernel/smpboot.c:164 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Mohit P. Tahiliani Cc: Sachin D. Patil Cc: V. Saicharan Cc: Mohit Bhasi Cc: Leslie Monis Cc: Gautam Ramakrishnan Link: https://lore.kernel.org/r/20211209084937.3500020-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_fq_pie.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 830f3559f727..d6aba6edd16e 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -531,6 +531,7 @@ static void fq_pie_destroy(struct Qdisc *sch) struct fq_pie_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); + q->p_params.tupdate = 0; del_timer_sync(&q->adapt_timer); kvfree(q->flows); } From 37ad4e2a77180841dffb1df64cdbd95541512d3d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 9 Dec 2021 16:35:46 +0100 Subject: [PATCH 341/868] MAINTAINERS: s390/net: remove myself as maintainer I won't have access to the relevant HW and docs much longer. Signed-off-by: Julian Wiedmann Link: https://lore.kernel.org/r/20211209153546.1152921-1-jwi@linux.ibm.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7e51081b6708..6dd20c31d875 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16623,7 +16623,6 @@ W: http://www.ibm.com/developerworks/linux/linux390/ F: drivers/iommu/s390-iommu.c S390 IUCV NETWORK LAYER -M: Julian Wiedmann M: Alexandra Winter M: Wenjia Zhang L: linux-s390@vger.kernel.org @@ -16635,7 +16634,6 @@ F: include/net/iucv/ F: net/iucv/ S390 NETWORK DRIVERS -M: Julian Wiedmann M: Alexandra Winter M: Wenjia Zhang L: linux-s390@vger.kernel.org From e8b1d7698038e76363859fb47ae0a262080646f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 9 Dec 2021 12:05:40 +0100 Subject: [PATCH 342/868] net: dsa: felix: Fix memory leak in felix_setup_mmio_filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid a memory leak if there is not a CPU port defined. Fixes: 8d5f7954b7c8 ("net: dsa: felix: break at first CPU port during init and teardown") Addresses-Coverity-ID: 1492897 ("Resource leak") Addresses-Coverity-ID: 1492899 ("Resource leak") Signed-off-by: José Expósito Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20211209110538.11585-1-jose.exposito89@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 327cc4654806..f1a05e7dc818 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -290,8 +290,11 @@ static int felix_setup_mmio_filtering(struct felix *felix) } } - if (cpu < 0) + if (cpu < 0) { + kfree(tagging_rule); + kfree(redirect_rule); return -EINVAL; + } tagging_rule->key_type = OCELOT_VCAP_KEY_ETYPE; *(__be16 *)tagging_rule->key.etype.etype.value = htons(ETH_P_1588); From 373f121a3c3a741f90b2a81f120f37c539fa0c86 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 9 Dec 2021 15:46:27 +0530 Subject: [PATCH 343/868] net: wwan: iosm: fixes unnecessary doorbell send In TX packet accumulation flow transport layer is giving a doorbell to device even though there is no pending control TX transfer that needs immediate attention. Introduced a new hpda_ctrl_pending variable to keep track of pending control TX transfer. If there is a pending control TX transfer which needs an immediate attention only then give a doorbell to device. Signed-off-by: M Chetan Kumar Reviewed-by: Sergey Ryazanov Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index cff3b43ca4d7..b4d47b31ba91 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -181,9 +181,9 @@ void ipc_imem_hrtimer_stop(struct hrtimer *hr_timer) bool ipc_imem_ul_write_td(struct iosm_imem *ipc_imem) { struct ipc_mem_channel *channel; + bool hpda_ctrl_pending = false; struct sk_buff_head *ul_list; bool hpda_pending = false; - bool forced_hpdu = false; struct ipc_pipe *pipe; int i; @@ -200,15 +200,19 @@ bool ipc_imem_ul_write_td(struct iosm_imem *ipc_imem) ul_list = &channel->ul_list; /* Fill the transfer descriptor with the uplink buffer info. */ - hpda_pending |= ipc_protocol_ul_td_send(ipc_imem->ipc_protocol, + if (!ipc_imem_check_wwan_ips(channel)) { + hpda_ctrl_pending |= + ipc_protocol_ul_td_send(ipc_imem->ipc_protocol, pipe, ul_list); - - /* forced HP update needed for non data channels */ - if (hpda_pending && !ipc_imem_check_wwan_ips(channel)) - forced_hpdu = true; + } else { + hpda_pending |= + ipc_protocol_ul_td_send(ipc_imem->ipc_protocol, + pipe, ul_list); + } } - if (forced_hpdu) { + /* forced HP update needed for non data channels */ + if (hpda_ctrl_pending) { hpda_pending = false; ipc_protocol_doorbell_trigger(ipc_imem->ipc_protocol, IPC_HP_UL_WRITE_TD); From 07d3f2743decaeefcf076457719ae01c8b43b6d2 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 9 Dec 2021 15:46:28 +0530 Subject: [PATCH 344/868] net: wwan: iosm: fixes net interface nonfunctional after fw flash Devlink initialization flow was overwriting the IP traffic channel configuration. This was causing wwan0 network interface to be unusable after fw flash. When device boots to fully functional mode restore the IP channel configuration. Signed-off-by: M Chetan Kumar Reviewed-by: Sergey Ryazanov Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 7 ++++++- drivers/net/wwan/iosm/iosm_ipc_imem.h | 1 + drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index b4d47b31ba91..e2c096863488 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -531,6 +531,9 @@ static void ipc_imem_run_state_worker(struct work_struct *instance) return; } + if (test_and_clear_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag)) + ipc_devlink_deinit(ipc_imem->ipc_devlink); + if (!ipc_imem_setup_cp_mux_cap_init(ipc_imem, &mux_cfg)) ipc_imem->mux = ipc_mux_init(&mux_cfg, ipc_imem); @@ -1171,7 +1174,7 @@ void ipc_imem_cleanup(struct iosm_imem *ipc_imem) ipc_port_deinit(ipc_imem->ipc_port); } - if (ipc_imem->ipc_devlink) + if (test_and_clear_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag)) ipc_devlink_deinit(ipc_imem->ipc_devlink); ipc_imem_device_ipc_uninit(ipc_imem); @@ -1335,6 +1338,8 @@ struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id, if (ipc_flash_link_establish(ipc_imem)) goto devlink_channel_fail; + + set_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag); } return ipc_imem; devlink_channel_fail: diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h index 6be6708b4eec..6b479fe23a42 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.h +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h @@ -101,6 +101,7 @@ struct ipc_chnl_cfg; #define IOSM_CHIP_INFO_SIZE_MAX 100 #define FULLY_FUNCTIONAL 0 +#define IOSM_DEVLINK_INIT 1 /* List of the supported UL/DL pipes. */ enum ipc_mem_pipes { diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index 825e8e5ffb2a..09261fbb79c1 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -450,6 +450,7 @@ void ipc_imem_sys_devlink_close(struct iosm_devlink *ipc_devlink) /* Release the pipe resources */ ipc_imem_pipe_cleanup(ipc_imem, &channel->ul_pipe); ipc_imem_pipe_cleanup(ipc_imem, &channel->dl_pipe); + ipc_imem->nr_of_channels--; } void ipc_imem_sys_devlink_notify_rx(struct iosm_devlink *ipc_devlink, From 383451ceb07831d37dafdf011c09366d1c034df5 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 9 Dec 2021 15:46:29 +0530 Subject: [PATCH 345/868] net: wwan: iosm: fixes unable to send AT command during mbim tx ev_cdev_write_pending flag is preventing a TX message post for AT port while MBIM transfer is ongoing. Removed the unnecessary check around control port TX transfer. Signed-off-by: M Chetan Kumar Reviewed-by: Sergey Ryazanov Signed-off-by: Jakub Kicinski --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 1 - drivers/net/wwan/iosm/iosm_ipc_imem.h | 3 --- drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 6 ------ 3 files changed, 10 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index e2c096863488..12c03dacb5dd 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -1270,7 +1270,6 @@ struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id, ipc_imem->pci_device_id = device_id; - ipc_imem->ev_cdev_write_pending = false; ipc_imem->cp_version = 0; ipc_imem->device_sleep = IPC_HOST_SLEEP_ENTER_SLEEP; diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h index 6b479fe23a42..6b8a837faef2 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.h +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h @@ -336,8 +336,6 @@ enum ipc_phase { * process the irq actions. * @flag: Flag to monitor the state of driver * @td_update_timer_suspended: if true then td update timer suspend - * @ev_cdev_write_pending: 0 means inform the IPC tasklet to pass - * the accumulated uplink buffers to CP. * @ev_mux_net_transmit_pending:0 means inform the IPC tasklet to pass * @reset_det_n: Reset detect flag * @pcie_wake_n: Pcie wake flag @@ -375,7 +373,6 @@ struct iosm_imem { u8 ev_irq_pending[IPC_IRQ_VECTORS]; unsigned long flag; u8 td_update_timer_suspended:1, - ev_cdev_write_pending:1, ev_mux_net_transmit_pending:1, reset_det_n:1, pcie_wake_n:1; diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index 09261fbb79c1..831cdae28e8a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -41,7 +41,6 @@ void ipc_imem_sys_wwan_close(struct iosm_imem *ipc_imem, int if_id, static int ipc_imem_tq_cdev_write(struct iosm_imem *ipc_imem, int arg, void *msg, size_t size) { - ipc_imem->ev_cdev_write_pending = false; ipc_imem_ul_send(ipc_imem); return 0; @@ -50,11 +49,6 @@ static int ipc_imem_tq_cdev_write(struct iosm_imem *ipc_imem, int arg, /* Through tasklet to do sio write. */ static int ipc_imem_call_cdev_write(struct iosm_imem *ipc_imem) { - if (ipc_imem->ev_cdev_write_pending) - return -1; - - ipc_imem->ev_cdev_write_pending = true; - return ipc_task_queue_send_task(ipc_imem, ipc_imem_tq_cdev_write, 0, NULL, 0, false); } From 04ec4e6250e5f58b525b08f3dca45c7d7427620e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 9 Dec 2021 09:26:47 +0000 Subject: [PATCH 346/868] net: dsa: mv88e6xxx: allow use of PHYs on CPU and DSA ports Martyn Welch reports that his CPU port is unable to link where it has been necessary to use one of the switch ports with an internal PHY for the CPU port. The reason behind this is the port control register is left forcing the link down, preventing traffic flow. This occurs because during initialisation, phylink expects the link to be down, and DSA forces the link down by synthesising a call to the DSA drivers phylink_mac_link_down() method, but we don't touch the forced-link state when we later reconfigure the port. Resolve this by also unforcing the link state when we are operating in PHY mode and the PPU is set to poll the PHY to retrieve link status information. Reported-by: Martyn Welch Tested-by: Martyn Welch Fixes: 3be98b2d5fbc ("net: dsa: Down cpu/dsa ports phylink will control") Cc: # 5.7: 2b29cb9e3f7f: net: dsa: mv88e6xxx: fix "don't use PHY_DETECT on internal PHY's" Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1mvFhP-00F8Zb-Ul@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 62 +++++++++++++++++--------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 9f675464efc3..14f87f6ac479 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -698,44 +698,48 @@ static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, { struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_port *p; - int err; + int err = 0; p = &chip->ports[port]; - /* FIXME: is this the correct test? If we're in fixed mode on an - * internal port, why should we process this any different from - * PHY mode? On the other hand, the port may be automedia between - * an internal PHY and the serdes... - */ - if ((mode == MLO_AN_PHY) && mv88e6xxx_phy_is_internal(ds, port)) - return; - mv88e6xxx_reg_lock(chip); - /* In inband mode, the link may come up at any time while the link - * is not forced down. Force the link down while we reconfigure the - * interface mode. - */ - if (mode == MLO_AN_INBAND && p->interface != state->interface && - chip->info->ops->port_set_link) - chip->info->ops->port_set_link(chip, port, LINK_FORCED_DOWN); - err = mv88e6xxx_port_config_interface(chip, port, state->interface); - if (err && err != -EOPNOTSUPP) - goto err_unlock; + if (mode != MLO_AN_PHY || !mv88e6xxx_phy_is_internal(ds, port)) { + /* In inband mode, the link may come up at any time while the + * link is not forced down. Force the link down while we + * reconfigure the interface mode. + */ + if (mode == MLO_AN_INBAND && + p->interface != state->interface && + chip->info->ops->port_set_link) + chip->info->ops->port_set_link(chip, port, + LINK_FORCED_DOWN); - err = mv88e6xxx_serdes_pcs_config(chip, port, mode, state->interface, - state->advertising); - /* FIXME: we should restart negotiation if something changed - which - * is something we get if we convert to using phylinks PCS operations. - */ - if (err > 0) - err = 0; + err = mv88e6xxx_port_config_interface(chip, port, + state->interface); + if (err && err != -EOPNOTSUPP) + goto err_unlock; + + err = mv88e6xxx_serdes_pcs_config(chip, port, mode, + state->interface, + state->advertising); + /* FIXME: we should restart negotiation if something changed - + * which is something we get if we convert to using phylinks + * PCS operations. + */ + if (err > 0) + err = 0; + } /* Undo the forced down state above after completing configuration - * irrespective of its state on entry, which allows the link to come up. + * irrespective of its state on entry, which allows the link to come + * up in the in-band case where there is no separate SERDES. Also + * ensure that the link can come up if the PPU is in use and we are + * in PHY mode (we treat the PPU as an effective in-band mechanism.) */ - if (mode == MLO_AN_INBAND && p->interface != state->interface && - chip->info->ops->port_set_link) + if (chip->info->ops->port_set_link && + ((mode == MLO_AN_INBAND && p->interface != state->interface) || + (mode == MLO_AN_PHY && mv88e6xxx_port_ppu_updates(chip, port)))) chip->info->ops->port_set_link(chip, port, LINK_UNFORCED); p->interface = state->interface; From 3a49cc22d31eccceb856f468be0646faa2d4643f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 9 Dec 2021 11:51:13 -0500 Subject: [PATCH 347/868] tools/lib/lockdep: drop leftover liblockdep headers Clean up remaining headers that are specific to liblockdep but lived in the shared header directory. These are all unused after the liblockdep code was removed in commit 7246f4dcaccc ("tools/lib/lockdep: drop liblockdep"). Note that there are still headers that were originally created for liblockdep, that still have liblockdep references, but they are used by other tools/ code at this point. Signed-off-by: Sasha Levin Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- tools/include/linux/debug_locks.h | 14 ------ tools/include/linux/hardirq.h | 12 ------ tools/include/linux/irqflags.h | 39 ----------------- tools/include/linux/lockdep.h | 72 ------------------------------- tools/include/linux/proc_fs.h | 4 -- tools/include/linux/spinlock.h | 2 - tools/include/linux/stacktrace.h | 33 -------------- 7 files changed, 176 deletions(-) delete mode 100644 tools/include/linux/debug_locks.h delete mode 100644 tools/include/linux/hardirq.h delete mode 100644 tools/include/linux/irqflags.h delete mode 100644 tools/include/linux/lockdep.h delete mode 100644 tools/include/linux/proc_fs.h delete mode 100644 tools/include/linux/stacktrace.h diff --git a/tools/include/linux/debug_locks.h b/tools/include/linux/debug_locks.h deleted file mode 100644 index 72d595ce764a..000000000000 --- a/tools/include/linux/debug_locks.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_ -#define _LIBLOCKDEP_DEBUG_LOCKS_H_ - -#include -#include -#include - -#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x) - -extern bool debug_locks; -extern bool debug_locks_silent; - -#endif diff --git a/tools/include/linux/hardirq.h b/tools/include/linux/hardirq.h deleted file mode 100644 index b25580b6a9be..000000000000 --- a/tools/include/linux/hardirq.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_ -#define _LIBLOCKDEP_LINUX_HARDIRQ_H_ - -#define SOFTIRQ_BITS 0UL -#define HARDIRQ_BITS 0UL -#define SOFTIRQ_SHIFT 0UL -#define HARDIRQ_SHIFT 0UL -#define hardirq_count() 0UL -#define softirq_count() 0UL - -#endif diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h deleted file mode 100644 index 501262aee8ff..000000000000 --- a/tools/include/linux/irqflags.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ - -# define lockdep_hardirq_context() 0 -# define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled() 0 -# define lockdep_softirqs_enabled(p) 0 -# define lockdep_hardirq_enter() do { } while (0) -# define lockdep_hardirq_exit() do { } while (0) -# define lockdep_softirq_enter() do { } while (0) -# define lockdep_softirq_exit() do { } while (0) -# define INIT_TRACE_IRQFLAGS - -# define stop_critical_timings() do { } while (0) -# define start_critical_timings() do { } while (0) - -#define raw_local_irq_disable() do { } while (0) -#define raw_local_irq_enable() do { } while (0) -#define raw_local_irq_save(flags) ((flags) = 0) -#define raw_local_irq_restore(flags) ((void)(flags)) -#define raw_local_save_flags(flags) ((flags) = 0) -#define raw_irqs_disabled_flags(flags) ((void)(flags)) -#define raw_irqs_disabled() 0 -#define raw_safe_halt() - -#define local_irq_enable() do { } while (0) -#define local_irq_disable() do { } while (0) -#define local_irq_save(flags) ((flags) = 0) -#define local_irq_restore(flags) ((void)(flags)) -#define local_save_flags(flags) ((flags) = 0) -#define irqs_disabled() (1) -#define irqs_disabled_flags(flags) ((void)(flags), 0) -#define safe_halt() do { } while (0) - -#define trace_lock_release(x, y) -#define trace_lock_acquire(a, b, c, d, e, f, g) - -#endif diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h deleted file mode 100644 index e56997288f2b..000000000000 --- a/tools/include/linux/lockdep.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LOCKDEP_H_ -#define _LIBLOCKDEP_LOCKDEP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_LOCK_DEPTH 63UL - -#define asmlinkage -#define __visible - -#include "../../../include/linux/lockdep.h" - -struct task_struct { - u64 curr_chain_key; - int lockdep_depth; - unsigned int lockdep_recursion; - struct held_lock held_locks[MAX_LOCK_DEPTH]; - gfp_t lockdep_reclaim_gfp; - int pid; - int state; - char comm[17]; -}; - -#define TASK_RUNNING 0 - -extern struct task_struct *__curr(void); - -#define current (__curr()) - -static inline int debug_locks_off(void) -{ - return 1; -} - -#define task_pid_nr(tsk) ((tsk)->pid) - -#define KSYM_NAME_LEN 128 -#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__) -#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define pr_warn pr_err -#define pr_cont pr_err - -#define list_del_rcu list_del - -#define atomic_t unsigned long -#define atomic_inc(x) ((*(x))++) - -#define print_tainted() "" -#define static_obj(x) 1 - -#define debug_show_all_locks() -extern void debug_check_no_locks_held(void); - -static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr) -{ - return false; -} - -#endif diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h deleted file mode 100644 index 8b3b03b64fda..000000000000 --- a/tools/include/linux/proc_fs.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H -#define _TOOLS_INCLUDE_LINUX_PROC_FS_H - -#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index c934572d935c..622266b197d0 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -37,6 +37,4 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) return true; } -#include - #endif diff --git a/tools/include/linux/stacktrace.h b/tools/include/linux/stacktrace.h deleted file mode 100644 index ae343ac35bfa..000000000000 --- a/tools/include/linux/stacktrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_ -#define _LIBLOCKDEP_LINUX_STACKTRACE_H_ - -#include - -struct stack_trace { - unsigned int nr_entries, max_entries; - unsigned long *entries; - int skip; -}; - -static inline void print_stack_trace(struct stack_trace *trace, int spaces) -{ - backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1); -} - -#define save_stack_trace(trace) \ - ((trace)->nr_entries = \ - backtrace((void **)(trace)->entries, (trace)->max_entries)) - -static inline int dump_stack(void) -{ - void *array[64]; - size_t size; - - size = backtrace(array, 64); - backtrace_symbols_fd(array, size, 1); - - return 0; -} - -#endif From cab2d3fd6866e089b5c50db09dece131f85bfebd Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 9 Dec 2021 18:46:33 +0530 Subject: [PATCH 348/868] bus: mhi: core: Add support for forced PM resume For whatever reason, some devices like QCA6390, WCN6855 using ath11k are not in M3 state during PM resume, but still functional. The mhi_pm_resume should then not fail in those cases, and let the higher level device specific stack continue resuming process. Add an API mhi_pm_resume_force(), to force resuming irrespective of the current MHI state. This fixes a regression with non functional ath11k WiFi after suspend/resume cycle on some machines. Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179 Link: https://lore.kernel.org/regressions/871r5p0x2u.fsf@codeaurora.org/ Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state") Cc: stable@vger.kernel.org #5.13 Reported-by: Kalle Valo Reported-by: Pengyu Ma Tested-by: Kalle Valo Acked-by: Kalle Valo Signed-off-by: Loic Poulain [mani: Switched to API, added bug report, reported-by tags and CCed stable] Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20211209131633.4168-1-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mhi/core/pm.c | 21 ++++++++++++++++++--- drivers/net/wireless/ath/ath11k/mhi.c | 6 +++++- include/linux/mhi.h | 13 +++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c index fb99e3727155..547e6e769546 100644 --- a/drivers/bus/mhi/core/pm.c +++ b/drivers/bus/mhi/core/pm.c @@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl) } EXPORT_SYMBOL_GPL(mhi_pm_suspend); -int mhi_pm_resume(struct mhi_controller *mhi_cntrl) +static int __mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force) { struct mhi_chan *itr, *tmp; struct device *dev = &mhi_cntrl->mhi_dev->dev; @@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)) return -EIO; - if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) - return -EINVAL; + if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) { + dev_warn(dev, "Resuming from non M3 state (%s)\n", + TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl))); + if (!force) + return -EINVAL; + } /* Notify clients about exiting LPM */ list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) { @@ -940,8 +944,19 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl) return 0; } + +int mhi_pm_resume(struct mhi_controller *mhi_cntrl) +{ + return __mhi_pm_resume(mhi_cntrl, false); +} EXPORT_SYMBOL_GPL(mhi_pm_resume); +int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl) +{ + return __mhi_pm_resume(mhi_cntrl, true); +} +EXPORT_SYMBOL_GPL(mhi_pm_resume_force); + int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl) { int ret; diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index 26c7ae242db6..49c0b1ad40a0 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -533,7 +533,11 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci, ret = mhi_pm_suspend(ab_pci->mhi_ctrl); break; case ATH11K_MHI_RESUME: - ret = mhi_pm_resume(ab_pci->mhi_ctrl); + /* Do force MHI resume as some devices like QCA6390, WCN6855 + * are not in M3 state but they are functional. So just ignore + * the MHI state while resuming. + */ + ret = mhi_pm_resume_force(ab_pci->mhi_ctrl); break; case ATH11K_MHI_TRIGGER_RDDM: ret = mhi_force_rddm_mode(ab_pci->mhi_ctrl); diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 723985879035..a5cc4cdf9cc8 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -663,6 +663,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl); */ int mhi_pm_resume(struct mhi_controller *mhi_cntrl); +/** + * mhi_pm_resume_force - Force resume MHI from suspended state + * @mhi_cntrl: MHI controller + * + * Resume the device irrespective of its MHI state. As per the MHI spec, devices + * has to be in M3 state during resume. But some devices seem to be in a + * different MHI state other than M3 but they continue working fine if allowed. + * This API is intented to be used for such devices. + * + * Return: 0 if the resume succeeds, a negative error code otherwise + */ +int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl); + /** * mhi_download_rddm_image - Download ramdump image from device for * debugging purpose. From e1067a07cfbc5a36abad3752fafe4c79e06db1bb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Dec 2021 19:20:32 +0100 Subject: [PATCH 349/868] ftrace/samples: Add module to test multi direct modify interface Adding ftrace-direct-multi-modify.ko kernel module that uses modify_ftrace_direct_multi API. The core functionality is taken from ftrace-direct-modify.ko kernel module and changed to fit multi direct interface. The init function creates kthread that periodically calls modify_ftrace_direct_multi to change the trampoline address for the direct ftrace_ops. The ftrace trace_pipe then shows trace from both trampolines. Link: https://lkml.kernel.org/r/20211206182032.87248-4-jolsa@kernel.org Cc: Ingo Molnar Reviewed-by: Heiko Carstens Tested-by: Heiko Carstens Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- samples/ftrace/Makefile | 1 + samples/ftrace/ftrace-direct-multi-modify.c | 152 ++++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 samples/ftrace/ftrace-direct-multi-modify.c diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile index b9198e2eef28..faf8cdb79c5f 100644 --- a/samples/ftrace/Makefile +++ b/samples/ftrace/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o +obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o CFLAGS_sample-trace-array.o := -I$(src) obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c new file mode 100644 index 000000000000..91bc42a7adb9 --- /dev/null +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include + +void my_direct_func1(unsigned long ip) +{ + trace_printk("my direct func1 ip %lx\n", ip); +} + +void my_direct_func2(unsigned long ip) +{ + trace_printk("my direct func2 ip %lx\n", ip); +} + +extern void my_tramp1(void *); +extern void my_tramp2(void *); + +#ifdef CONFIG_X86_64 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:" +" pushq %rbp\n" +" movq %rsp, %rbp\n" +" pushq %rdi\n" +" movq 8(%rbp), %rdi\n" +" call my_direct_func1\n" +" popq %rdi\n" +" leave\n" +" ret\n" +" .size my_tramp1, .-my_tramp1\n" +" .type my_tramp2, @function\n" +"\n" +" .globl my_tramp2\n" +" my_tramp2:" +" pushq %rbp\n" +" movq %rsp, %rbp\n" +" pushq %rdi\n" +" movq 8(%rbp), %rdi\n" +" call my_direct_func2\n" +" popq %rdi\n" +" leave\n" +" ret\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_X86_64 */ + +#ifdef CONFIG_S390 + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:" +" lgr %r1,%r15\n" +" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" +" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" +" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n" +" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n" +" lgr %r2,%r0\n" +" brasl %r14,my_direct_func1\n" +" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n" +" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" +" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" +" lgr %r1,%r0\n" +" br %r1\n" +" .size my_tramp1, .-my_tramp1\n" +"\n" +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:" +" lgr %r1,%r15\n" +" stmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" +" stg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" +" aghi %r15,"__stringify(-STACK_FRAME_OVERHEAD)"\n" +" stg %r1,"__stringify(__SF_BACKCHAIN)"(%r15)\n" +" lgr %r2,%r0\n" +" brasl %r14,my_direct_func2\n" +" aghi %r15,"__stringify(STACK_FRAME_OVERHEAD)"\n" +" lmg %r0,%r5,"__stringify(__SF_GPRS)"(%r15)\n" +" lg %r14,"__stringify(__SF_GPRS+8*8)"(%r15)\n" +" lgr %r1,%r0\n" +" br %r1\n" +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_S390 */ + +static unsigned long my_tramp = (unsigned long)my_tramp1; +static unsigned long tramps[2] = { + (unsigned long)my_tramp1, + (unsigned long)my_tramp2, +}; + +static struct ftrace_ops direct; + +static int simple_thread(void *arg) +{ + static int t; + int ret = 0; + + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(2 * HZ); + + if (ret) + continue; + t ^= 1; + ret = modify_ftrace_direct_multi(&direct, tramps[t]); + if (!ret) + my_tramp = tramps[t]; + WARN_ON_ONCE(ret); + } + + return 0; +} + +static struct task_struct *simple_tsk; + +static int __init ftrace_direct_multi_init(void) +{ + int ret; + + ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); + ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0); + + ret = register_ftrace_direct_multi(&direct, my_tramp); + + if (!ret) + simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn"); + return ret; +} + +static void __exit ftrace_direct_multi_exit(void) +{ + kthread_stop(simple_tsk); + unregister_ftrace_direct_multi(&direct, my_tramp); +} + +module_init(ftrace_direct_multi_init); +module_exit(ftrace_direct_multi_exit); + +MODULE_AUTHOR("Jiri Olsa"); +MODULE_DESCRIPTION("Example use case of using modify_ftrace_direct_multi()"); +MODULE_LICENSE("GPL"); From c24be24aed405d64ebcf04526614c13b2adfb1d2 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 9 Dec 2021 02:43:17 +0000 Subject: [PATCH 350/868] tracing: Fix possible memory leak in __create_synth_event() error path There's error paths in __create_synth_event() after the argv is allocated that fail to free it. Add a jump to free it when necessary. Link: https://lkml.kernel.org/r/20211209024317.11783-1-linmq006@gmail.com Suggested-by: Steven Rostedt (VMware) Signed-off-by: Miaoqian Lin [ Fixed up the patch and change log ] Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 22db3ce95e74..ca9c13b2ecf4 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1237,9 +1237,8 @@ static int __create_synth_event(const char *name, const char *raw_fields) argv + consumed, &consumed, &field_version); if (IS_ERR(field)) { - argv_free(argv); ret = PTR_ERR(field); - goto err; + goto err_free_arg; } /* @@ -1262,18 +1261,19 @@ static int __create_synth_event(const char *name, const char *raw_fields) if (cmd_version > 1 && n_fields_this_loop >= 1) { synth_err(SYNTH_ERR_INVALID_CMD, errpos(field_str)); ret = -EINVAL; - goto err; + goto err_free_arg; } fields[n_fields++] = field; if (n_fields == SYNTH_FIELDS_MAX) { synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0); ret = -EINVAL; - goto err; + goto err_free_arg; } n_fields_this_loop++; } + argv_free(argv); if (consumed < argc) { synth_err(SYNTH_ERR_INVALID_CMD, 0); @@ -1281,7 +1281,6 @@ static int __create_synth_event(const char *name, const char *raw_fields) goto err; } - argv_free(argv); } if (n_fields == 0) { @@ -1307,6 +1306,8 @@ static int __create_synth_event(const char *name, const char *raw_fields) kfree(saved_fields); return ret; + err_free_arg: + argv_free(argv); err: for (i = 0; i < n_fields; i++) free_synth_field(fields[i]); From 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Dec 2021 17:04:51 -0800 Subject: [PATCH 351/868] wait: add wake_up_pollfree() Several ->poll() implementations are special in that they use a waitqueue whose lifetime is the current task, rather than the struct file as is normally the case. This is okay for blocking polls, since a blocking poll occurs within one task; however, non-blocking polls require another solution. This solution is for the queue to be cleared before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'. However, that has a bug: wake_up_poll() calls __wake_up() with nr_exclusive=1. Therefore, if there are multiple "exclusive" waiters, and the wakeup function for the first one returns a positive value, only that one will be called. That's *not* what's needed for POLLFREE; POLLFREE is special in that it really needs to wake up everyone. Considering the three non-blocking poll systems: - io_uring poll doesn't handle POLLFREE at all, so it is broken anyway. - aio poll is unaffected, since it doesn't support exclusive waits. However, that's fragile, as someone could add this feature later. - epoll doesn't appear to be broken by this, since its wakeup function returns 0 when it sees POLLFREE. But this is fragile. Although there is a workaround (see epoll), it's better to define a function which always sends POLLFREE to all waiters. Add such a function. Also make it verify that the queue really becomes empty after all waiters have been woken up. Reported-by: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/wait.h | 26 ++++++++++++++++++++++++++ kernel/sched/wait.c | 7 +++++++ 2 files changed, 33 insertions(+) diff --git a/include/linux/wait.h b/include/linux/wait.h index 2d0df57c9902..851e07da2583 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); +void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); #define wake_up_interruptible_sync_poll_locked(x, m) \ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. + */ +static inline void wake_up_pollfree(struct wait_queue_head *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. + * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 76577d1642a5..eca38107b32f 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -238,6 +238,13 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode) } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ +void __wake_up_pollfree(struct wait_queue_head *wq_head) +{ + __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE)); + /* POLLFREE must have cleared the queue. */ + WARN_ON_ONCE(waitqueue_active(wq_head)); +} + /* * Note: we use "set_current_state()" _after_ the wait-queue add, * because we need a memory barrier there on SMP, so that any From a880b28a71e39013e357fd3adccd1d8a31bc69a8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Dec 2021 17:04:52 -0800 Subject: [PATCH 352/868] binder: use wake_up_pollfree() wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll and aio poll are fortunately not affected by this, but it's very fragile. Thus, the new function wake_up_pollfree() has been introduced. Convert binder to use wake_up_pollfree(). Reported-by: Linus Torvalds Fixes: f5cb779ba163 ("ANDROID: binder: remove waitqueue when thread exits.") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211209010455.42744-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- drivers/android/binder.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index cffbe57a8e08..c75fb600740c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4422,23 +4422,20 @@ static int binder_thread_release(struct binder_proc *proc, __release(&t->lock); /* - * If this thread used poll, make sure we remove the waitqueue - * from any epoll data structures holding it with POLLFREE. - * waitqueue_active() is safe to use here because we're holding - * the inner lock. + * If this thread used poll, make sure we remove the waitqueue from any + * poll data structures holding it. */ - if ((thread->looper & BINDER_LOOPER_STATE_POLL) && - waitqueue_active(&thread->wait)) { - wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE); - } + if (thread->looper & BINDER_LOOPER_STATE_POLL) + wake_up_pollfree(&thread->wait); binder_inner_proc_unlock(thread->proc); /* - * This is needed to avoid races between wake_up_poll() above and - * and ep_remove_waitqueue() called for other reasons (eg the epoll file - * descriptor being closed); ep_remove_waitqueue() holds an RCU read - * lock, so we can be sure it's done after calling synchronize_rcu(). + * This is needed to avoid races between wake_up_pollfree() above and + * someone else removing the last entry from the queue for other reasons + * (e.g. ep_remove_wait_queue() being called due to an epoll file + * descriptor being closed). Such other users hold an RCU read lock, so + * we can be sure they're done after we call synchronize_rcu(). */ if (thread->looper & BINDER_LOOPER_STATE_POLL) synchronize_rcu(); From 9537bae0da1f8d1e2361ab6d0479e8af7824e160 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Dec 2021 17:04:53 -0800 Subject: [PATCH 353/868] signalfd: use wake_up_pollfree() wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll and aio poll are fortunately not affected by this, but it's very fragile. Thus, the new function wake_up_pollfree() has been introduced. Convert signalfd to use wake_up_pollfree(). Reported-by: Linus Torvalds Fixes: d80e731ecab4 ("epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211209010455.42744-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/signalfd.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 040e1cf90528..65ce0e72e7b9 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -35,17 +35,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) { - wait_queue_head_t *wqh = &sighand->signalfd_wqh; - /* - * The lockless check can race with remove_wait_queue() in progress, - * but in this case its caller should run under rcu_read_lock() and - * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. - */ - if (likely(!waitqueue_active(wqh))) - return; - - /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ - wake_up_poll(wqh, EPOLLHUP | POLLFREE); + wake_up_pollfree(&sighand->signalfd_wqh); } struct signalfd_ctx { From 363bee27e25804d8981dd1c025b4ad49dc39c530 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Dec 2021 17:04:54 -0800 Subject: [PATCH 354/868] aio: keep poll requests on waitqueue until completed Currently, aio_poll_wake() will always remove the poll request from the waitqueue. Then, if aio_poll_complete_work() sees that none of the polled events are ready and the request isn't cancelled, it re-adds the request to the waitqueue. (This can easily happen when polling a file that doesn't pass an event mask when waking up its waitqueue.) This is fundamentally broken for two reasons: 1. If a wakeup occurs between vfs_poll() and the request being re-added to the waitqueue, it will be missed because the request wasn't on the waitqueue at the time. Therefore, IOCB_CMD_POLL might never complete even if the polled file is ready. 2. When the request isn't on the waitqueue, there is no way to be notified that the waitqueue is being freed (which happens when its lifetime is shorter than the struct file's). This is supposed to happen via the waitqueue entries being woken up with POLLFREE. Therefore, leave the requests on the waitqueue until they are actually completed (or cancelled). To keep track of when aio_poll_complete_work needs to be scheduled, use new fields in struct poll_iocb. Remove the 'done' field which is now redundant. Note that this is consistent with how sys_poll() and eventpoll work; their wakeup functions do *not* remove the waitqueue entries. Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL") Cc: # v4.18+ Link: https://lore.kernel.org/r/20211209010455.42744-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/aio.c | 83 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 9c81cf611d65..2bc1352a83d8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -181,8 +181,9 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool done; bool cancelled; + bool work_scheduled; + bool work_need_resched; struct wait_queue_entry wait; struct work_struct work; }; @@ -1638,14 +1639,26 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); + spin_lock(&req->head->lock); if (!mask && !READ_ONCE(req->cancelled)) { - add_wait_queue(req->head, &req->wait); + /* + * The request isn't actually ready to be completed yet. + * Reschedule completion if another wakeup came in. + */ + if (req->work_need_resched) { + schedule_work(&req->work); + req->work_need_resched = false; + } else { + req->work_scheduled = false; + } + spin_unlock(&req->head->lock); spin_unlock_irq(&ctx->ctx_lock); return; } + list_del_init(&req->wait.entry); + spin_unlock(&req->head->lock); list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; spin_unlock_irq(&ctx->ctx_lock); iocb_put(iocb); @@ -1659,9 +1672,9 @@ static int aio_poll_cancel(struct kiocb *iocb) spin_lock(&req->head->lock); WRITE_ONCE(req->cancelled, true); - if (!list_empty(&req->wait.entry)) { - list_del_init(&req->wait.entry); + if (!req->work_scheduled) { schedule_work(&aiocb->poll.work); + req->work_scheduled = true; } spin_unlock(&req->head->lock); @@ -1680,20 +1693,26 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; - list_del_init(&req->wait.entry); - - if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + /* + * Complete the request inline if possible. This requires that three + * conditions be met: + * 1. An event mask must have been passed. If a plain wakeup was done + * instead, then mask == 0 and we have to call vfs_poll() to get + * the events, so inline completion isn't possible. + * 2. The completion work must not have already been scheduled. + * 3. ctx_lock must not be busy. We have to use trylock because we + * already hold the waitqueue lock, so this inverts the normal + * locking order. Use irqsave/irqrestore because not all + * filesystems (e.g. fuse) call this function with IRQs disabled, + * yet IRQs have to be disabled before ctx_lock is obtained. + */ + if (mask && !req->work_scheduled && + spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { struct kioctx *ctx = iocb->ki_ctx; - /* - * Try to complete the iocb inline if we can. Use - * irqsave/irqrestore because not all filesystems (e.g. fuse) - * call this function with IRQs disabled and because IRQs - * have to be disabled before ctx_lock is obtained. - */ + list_del_init(&req->wait.entry); list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; if (iocb->ki_eventfd && eventfd_signal_allowed()) { iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); @@ -1703,7 +1722,20 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (iocb) iocb_put(iocb); } else { - schedule_work(&req->work); + /* + * Schedule the completion work if needed. If it was already + * scheduled, record that another wakeup came in. + * + * Don't remove the request from the waitqueue here, as it might + * not actually be complete yet (we won't know until vfs_poll() + * is called), and we must not miss any wakeups. + */ + if (req->work_scheduled) { + req->work_need_resched = true; + } else { + schedule_work(&req->work); + req->work_scheduled = true; + } } return 1; } @@ -1750,8 +1782,9 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; - req->done = false; req->cancelled = false; + req->work_scheduled = false; + req->work_need_resched = false; apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; @@ -1766,17 +1799,27 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) spin_lock_irq(&ctx->ctx_lock); if (likely(req->head)) { spin_lock(&req->head->lock); - if (unlikely(list_empty(&req->wait.entry))) { - if (apt.error) + if (list_empty(&req->wait.entry) || req->work_scheduled) { + /* + * aio_poll_wake() already either scheduled the async + * completion work, or completed the request inline. + */ + if (apt.error) /* unsupported case: multiple queues */ cancel = true; apt.error = 0; mask = 0; } if (mask || apt.error) { + /* Steal to complete synchronously. */ list_del_init(&req->wait.entry); } else if (cancel) { + /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); - } else if (!req->done) { /* actually waiting for an event */ + } else if (!list_empty(&req->wait.entry)) { + /* + * Actually waiting for an event, so add the request to + * active_reqs so that it can be cancelled if needed. + */ list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } From 50252e4b5e989ce64555c7aef7516bdefc2fea72 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Dec 2021 17:04:55 -0800 Subject: [PATCH 355/868] aio: fix use-after-free due to missing POLLFREE handling signalfd_poll() and binder_poll() are special in that they use a waitqueue whose lifetime is the current task, rather than the struct file as is normally the case. This is okay for blocking polls, since a blocking poll occurs within one task; however, non-blocking polls require another solution. This solution is for the queue to be cleared before it is freed, by sending a POLLFREE notification to all waiters. Unfortunately, only eventpoll handles POLLFREE. A second type of non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't handle POLLFREE. This allows a use-after-free to occur if a signalfd or binder fd is polled with aio poll, and the waitqueue gets freed. Fix this by making aio poll handle POLLFREE. A patch by Ramji Jiyani (https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com) tried to do this by making aio_poll_wake() always complete the request inline if POLLFREE is seen. However, that solution had two bugs. First, it introduced a deadlock, as it unconditionally locked the aio context while holding the waitqueue lock, which inverts the normal locking order. Second, it didn't consider that POLLFREE notifications are missed while the request has been temporarily de-queued. The second problem was solved by my previous patch. This patch then properly fixes the use-after-free by handling POLLFREE in a deadlock-free way. It does this by taking advantage of the fact that freeing of the waitqueue is RCU-delayed, similar to what eventpoll does. Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL") Cc: # v4.18+ Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/aio.c | 137 ++++++++++++++++++++++++-------- include/uapi/asm-generic/poll.h | 2 +- 2 files changed, 107 insertions(+), 32 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 2bc1352a83d8..c9bb0d3d8593 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1620,6 +1620,51 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } +/* + * Safely lock the waitqueue which the request is on, synchronizing with the + * case where the ->poll() provider decides to free its waitqueue early. + * + * Returns true on success, meaning that req->head->lock was locked, req->wait + * is on req->head, and an RCU read lock was taken. Returns false if the + * request was already removed from its waitqueue (which might no longer exist). + */ +static bool poll_iocb_lock_wq(struct poll_iocb *req) +{ + wait_queue_head_t *head; + + /* + * While we hold the waitqueue lock and the waitqueue is nonempty, + * wake_up_pollfree() will wait for us. However, taking the waitqueue + * lock in the first place can race with the waitqueue being freed. + * + * We solve this as eventpoll does: by taking advantage of the fact that + * all users of wake_up_pollfree() will RCU-delay the actual free. If + * we enter rcu_read_lock() and see that the pointer to the queue is + * non-NULL, we can then lock it without the memory being freed out from + * under us, then check whether the request is still on the queue. + * + * Keep holding rcu_read_lock() as long as we hold the queue lock, in + * case the caller deletes the entry from the queue, leaving it empty. + * In that case, only RCU prevents the queue memory from being freed. + */ + rcu_read_lock(); + head = smp_load_acquire(&req->head); + if (head) { + spin_lock(&head->lock); + if (!list_empty(&req->wait.entry)) + return true; + spin_unlock(&head->lock); + } + rcu_read_unlock(); + return false; +} + +static void poll_iocb_unlock_wq(struct poll_iocb *req) +{ + spin_unlock(&req->head->lock); + rcu_read_unlock(); +} + static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1639,24 +1684,25 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - if (!mask && !READ_ONCE(req->cancelled)) { - /* - * The request isn't actually ready to be completed yet. - * Reschedule completion if another wakeup came in. - */ - if (req->work_need_resched) { - schedule_work(&req->work); - req->work_need_resched = false; - } else { - req->work_scheduled = false; + if (poll_iocb_lock_wq(req)) { + if (!mask && !READ_ONCE(req->cancelled)) { + /* + * The request isn't actually ready to be completed yet. + * Reschedule completion if another wakeup came in. + */ + if (req->work_need_resched) { + schedule_work(&req->work); + req->work_need_resched = false; + } else { + req->work_scheduled = false; + } + poll_iocb_unlock_wq(req); + spin_unlock_irq(&ctx->ctx_lock); + return; } - spin_unlock(&req->head->lock); - spin_unlock_irq(&ctx->ctx_lock); - return; - } - list_del_init(&req->wait.entry); - spin_unlock(&req->head->lock); + list_del_init(&req->wait.entry); + poll_iocb_unlock_wq(req); + } /* else, POLLFREE has freed the waitqueue, so we must complete */ list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); spin_unlock_irq(&ctx->ctx_lock); @@ -1670,13 +1716,14 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; - spin_lock(&req->head->lock); - WRITE_ONCE(req->cancelled, true); - if (!req->work_scheduled) { - schedule_work(&aiocb->poll.work); - req->work_scheduled = true; - } - spin_unlock(&req->head->lock); + if (poll_iocb_lock_wq(req)) { + WRITE_ONCE(req->cancelled, true); + if (!req->work_scheduled) { + schedule_work(&aiocb->poll.work); + req->work_scheduled = true; + } + poll_iocb_unlock_wq(req); + } /* else, the request was force-cancelled by POLLFREE already */ return 0; } @@ -1728,7 +1775,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, * * Don't remove the request from the waitqueue here, as it might * not actually be complete yet (we won't know until vfs_poll() - * is called), and we must not miss any wakeups. + * is called), and we must not miss any wakeups. POLLFREE is an + * exception to this; see below. */ if (req->work_scheduled) { req->work_need_resched = true; @@ -1736,6 +1784,28 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, schedule_work(&req->work); req->work_scheduled = true; } + + /* + * If the waitqueue is being freed early but we can't complete + * the request inline, we have to tear down the request as best + * we can. That means immediately removing the request from its + * waitqueue and preventing all further accesses to the + * waitqueue via the request. We also need to schedule the + * completion work (done above). Also mark the request as + * cancelled, to potentially skip an unneeded call to ->poll(). + */ + if (mask & POLLFREE) { + WRITE_ONCE(req->cancelled, true); + list_del_init(&req->wait.entry); + + /* + * Careful: this *must* be the last step, since as soon + * as req->head is NULL'ed out, the request can be + * completed and freed, since aio_poll_complete_work() + * will no longer need to take the waitqueue lock. + */ + smp_store_release(&req->head, NULL); + } } return 1; } @@ -1743,6 +1813,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; + bool queued; int error; }; @@ -1753,11 +1824,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ - if (unlikely(pt->iocb->poll.head)) { + if (unlikely(pt->queued)) { pt->error = -EINVAL; return; } + pt->queued = true; pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); @@ -1789,6 +1861,7 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; + apt.queued = false; apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ @@ -1797,9 +1870,10 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); - if (likely(req->head)) { - spin_lock(&req->head->lock); - if (list_empty(&req->wait.entry) || req->work_scheduled) { + if (likely(apt.queued)) { + bool on_queue = poll_iocb_lock_wq(req); + + if (!on_queue || req->work_scheduled) { /* * aio_poll_wake() already either scheduled the async * completion work, or completed the request inline. @@ -1815,7 +1889,7 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) } else if (cancel) { /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); - } else if (!list_empty(&req->wait.entry)) { + } else if (on_queue) { /* * Actually waiting for an event, so add the request to * active_reqs so that it can be cancelled if needed. @@ -1823,7 +1897,8 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } - spin_unlock(&req->head->lock); + if (on_queue) + poll_iocb_unlock_wq(req); } if (mask) { /* no async, we'd stolen it */ aiocb->ki_res.res = mangle_poll(mask); diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h index 41b509f410bf..f9c520ce4bf4 100644 --- a/include/uapi/asm-generic/poll.h +++ b/include/uapi/asm-generic/poll.h @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif -#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ +#define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 From 4b3749865374899e115aa8c48681709b086fe6d3 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 13 Sep 2021 19:19:28 +0800 Subject: [PATCH 356/868] aio: Fix incorrect usage of eventfd_signal_allowed() We should defer eventfd_signal() to the workqueue when eventfd_signal_allowed() return false rather than return true. Fixes: b542e383d8c0 ("eventfd: Make signal recursion protection a task bit") Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210913111928.98-1-xieyongji@bytedance.com Reviewed-by: Eric Biggers Signed-off-by: Eric Biggers --- fs/aio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/aio.c b/fs/aio.c index c9bb0d3d8593..f6f1cbffef9e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1760,7 +1760,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del_init(&req->wait.entry); list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - if (iocb->ki_eventfd && eventfd_signal_allowed()) { + if (iocb->ki_eventfd && !eventfd_signal_allowed()) { iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); schedule_work(&req->work); From 6a97cee39d8f2ed4d6e35a09a302dae1d566db36 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 7 Dec 2021 09:43:41 -0800 Subject: [PATCH 357/868] Revert "usb: dwc3: dwc3-qcom: Enable tx-fifo-resize property by default" This reverts commit cefdd52fa0455c0555c30927386ee466a108b060. On sc7180-trogdor class devices with 'fw_devlink=permissive' and KASAN enabled, you'll see a Use-After-Free reported at bootup. The root of the problem is that dwc3_qcom_of_register_core() is adding a devm-allocated "tx-fifo-resize" property to its device tree node using of_add_property(). The issue is that of_add_property() makes a _permanent_ addition to the device tree that lasts until reboot. That means allocating memory for the property using "devm" managed memory is a terrible idea since that memory will be freed upon probe deferral or device unbinding. Let's revert the patch since the system is still functional without it. The fact that of_add_property() makes a permanent change is extra fodder for those folks who were aruging that the device tree isn't really the right way to pass information between parts of the driver. It is an exercise left to the reader to submit a patch re-adding the new feature in a way that makes everyone happier. Fixes: cefdd52fa045 ("usb: dwc3: dwc3-qcom: Enable tx-fifo-resize property by default") Cc: stable Reviewed-by: Stephen Boyd Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20211207094327.1.Ie3cde3443039342e2963262a4c3ac36dc2c08b30@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 9abbd01028c5..3cb01cdd02c2 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -649,7 +649,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) struct dwc3_qcom *qcom = platform_get_drvdata(pdev); struct device_node *np = pdev->dev.of_node, *dwc3_np; struct device *dev = &pdev->dev; - struct property *prop; int ret; dwc3_np = of_get_compatible_child(np, "snps,dwc3"); @@ -658,20 +657,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) return -ENODEV; } - prop = devm_kzalloc(dev, sizeof(*prop), GFP_KERNEL); - if (!prop) { - ret = -ENOMEM; - dev_err(dev, "unable to allocate memory for property\n"); - goto node_put; - } - - prop->name = "tx-fifo-resize"; - ret = of_add_property(dwc3_np, prop); - if (ret) { - dev_err(dev, "unable to add property\n"); - goto node_put; - } - ret = of_platform_populate(np, NULL, NULL, dev); if (ret) { dev_err(dev, "failed to register dwc3 core - %d\n", ret); From a4f1192cb53758a7210ed5a9ee695aeba22f75fb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 9 Dec 2021 14:30:33 +0200 Subject: [PATCH 358/868] percpu_ref: Replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. Signed-off-by: Andy Shevchenko Signed-off-by: Dennis Zhou --- include/linux/percpu-refcount.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b31d3f3312ce..d73a1c08c3e3 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -51,9 +51,9 @@ #define _LINUX_PERCPU_REFCOUNT_H #include -#include #include #include +#include #include struct percpu_ref; From 59ec71575ab440cd5ca0aa53b2a2985b3639fad4 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 29 Nov 2021 21:37:25 +0100 Subject: [PATCH 359/868] ucounts: Fix rlimit max values check The semantics of the rlimit max values differs from ucounts itself. When creating a new userns, we store the current rlimit of the process in ucount_max. Thus, the value of the limit in the parent userns is saved in the created one. The problem is that now we are taking the maximum value for counter from the same userns. So for init_user_ns it will always be RLIM_INFINITY. To fix the problem we need to check the counter value with the max value stored in userns. Reproducer: su - test -c "ulimit -u 3; sleep 5 & sleep 6 & unshare -U --map-root-user sh -c 'sleep 7 & sleep 8 & date; wait'" Before: [1] 175 [2] 176 Fri Nov 26 13:48:20 UTC 2021 [1]- Done sleep 5 [2]+ Done sleep 6 After: [1] 167 [2] 168 sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: retry: Resource temporarily unavailable sh: fork: Interrupted system call [1]- Done sleep 5 [2]+ Done sleep 6 Fixes: c54b245d0118 ("Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") Reported-by: Gleb Fotengauer-Malinovskiy Signed-off-by: "Eric W. Biederman" Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/024ec805f6e16896f0b23e094773790d171d2c1c.1638218242.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- kernel/ucount.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index 4f5613dac227..7b32c356ebc5 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -264,15 +264,16 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type) long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) { struct ucounts *iter; + long max = LONG_MAX; long ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - long max = READ_ONCE(iter->ns->ucount_max[type]); long new = atomic_long_add_return(v, &iter->ucount[type]); if (new < 0 || new > max) ret = LONG_MAX; else if (iter == ucounts) ret = new; + max = READ_ONCE(iter->ns->ucount_max[type]); } return ret; } @@ -312,15 +313,16 @@ long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type) { /* Caller must hold a reference to ucounts */ struct ucounts *iter; + long max = LONG_MAX; long dec, ret = 0; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - long max = READ_ONCE(iter->ns->ucount_max[type]); long new = atomic_long_add_return(1, &iter->ucount[type]); if (new < 0 || new > max) goto unwind; if (iter == ucounts) ret = new; + max = READ_ONCE(iter->ns->ucount_max[type]); /* * Grab an extra ucount reference for the caller when * the rlimit count was previously 0. @@ -339,15 +341,16 @@ unwind: return 0; } -bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max) +bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long rlimit) { struct ucounts *iter; - if (get_ucounts_value(ucounts, type) > max) - return true; + long max = rlimit; + if (rlimit > LONG_MAX) + max = LONG_MAX; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - max = READ_ONCE(iter->ns->ucount_max[type]); if (get_ucounts_value(iter, type) > max) return true; + max = READ_ONCE(iter->ns->ucount_max[type]); } return false; } From 266423e60ea1b953fcc0cd97f3dad85857e434d1 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Mon, 6 Dec 2021 09:22:36 +0000 Subject: [PATCH 360/868] pinctrl: bcm2835: Change init order for gpio hogs ...and gpio-ranges pinctrl-bcm2835 is a combined pinctrl/gpio driver. Currently the gpio side is registered first, but this breaks gpio hogs (which are configured during gpiochip_add_data). Part of the hog initialisation is a call to pinctrl_gpio_request, and since the pinctrl driver hasn't yet been registered this results in an -EPROBE_DEFER from which it can never recover. Change the initialisation sequence to register the pinctrl driver first. This also solves a similar problem with the gpio-ranges property, which is required in order for released pins to be returned to inputs. Fixes: 73345a18d464b ("pinctrl: bcm2835: Pass irqchip when adding gpiochip") Signed-off-by: Phil Elwell Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20211206092237.4105895-2-phil@raspberrypi.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 29 +++++++++++++++------------ 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 2abcc6ce4eba..b607d10e4cbd 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -1244,6 +1244,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) raw_spin_lock_init(&pc->irq_lock[i]); } + pc->pctl_desc = *pdata->pctl_desc; + pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); + if (IS_ERR(pc->pctl_dev)) { + gpiochip_remove(&pc->gpio_chip); + return PTR_ERR(pc->pctl_dev); + } + + pc->gpio_range = *pdata->gpio_range; + pc->gpio_range.base = pc->gpio_chip.base; + pc->gpio_range.gc = &pc->gpio_chip; + pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); + girq = &pc->gpio_chip.irq; girq->chip = &bcm2835_gpio_irq_chip; girq->parent_handler = bcm2835_gpio_irq_handler; @@ -1251,8 +1263,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) girq->parents = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*girq->parents), GFP_KERNEL); - if (!girq->parents) + if (!girq->parents) { + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); return -ENOMEM; + } if (is_7211) { pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS, @@ -1307,21 +1321,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) err = gpiochip_add_data(&pc->gpio_chip, pc); if (err) { dev_err(dev, "could not add GPIO chip\n"); + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); return err; } - pc->pctl_desc = *pdata->pctl_desc; - pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); - if (IS_ERR(pc->pctl_dev)) { - gpiochip_remove(&pc->gpio_chip); - return PTR_ERR(pc->pctl_dev); - } - - pc->gpio_range = *pdata->gpio_range; - pc->gpio_range.base = pc->gpio_chip.base; - pc->gpio_range.gc = &pc->gpio_chip; - pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); - return 0; } From 92816e2629808726af015c7f5b14adc8e4f8b147 Mon Sep 17 00:00:00 2001 From: Jie2x Zhou Date: Thu, 9 Dec 2021 10:02:30 +0800 Subject: [PATCH 361/868] selftests: net: Correct ping6 expected rc from 2 to 1 ./fcnal-test.sh -v -t ipv6_ping TEST: ping out, VRF bind - ns-B IPv6 LLA [FAIL] TEST: ping out, VRF bind - multicast IP [FAIL] ping6 is failing as it should. COMMAND: ip netns exec ns-A /bin/ping6 -c1 -w1 fe80::7c4c:bcff:fe66:a63a%red strace of ping6 shows it is failing with '1', so change the expected rc from 2 to 1. Fixes: c0644e71df33 ("selftests: Add ipv6 ping tests to fcnal-test") Reported-by: kernel test robot Suggested-by: David Ahern Signed-off-by: Jie2x Zhou Link: https://lore.kernel.org/r/20211209020230.37270-1-jie2x.zhou@intel.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index a1da013d847b..29cc72d7c3d0 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -2191,7 +2191,7 @@ ipv6_ping_vrf() log_start show_hint "Fails since VRF device does not support linklocal or multicast" run_cmd ${ping6} -c1 -w1 ${a} - log_test_addr ${a} $? 2 "ping out, VRF bind" + log_test_addr ${a} $? 1 "ping out, VRF bind" done for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} From 3fd6e12a401ead0345e4b7e6a73e117f0713e0c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 9 Dec 2021 21:18:13 -0800 Subject: [PATCH 362/868] Input: goodix - fix memory leak in goodix_firmware_upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses-Coverity-ID: 1493934 ("Resource leak") Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20211208173321.26659-1-jose.exposito89@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix_fwupload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/goodix_fwupload.c b/drivers/input/touchscreen/goodix_fwupload.c index c1e7a2413078..191d4f38d991 100644 --- a/drivers/input/touchscreen/goodix_fwupload.c +++ b/drivers/input/touchscreen/goodix_fwupload.c @@ -207,7 +207,7 @@ static int goodix_firmware_upload(struct goodix_ts_data *ts) error = goodix_reset_no_int_sync(ts); if (error) - return error; + goto release; error = goodix_enter_upload_mode(ts->client); if (error) From c3fbab7767c53397d7b849799474f5a27cf306e6 Mon Sep 17 00:00:00 2001 From: Ye Guojin Date: Tue, 9 Nov 2021 05:59:58 +0000 Subject: [PATCH 363/868] irqchip/irq-bcm7120-l2: Add put_device() after of_find_device_by_node() This was found by coccicheck: ./drivers/irqchip/irq-bcm7120-l2.c,328,1-7,ERROR missing put_device; call of_find_device_by_node on line 234, but without a corresponding object release within this function. ./drivers/irqchip/irq-bcm7120-l2.c,341,1-7,ERROR missing put_device; call of_find_device_by_node on line 234, but without a corresponding object release within this function. Reported-by: Zeal Robot Signed-off-by: Ye Guojin Reviewed-by: Florian Fainelli Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211109055958.130287-1-ye.guojin@zte.com.cn --- drivers/irqchip/irq-bcm7120-l2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c index d80e67a6aad2..bb6609cebdbc 100644 --- a/drivers/irqchip/irq-bcm7120-l2.c +++ b/drivers/irqchip/irq-bcm7120-l2.c @@ -238,6 +238,7 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn, } data->num_parent_irqs = platform_irq_count(pdev); + put_device(&pdev->dev); if (data->num_parent_irqs <= 0) { pr_err("invalid number of parent interrupts\n"); ret = -ENOMEM; From 811ae81320da53a5670c36970cefacca8519f90e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 10 Dec 2021 16:17:34 +0200 Subject: [PATCH 364/868] xhci: Remove CONFIG_USB_DEFAULT_PERSIST to prevent xHCI from runtime suspending When the xHCI is quirked with XHCI_RESET_ON_RESUME, runtime resume routine also resets the controller. This is bad for USB drivers without reset_resume callback, because there's no subsequent call of usb_dev_complete() -> usb_resume_complete() to force rebinding the driver to the device. For instance, btusb device stops working after xHCI controller is runtime resumed, if the controlled is quirked with XHCI_RESET_ON_RESUME. So always take XHCI_RESET_ON_RESUME into account to solve the issue. Cc: Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20211210141735.1384209-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 902f410874e8..af92a9f8ed67 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3934,7 +3934,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) struct xhci_slot_ctx *slot_ctx; int i, ret; -#ifndef CONFIG_USB_DEFAULT_PERSIST /* * We called pm_runtime_get_noresume when the device was attached. * Decrement the counter here to allow controller to runtime suspend @@ -3942,7 +3941,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) */ if (xhci->quirks & XHCI_RESET_ON_RESUME) pm_runtime_put_noidle(hcd->self.controller); -#endif ret = xhci_check_args(hcd, udev, NULL, 0, true, __func__); /* If the host is halted due to driver unload, we still need to free the @@ -4094,14 +4092,12 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_debugfs_create_slot(xhci, slot_id); -#ifndef CONFIG_USB_DEFAULT_PERSIST /* * If resetting upon resume, we can't put the controller into runtime * suspend if there is a device attached. */ if (xhci->quirks & XHCI_RESET_ON_RESUME) pm_runtime_get_noresume(hcd->self.controller); -#endif /* Is this a LS or FS device under a HS hub? */ /* Hub or peripherial? */ From 7faac1953ed1f658f719cdf7bb7303fa5eef822c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 10 Dec 2021 16:17:35 +0200 Subject: [PATCH 365/868] xhci: avoid race between disable slot command and host runtime suspend Make xhci_disable_slot() synchronous, thus ensuring it, and xhci_free_dev() calling it return after xHC controller completes the disable slot command. Otherwise the roothub and xHC host may runtime suspend, and clear the command ring while the disable slot command is being processed. This causes a command completion mismatch as the completion event can't be mapped to the correct command. Command ring gets out of sync and commands time out. Driver finally assumes host is unresponsive and bails out. usb 2-4: USB disconnect, device number 10 xhci_hcd 0000:00:0d.0: ERROR mismatched command completion event ... xhci_hcd 0000:00:0d.0: xHCI host controller not responding, assume dead xhci_hcd 0000:00:0d.0: HC died; cleaning up Cc: Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20211210141735.1384209-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 1 + drivers/usb/host/xhci-ring.c | 1 - drivers/usb/host/xhci.c | 22 +++++++++++++++------- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index af946c42b6f0..df3522dab31b 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -717,6 +717,7 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci, continue; retval = xhci_disable_slot(xhci, i); + xhci_free_virt_device(xhci, i); if (retval) xhci_err(xhci, "Failed to disable slot %d, %d. Enter test mode anyway\n", i, retval); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index eaa49aef2935..d0b6806275e0 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1525,7 +1525,6 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); - xhci_free_virt_device(xhci, slot_id); } static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id, diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index af92a9f8ed67..f5b1bcc875de 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3959,9 +3959,8 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } virt_dev->udev = NULL; - ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) @@ -3971,7 +3970,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) u32 state; int ret = 0; - command = xhci_alloc_command(xhci, false, GFP_KERNEL); + command = xhci_alloc_command(xhci, true, GFP_KERNEL); if (!command) return -ENOMEM; @@ -3996,6 +3995,15 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) } xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); + + wait_for_completion(command->completion); + + if (command->status != COMP_SUCCESS) + xhci_warn(xhci, "Unsuccessful disable slot %u command, status %d\n", + slot_id, command->status); + + xhci_free_command(xhci, command); + return ret; } @@ -4104,9 +4112,8 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) return 1; disable_slot: - ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); return 0; } @@ -4236,6 +4243,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); if (!ret) xhci_alloc_dev(hcd, udev); kfree(command->completion); From edce10ee21f3916f5da34e55bbc03103c604ba70 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 8 Dec 2021 14:07:40 +0100 Subject: [PATCH 366/868] s390/kexec_file: print some more error messages Be kind and give some more information on what went wrong. Signed-off-by: Philipp Rudo Link: https://lore.kernel.org/r/20211208130741.5821-2-prudo@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 29 +++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 9975ad200d74..a8bfa7c8cbba 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -7,6 +7,8 @@ * Author(s): Philipp Rudo */ +#define pr_fmt(fmt) "kexec: " fmt + #include #include #include @@ -290,9 +292,16 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab) { + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; Elf_Rela *relas; int i, r_type; + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + relas = (void *)pi->ehdr + relsec->sh_offset; for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { @@ -304,15 +313,27 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, sym = (void *)pi->ehdr + symtab->sh_offset; sym += ELF64_R_SYM(relas[i].r_info); - if (sym->st_shndx == SHN_UNDEF) - return -ENOEXEC; + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; - if (sym->st_shndx == SHN_COMMON) + if (sym->st_shndx == SHN_UNDEF) { + pr_err("Undefined symbol: %s\n", name); return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } if (sym->st_shndx >= pi->ehdr->e_shnum && - sym->st_shndx != SHN_ABS) + sym->st_shndx != SHN_ABS) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); return -ENOEXEC; + } loc = pi->purgatory_buf; loc += section->sh_offset; From 41967a37b8eedfee15b81406a9f3015be90d3980 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Wed, 8 Dec 2021 14:07:41 +0100 Subject: [PATCH 367/868] s390/kexec_file: fix error handling when applying relocations arch_kexec_apply_relocations_add currently ignores all errors returned by arch_kexec_do_relocs. This means that every unknown relocation is silently skipped causing unpredictable behavior while the relocated code runs. Fix this by checking for errors and fail kexec_file_load if an unknown relocation type is encountered. The problem was found after gcc changed its behavior and used R_390_PLT32DBL relocations for brasl instruction and relied on ld to resolve the relocations in the final link in case direct calls are possible. As the purgatory code is only linked partially (option -r) ld didn't resolve the relocations leaving them for arch_kexec_do_relocs. But arch_kexec_do_relocs doesn't know how to handle R_390_PLT32DBL relocations so they were silently skipped. This ultimately caused an endless loop in the purgatory as the brasl instructions kept branching to itself. Fixes: 71406883fd35 ("s390/kexec_file: Add kexec_file_load system call") Reported-by: Tao Liu Signed-off-by: Philipp Rudo Link: https://lore.kernel.org/r/20211208130741.5821-3-prudo@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index a8bfa7c8cbba..876cdd3c994e 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -296,6 +296,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *sechdrs; Elf_Rela *relas; int i, r_type; + int ret; /* String & section header string table */ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; @@ -347,7 +348,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); - arch_kexec_do_relocs(r_type, loc, val, addr); + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } } return 0; } From ac8fc6af1ab62b2e5d57ddadc8bd4c9433c49a72 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 8 Dec 2021 16:15:03 +0100 Subject: [PATCH 368/868] s390/ftrace: remove preempt_disable()/preempt_enable() pair It looks like commit ce5e48036c9e76a2 ("ftrace: disable preemption when recursion locked") missed a spot in kprobe_ftrace_handler() in arch/s390/kernel/ftrace.c. Remove the superfluous preempt_disable/enable_notrace() there too. Fixes: ce5e48036c9e76a2 ("ftrace: disable preemption when recursion locked") Signed-off-by: Jerome Marchand Link: https://lore.kernel.org/r/20211208151503.1510381-1-jmarchan@redhat.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/ftrace.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 5510c7d10ddc..21d62d8b6b9a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -290,7 +290,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -318,7 +317,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); From abf0e8e4ef25478a4390115e6a953d589d1f9ffd Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 9 Dec 2021 08:38:17 +0100 Subject: [PATCH 369/868] s390/kexec: handle R_390_PLT32DBL rela in arch_kexec_apply_relocations_add() Starting with gcc 11.3, the C compiler will generate PLT-relative function calls even if they are local and do not require it. Later on during linking, the linker will replace all PLT-relative calls to local functions with PC-relative ones. Unfortunately, the purgatory code of kexec/kdump is not being linked as a regular executable or shared library would have been, and therefore, all PLT-relative addresses remain in the generated purgatory object code unresolved. This leads to the situation where the purgatory code is being executed during kdump with all PLT-relative addresses unresolved. And this results in endless loops within the purgatory code. Furthermore, the clang C compiler has always behaved like described above and this commit should fix kdump for kernels built with the latter. Because the purgatory code is no regular executable or shared library, contains only calls to local functions and has no PLT, all R_390_PLT32DBL relocation entries can be resolved just like a R_390_PC32DBL one. * https://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_zSeries/x1633.html#AEN1699 Relocation entries of purgatory code generated with gcc 11.3 ------------------------------------------------------------ $ readelf -r linux/arch/s390/purgatory/purgatory.o Relocation section '.rela.text' at offset 0x370 contains 5 entries: Offset Info Type Sym. Value Sym. Name + Addend 00000000005c 000c00000013 R_390_PC32DBL 0000000000000000 purgatory_sha_regions + 2 00000000007a 000d00000014 R_390_PLT32DBL 0000000000000000 sha256_update + 2 00000000008c 000e00000014 R_390_PLT32DBL 0000000000000000 sha256_final + 2 000000000092 000800000013 R_390_PC32DBL 0000000000000000 .LC0 + 2 0000000000a0 000f00000014 R_390_PLT32DBL 0000000000000000 memcmp + 2 Relocation entries of purgatory code generated with gcc 11.2 ------------------------------------------------------------ $ readelf -r linux/arch/s390/purgatory/purgatory.o Relocation section '.rela.text' at offset 0x368 contains 5 entries: Offset Info Type Sym. Value Sym. Name + Addend 00000000005c 000c00000013 R_390_PC32DBL 0000000000000000 purgatory_sha_regions + 2 00000000007a 000d00000013 R_390_PC32DBL 0000000000000000 sha256_update + 2 00000000008c 000e00000013 R_390_PC32DBL 0000000000000000 sha256_final + 2 000000000092 000800000013 R_390_PC32DBL 0000000000000000 .LC0 + 2 0000000000a0 000f00000013 R_390_PC32DBL 0000000000000000 memcmp + 2 Signed-off-by: Alexander Egorenkov Reported-by: Tao Liu Suggested-by: Philipp Rudo Reviewed-by: Philipp Rudo Cc: Link: https://lore.kernel.org/r/20211209073817.82196-1-egorenar@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/kernel/machine_kexec_file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 876cdd3c994e..8f43575a4dd3 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -348,6 +348,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); + + if (r_type == R_390_PLT32DBL) + r_type = R_390_PC32DBL; + ret = arch_kexec_do_relocs(r_type, loc, val, addr); if (ret) { pr_err("Unknown rela relocation: %d\n", r_type); From 5dcf0c3084eb098bbb702f2f5ee55666047997d4 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 9 Dec 2021 15:21:06 +0100 Subject: [PATCH 370/868] s390: enable switchdev support in defconfig The HiperSockets Converged Interface (HSCI) introduced with commit 4e20e73e631a ("s390/qeth: Switchdev event handler") requires CONFIG_SWITCHDEV=y to be usable. Similarly when using Linux controlled SR-IOV capable PF devices with the mlx5_core driver CONFIG_SWITCHDEV=y as well as CONFIG_MLX5_ESWITCH=y are necessary to actually get link on the created VFs. So let's add these to the defconfig to make both types of devices usable. Note also that these options are already enabled in most current distribution kernels. Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 2 ++ arch/s390/configs/defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b626bc6e0eaf..e45cc27716de 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -117,6 +117,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -511,6 +512,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 0056cab27372..1c750bfca2d8 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -109,6 +109,7 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m +CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -502,6 +503,7 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y +CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set From ab443c53916730862cec202078d36fd4008bea79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Dec 2021 06:20:46 -0800 Subject: [PATCH 371/868] sch_cake: do not call cake_destroy() from cake_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qdiscs are not supposed to call their own destroy() method from init(), because core stack already does that. syzbot was able to trigger use after free: DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 21902 at kernel/locking/mutex.c:586 __mutex_lock_common kernel/locking/mutex.c:586 [inline] WARNING: CPU: 0 PID: 21902 at kernel/locking/mutex.c:586 __mutex_lock+0x9ec/0x12f0 kernel/locking/mutex.c:740 Modules linked in: CPU: 0 PID: 21902 Comm: syz-executor189 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__mutex_lock_common kernel/locking/mutex.c:586 [inline] RIP: 0010:__mutex_lock+0x9ec/0x12f0 kernel/locking/mutex.c:740 Code: 08 84 d2 0f 85 19 08 00 00 8b 05 97 38 4b 04 85 c0 0f 85 27 f7 ff ff 48 c7 c6 20 00 ac 89 48 c7 c7 a0 fe ab 89 e8 bf 76 ba ff <0f> 0b e9 0d f7 ff ff 48 8b 44 24 40 48 8d b8 c8 08 00 00 48 89 f8 RSP: 0018:ffffc9000627f290 EFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802315d700 RSI: ffffffff815f1db8 RDI: fffff52000c4fe44 RBP: ffff88818f28e000 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815ebb5e R11: 0000000000000000 R12: 0000000000000000 R13: dffffc0000000000 R14: ffffc9000627f458 R15: 0000000093c30000 FS: 0000555556abc400(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fda689c3303 CR3: 000000001cfbb000 CR4: 0000000000350ef0 Call Trace: tcf_chain0_head_change_cb_del+0x2e/0x3d0 net/sched/cls_api.c:810 tcf_block_put_ext net/sched/cls_api.c:1381 [inline] tcf_block_put_ext net/sched/cls_api.c:1376 [inline] tcf_block_put+0xbc/0x130 net/sched/cls_api.c:1394 cake_destroy+0x3f/0x80 net/sched/sch_cake.c:2695 qdisc_create.constprop.0+0x9da/0x10f0 net/sched/sch_api.c:1293 tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2496 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xdf0 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f1bb06badb9 Code: Unable to access opcode bytes at RIP 0x7f1bb06bad8f. RSP: 002b:00007fff3012a658 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f1bb06badb9 RDX: 0000000000000000 RSI: 00000000200007c0 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000003 R10: 0000000000000003 R11: 0000000000000246 R12: 00007fff3012a688 R13: 00007fff3012a6a0 R14: 00007fff3012a6e0 R15: 00000000000013c2 Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20211210142046.698336-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_cake.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 3c2300d14468..857aaebd49f4 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2736,7 +2736,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) - goto nomem; + return -ENOMEM; for (i = 0; i < CAKE_MAX_TINS; i++) { struct cake_tin_data *b = q->tins + i; @@ -2766,10 +2766,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->min_netlen = ~0; q->min_adjlen = ~0; return 0; - -nomem: - cake_destroy(sch); - return -ENOMEM; } static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) From a663bd19114d79f0902e2490fc484e5a7419cdc2 Mon Sep 17 00:00:00 2001 From: Alexey Sheplyakov Date: Tue, 9 Nov 2021 19:34:02 +0400 Subject: [PATCH 372/868] clocksource/drivers/dw_apb_timer_of: Fix probe failure The driver refuses to probe with -EINVAL since the commit 5d9814df0aec ("clocksource/drivers/dw_apb_timer_of: Add error handling if no clock available"). Before the driver used to probe successfully if either "clock-freq" or "clock-frequency" properties has been specified in the device tree. That commit changed if (A && B) panic("No clock nor clock-frequency property"); into if (!A && !B) return 0; That's a bug: the reverse of `A && B` is '!A || !B', not '!A && !B' Signed-off-by: Vadim V. Vlasov Signed-off-by: Alexey Sheplyakov Fixes: 5d9814df0aec56a6 ("clocksource/drivers/dw_apb_timer_of: Add error handling if no clock available"). Cc: Daniel Lezcano Cc: Dinh Nguyen Cc: Thomas Gleixner Cc: Vadim V. Vlasov Acked-by: Dinh Nguyen Link: https://lore.kernel.org/r/20211109153401.157491-1-asheplyakov@basealt.ru Signed-off-by: Daniel Lezcano --- drivers/clocksource/dw_apb_timer_of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index 3819ef5b7098..3245eb0c602d 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -47,7 +47,7 @@ static int __init timer_get_base_and_rate(struct device_node *np, pr_warn("pclk for %pOFn is present, but could not be activated\n", np); - if (!of_property_read_u32(np, "clock-freq", rate) && + if (!of_property_read_u32(np, "clock-freq", rate) || !of_property_read_u32(np, "clock-frequency", rate)) return 0; From 1edb7e74a7d3d64dc4e69e7059b4eea526d19a10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Nov 2021 11:35:32 +0000 Subject: [PATCH 373/868] clocksource/drivers/arm_arch_timer: Force inlining of erratum_set_next_event_generic() With some specific kernel configuration and Clang, the kernel fails to like with something like: ld.lld: error: undefined symbol: __compiletime_assert_200 >>> referenced by arch_timer.h:156 (./arch/arm64/include/asm/arch_timer.h:156) >>> clocksource/arm_arch_timer.o:(erratum_set_next_event_generic) in archive drivers/built-in.a ld.lld: error: undefined symbol: __compiletime_assert_197 >>> referenced by arch_timer.h:133 (./arch/arm64/include/asm/arch_timer.h:133) >>> clocksource/arm_arch_timer.o:(erratum_set_next_event_generic) in archive drivers/built-in.a make: *** [Makefile:1161: vmlinux] Error 1 These are due to the BUILD_BUG() macros contained in the low-level accessors (arch_timer_reg_{write,read}_cp15) being emitted, as the access type wasn't known at compile time. Fix this by making erratum_set_next_event_generic() __force_inline, resulting in the 'access' parameter to be resolved at compile time, similarly to what is already done for set_next_event(). Fixes: 4775bc63f880 ("Add build-time guards for unhandled register accesses") Reported-by: Greg Kroah-Hartman Signed-off-by: Marc Zyngier Cc: Mark Rutland Cc: Daniel Lezcano Cc: Sami Tolvanen Cc: Nick Desaulniers Tested-by: Sami Tolvanen Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20211117113532.3895208-1-maz@kernel.org Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 9a04eacc4412..1ecd52f903b8 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -394,8 +394,13 @@ EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0); -static void erratum_set_next_event_generic(const int access, unsigned long evt, - struct clock_event_device *clk) +/* + * Force the inlining of this function so that the register accesses + * can be themselves correctly inlined. + */ +static __always_inline +void erratum_set_next_event_generic(const int access, unsigned long evt, + struct clock_event_device *clk) { unsigned long ctrl; u64 cval; From b10252c7ae9c9d7c90552f88b544a44ee773af64 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Tue, 7 Dec 2021 15:00:39 +0100 Subject: [PATCH 374/868] nfsd: Fix nsfd startup race (again) Commit bd5ae9288d64 ("nfsd: register pernet ops last, unregister first") has re-opened rpc_pipefs_event() race against nfsd_net_id registration (register_pernet_subsys()) which has been fixed by commit bb7ffbf29e76 ("nfsd: fix nsfd startup race triggering BUG_ON"). Restore the order of register_pernet_subsys() vs register_cld_notifier(). Add WARN_ON() to prevent a future regression. Crash info: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000012 CPU: 8 PID: 345 Comm: mount Not tainted 5.4.144-... #1 pc : rpc_pipefs_event+0x54/0x120 [nfsd] lr : rpc_pipefs_event+0x48/0x120 [nfsd] Call trace: rpc_pipefs_event+0x54/0x120 [nfsd] blocking_notifier_call_chain rpc_fill_super get_tree_keyed rpc_fs_get_tree vfs_get_tree do_mount ksys_mount __arm64_sys_mount el0_svc_handler el0_svc Fixes: bd5ae9288d64 ("nfsd: register pernet ops last, unregister first") Cc: stable@vger.kernel.org Signed-off-by: Alexander Sverdlin Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4recover.c | 1 + fs/nfsd/nfsctl.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 6fedc49726bf..c634483d85d2 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -2156,6 +2156,7 @@ static struct notifier_block nfsd4_cld_block = { int register_cld_notifier(void) { + WARN_ON(!nfsd_net_id); return rpc_pipefs_notifier_register(&nfsd4_cld_block); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index af8531c3854a..51a49e0cfe37 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1521,12 +1521,9 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = register_cld_notifier(); - if (retval) - return retval; retval = nfsd4_init_slabs(); if (retval) - goto out_unregister_notifier; + return retval; retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; @@ -1545,9 +1542,14 @@ static int __init init_nfsd(void) goto out_free_exports; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) + goto out_free_filesystem; + retval = register_cld_notifier(); + if (retval) goto out_free_all; return 0; out_free_all: + unregister_pernet_subsys(&nfsd_net_ops); +out_free_filesystem: unregister_filesystem(&nfsd_fs_type); out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); @@ -1561,13 +1563,12 @@ out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); -out_unregister_notifier: - unregister_cld_notifier(); return retval; } static void __exit exit_nfsd(void) { + unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); @@ -1577,7 +1578,6 @@ static void __exit exit_nfsd(void) nfsd4_free_slabs(); nfsd4_exit_pnfs(); unregister_filesystem(&nfsd_fs_type); - unregister_cld_notifier(); } MODULE_AUTHOR("Olaf Kirch "); From 548ec0805c399c65ed66c6641be467f717833ab5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 29 Nov 2021 15:08:00 -0500 Subject: [PATCH 375/868] nfsd: fix use-after-free due to delegation race A delegation break could arrive as soon as we've called vfs_setlease. A delegation break runs a callback which immediately (in nfsd4_cb_recall_prepare) adds the delegation to del_recall_lru. If we then exit nfs4_set_delegation without hashing the delegation, it will be freed as soon as the callback is done with it, without ever being removed from del_recall_lru. Symptoms show up later as use-after-free or list corruption warnings, usually in the laundromat thread. I suspect aba2072f4523 "nfsd: grant read delegations to clients holding writes" made this bug easier to hit, but I looked as far back as v3.0 and it looks to me it already had the same problem. So I'm not sure where the bug was introduced; it may have been there from the beginning. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bfad94c70b84..1956d377d1a6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1207,6 +1207,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) return 0; } +static bool delegation_hashed(struct nfs4_delegation *dp) +{ + return !(list_empty(&dp->dl_perfile)); +} + static bool unhash_delegation_locked(struct nfs4_delegation *dp) { @@ -1214,7 +1219,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp) lockdep_assert_held(&state_lock); - if (list_empty(&dp->dl_perfile)) + if (!delegation_hashed(dp)) return false; dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; @@ -4598,7 +4603,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) * queued for a lease break. Don't queue it again. */ spin_lock(&state_lock); - if (dp->dl_time == 0) { + if (delegation_hashed(dp) && dp->dl_time == 0) { dp->dl_time = ktime_get_boottime_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } From 55df1ce0d4e086e05a8ab20619c73c729350f965 Mon Sep 17 00:00:00 2001 From: Markus Hochholdinger Date: Tue, 16 Nov 2021 10:21:35 +0000 Subject: [PATCH 376/868] md: fix update super 1.0 on rdev size change The superblock of version 1.0 doesn't get moved to the new position on a device size change. This leads to a rdev without a superblock on a known position, the raid can't be re-assembled. The line was removed by mistake and is re-added by this patch. Fixes: d9c0fa509eaf ("md: fix max sectors calculation for super 1.0") Cc: stable@vger.kernel.org Signed-off-by: Markus Hochholdinger Reviewed-by: Xiao Ni Signed-off-by: Song Liu --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 5111ed966947..e97d2faf1e88 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2189,6 +2189,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) if (!num_sectors || num_sectors > max_sectors) num_sectors = max_sectors; + rdev->sb_start = sb_start; } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); From 07641b5f32f6991758b08da9b1f4173feeb64f2a Mon Sep 17 00:00:00 2001 From: zhangyue Date: Tue, 16 Nov 2021 10:35:26 +0800 Subject: [PATCH 377/868] md: fix double free of mddev->private in autorun_array() In driver/md/md.c, if the function autorun_array() is called, the problem of double free may occur. In function autorun_array(), when the function do_md_run() returns an error, the function do_md_stop() will be called. The function do_md_run() called function md_run(), but in function md_run(), the pointer mddev->private may be freed. The function do_md_stop() called the function __md_stop(), but in function __md_stop(), the pointer mddev->private also will be freed without judging null. At this time, the pointer mddev->private will be double free, so it needs to be judged null or not. Signed-off-by: zhangyue Signed-off-by: Song Liu --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index e97d2faf1e88..41d6e2383517 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6271,7 +6271,8 @@ static void __md_stop(struct mddev *mddev) spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); - pers->free(mddev, mddev->private); + if (mddev->private) + pers->free(mddev, mddev->private); mddev->private = NULL; if (pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; From 345e004d023343d38088fdfea39688aa11e06ccf Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 10 Dec 2021 00:46:31 +0100 Subject: [PATCH 378/868] bpf: Fix incorrect state pruning for <8B spill/fill Commit 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") introduced support in the verifier to track <8B spill/fills of scalars. The backtracking logic for the precision bit was however skipping spill/fills of less than 8B. That could cause state pruning to consider two states equivalent when they shouldn't be. As an example, consider the following bytecode snippet: 0: r7 = r1 1: call bpf_get_prandom_u32 2: r6 = 2 3: if r0 == 0 goto pc+1 4: r6 = 3 ... 8: [state pruning point] ... /* u32 spill/fill */ 10: *(u32 *)(r10 - 8) = r6 11: r8 = *(u32 *)(r10 - 8) 12: r0 = 0 13: if r8 == 3 goto pc+1 14: r0 = 1 15: exit The verifier first walks the path with R6=3. Given the support for <8B spill/fills, at instruction 13, it knows the condition is true and skips instruction 14. At that point, the backtracking logic kicks in but stops at the fill instruction since it only propagates the precision bit for 8B spill/fill. When the verifier then walks the path with R6=2, it will consider it safe at instruction 8 because R6 is not marked as needing precision. Instruction 14 is thus never walked and is then incorrectly removed as 'dead code'. It's also possible to lead the verifier to accept e.g. an out-of-bound memory access instead of causing an incorrect dead code elimination. This regression was found via Cilium's bpf-next CI where it was causing a conntrack map update to be silently skipped because the code had been removed by the verifier. This commit fixes it by enabling support for <8B spill/fills in the bactracking logic. In case of a <8B spill/fill, the full 8B stack slot will be marked as needing precision. Then, in __mark_chain_precision, any tracked register spilled in a marked slot will itself be marked as needing precision, regardless of the spill size. This logic makes two assumptions: (1) only 8B-aligned spill/fill are tracked and (2) spilled registers are only tracked if the spill and fill sizes are equal. Commit ef979017b837 ("bpf: selftest: Add verifier tests for <8-byte scalar spill and refill") covers the first assumption and the next commit in this patchset covers the second. Fixes: 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f3001937bbb9..f2f1ed34cfe9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2379,8 +2379,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be @@ -2403,8 +2401,6 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, /* scalars can only be spilled into stack */ if (insn->dst_reg != BPF_REG_FP) return 0; - if (BPF_SIZE(insn->code) != BPF_DW) - return 0; spi = (-insn->off - 1) / BPF_REG_SIZE; if (spi >= 64) { verbose(env, "BUG spi %d\n", spi); From 0be2516f865f5a876837184a8385163ff64a5889 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Fri, 10 Dec 2021 00:47:00 +0100 Subject: [PATCH 379/868] selftests/bpf: Tests for state pruning with u32 spill/fill This patch adds tests for the verifier's tracking for spilled, <8B registers. The first two test cases ensure the verifier doesn't incorrectly prune states in case of <8B spill/fills. The last one simply checks that a filled u64 register is marked unknown if the register spilled in the same slack slot was less than 8B. The map value access at the end of the first program is only incorrect for the path R6=32. If the precision bit for register R8 isn't backtracked through the u32 spill/fill, the R6=32 path is pruned at instruction 9 and the program is incorrectly accepted. The second program is a variation of the same with u32 spills and a u64 fill. The additional instructions to introduce the first pruning point may be a bit fragile as they depend on the heuristics for pruning points in the verifier (currently at least 8 instructions and 2 jumps). If the heuristics are changed, the pruning point may move (e.g., to the subsequent jump) or disappear, which would cause the test to always pass. Signed-off-by: Paul Chaignon Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/search_pruning.c | 71 +++++++++++++++++++ .../selftests/bpf/verifier/spill_fill.c | 32 +++++++++ 2 files changed, 103 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index 7e50cb80873a..682519769fe3 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -132,6 +132,77 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "precision tracking for u32 spill/fill", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV32_IMM(BPF_REG_6, 32), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV32_IMM(BPF_REG_6, 4), + /* Additional insns to introduce a pruning point. */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + /* u32 spill/fill */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_10, -8), + /* out-of-bound map value access for r6=32 */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 15 }, + .result = REJECT, + .errstr = "R0 min value is outside of the allowed memory range", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "precision tracking for u32 spills, u64 fill", + .insns = { + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_7, 0xffffffff), + /* Additional insns to introduce a pruning point. */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), + /* u32 spills, u64 fill */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, -8), + /* if r8 != X goto pc+1 r8 known in fallthrough branch */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0xffffffff, 1), + BPF_MOV64_IMM(BPF_REG_3, 1), + /* if r8 == X goto pc+1 condition always true on first + * traversal, so starts backtracking to mark r8 as requiring + * precision. r7 marked as needing precision. r6 not marked + * since it's not tracked. + */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 0xffffffff, 1), + /* fails if r8 correctly marked unknown after fill. */ + BPF_ALU32_IMM(BPF_DIV, BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "div by zero", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, { "allocated_stack", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 7ab3de108761..6c907144311f 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -175,6 +175,38 @@ .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, +{ + "Spill u32 const scalars. Refill as u64. Offset to skb->data", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + /* r6 = 0 */ + BPF_MOV32_IMM(BPF_REG_6, 0), + /* r7 = 20 */ + BPF_MOV32_IMM(BPF_REG_7, 20), + /* *(u32 *)(r10 -4) = r6 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_6, -4), + /* *(u32 *)(r10 -8) = r7 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, -8), + /* r4 = *(u64 *)(r10 -8) */ + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8), + /* r0 = r2 */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), + /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */ + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, { "Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data", .insns = { From e6a59aac8a8713f335a37d762db0dbe80e7f6d38 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 10 Dec 2021 10:20:58 -0800 Subject: [PATCH 380/868] block: fix ioprio_get(IOPRIO_WHO_PGRP) vs setuid(2) do_each_pid_thread(PIDTYPE_PGID) can race with a concurrent change_pid(PIDTYPE_PGID) that can move the task from one hlist to another while iterating. Serialize ioprio_get to take the tasklist_lock in this case, just like it's set counterpart. Fixes: d69b78ba1de (ioprio: grab rcu_read_lock in sys_ioprio_{set,get}()) Acked-by: Oleg Nesterov Signed-off-by: Davidlohr Bueso Link: https://lore.kernel.org/r/20211210182058.43417-1-dave@stgolabs.net Signed-off-by: Jens Axboe --- block/ioprio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/ioprio.c b/block/ioprio.c index 313c14a70bbd..6f01d35a5145 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -220,6 +220,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) pgrp = task_pgrp(current); else pgrp = find_vpid(who); + read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -229,6 +230,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) else ret = ioprio_best(ret, tmpio); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); + break; case IOPRIO_WHO_USER: uid = make_kuid(current_user_ns(), who); From 5eff363838654790f67f4bd564c5782967f67bcc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Dec 2021 11:52:34 -0700 Subject: [PATCH 381/868] Revert "mtd_blkdevs: don't scan partitions for plain mtdblock" This reverts commit 776b54e97a7d993ba23696e032426d5dea5bbe70. Looks like a last minute edit snuck into this patch, and as a result, it doesn't even compile. Revert the change for now. Signed-off-by: Jens Axboe --- drivers/mtd/mtd_blkdevs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index a69d064a8eec..4eaba6f4ec68 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -346,7 +346,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) gd->minors = 1 << tr->part_bits; gd->fops = &mtd_block_ops; - if (tr->part_bits) { + if (tr->part_bits) if (new->devnum < 26) snprintf(gd->disk_name, sizeof(gd->disk_name), "%s%c", tr->name, 'a' + new->devnum); @@ -355,11 +355,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) "%s%c%c", tr->name, 'a' - 1 + new->devnum / 26, 'a' + new->devnum % 26); - } else { + else snprintf(gd->disk_name, sizeof(gd->disk_name), "%s%d", tr->name, new->devnum); - gd->flags |= GENHD_FL_NO_PART; - } set_capacity(gd, ((u64)new->size * tr->blksize) >> 9); From 78a780602075d8b00c98070fa26e389b3b3efa72 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Dec 2021 08:54:29 -0700 Subject: [PATCH 382/868] io_uring: ensure task_work gets run as part of cancelations If we successfully cancel a work item but that work item needs to be processed through task_work, then we can be sleeping uninterruptibly in io_uring_cancel_generic() and never process it. Hence we don't make forward progress and we end up with an uninterruptible sleep warning. While in there, correct a comment that should be IFF, not IIF. Reported-and-tested-by: syzbot+21e6887c0be14181206d@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c4f217613f56..d5ab0e9a3f29 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9824,7 +9824,7 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task) /* * Find any io_uring ctx that this task has registered or done IO on, and cancel - * requests. @sqd should be not-null IIF it's an SQPOLL thread cancellation. + * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. */ static __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) @@ -9866,8 +9866,10 @@ static __cold void io_uring_cancel_generic(bool cancel_all, cancel_all); } - prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); + io_run_task_work(); io_uring_drop_tctx_refs(current); + /* * If we've seen completions, retry without waiting. This * avoids a race where a completion comes in before we did From 71a85387546e50b1a37b0fa45dadcae3bfb35cf6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Dec 2021 08:29:30 -0700 Subject: [PATCH 383/868] io-wq: check for wq exit after adding new worker task_work We check IO_WQ_BIT_EXIT before attempting to create a new worker, and wq exit cancels pending work if we have any. But it's possible to have a race between the two, where creation checks exit finding it not set, but we're in the process of exiting. The exit side will cancel pending creation task_work, but there's a gap where we add task_work after we've canceled existing creations at exit time. Fix this by checking the EXIT bit post adding the creation task_work. If it's set, run the same cancelation that exit does. Reported-and-tested-by: syzbot+b60c982cb0efc5e05a47@syzkaller.appspotmail.com Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io-wq.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 35da9d90df76..8d2bb818a3bb 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe, struct io_wqe_acct *acct, struct io_cb_cancel_data *match); static void create_worker_cb(struct callback_head *cb); +static void io_wq_cancel_tw_create(struct io_wq *wq); static bool io_worker_get(struct io_worker *worker) { @@ -357,10 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker, test_and_set_bit_lock(0, &worker->create_state)) goto fail_release; + atomic_inc(&wq->worker_refs); init_task_work(&worker->create_work, func); worker->create_index = acct->index; - if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) + if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { + /* + * EXIT may have been set after checking it above, check after + * adding the task_work and remove any creation item if it is + * now set. wq exit does that too, but we can have added this + * work item after we canceled in io_wq_exit_workers(). + */ + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) + io_wq_cancel_tw_create(wq); + io_worker_ref_put(wq); return true; + } + io_worker_ref_put(wq); clear_bit_unlock(0, &worker->create_state); fail_release: io_worker_release(worker); @@ -1196,13 +1209,9 @@ void io_wq_exit_start(struct io_wq *wq) set_bit(IO_WQ_BIT_EXIT, &wq->state); } -static void io_wq_exit_workers(struct io_wq *wq) +static void io_wq_cancel_tw_create(struct io_wq *wq) { struct callback_head *cb; - int node; - - if (!wq->task) - return; while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { struct io_worker *worker; @@ -1210,6 +1219,16 @@ static void io_wq_exit_workers(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); } +} + +static void io_wq_exit_workers(struct io_wq *wq) +{ + int node; + + if (!wq->task) + return; + + io_wq_cancel_tw_create(wq); rcu_read_lock(); for_each_node(node) { From a74c313aca266fab0d1d1a72becbb8b7b5286b6e Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 7 Dec 2021 17:21:44 +1300 Subject: [PATCH 384/868] i2c: mpc: Use atomic read and fix break condition Maxime points out that the polling code in mpc_i2c_isr should use the _atomic API because it is called in an irq context and that the behaviour of the MCF bit is that it is 1 when the byte transfer is complete. All of this means the original code was effectively a udelay(100). Fix this by using readb_poll_timeout_atomic() and removing the negation of the break condition. Fixes: 4a8ac5e45cda ("i2c: mpc: Poll for MCF") Reported-by: Maxime Bizon Signed-off-by: Chris Packham Tested-by: Maxime Bizon Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index a6ea1eb1394e..53b8da6dbb23 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -636,7 +636,7 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id) status = readb(i2c->base + MPC_I2C_SR); if (status & CSR_MIF) { /* Wait up to 100us for transfer to properly complete */ - readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100); + readb_poll_timeout_atomic(i2c->base + MPC_I2C_SR, status, status & CSR_MCF, 0, 100); writeb(0, i2c->base + MPC_I2C_SR); mpc_i2c_do_intr(i2c, status); return IRQ_HANDLED; From 9dcc38e2813e0cd3b195940c98b181ce6ede8f20 Mon Sep 17 00:00:00 2001 From: Drew DeVault Date: Fri, 10 Dec 2021 14:46:09 -0800 Subject: [PATCH 385/868] Increase default MLOCK_LIMIT to 8 MiB This limit has not been updated since 2008, when it was increased to 64 KiB at the request of GnuPG. Until recently, the main use-cases for this feature were (1) preventing sensitive memory from being swapped, as in GnuPG's use-case; and (2) real-time use-cases. In the first case, little memory is called for, and in the second case, the user is generally in a position to increase it if they need more. The introduction of IOURING_REGISTER_BUFFERS adds a third use-case: preparing fixed buffers for high-performance I/O. This use-case will take as much of this memory as it can get, but is still limited to 64 KiB by default, which is very little. This increases the limit to 8 MB, which was chosen fairly arbitrarily as a more generous, but still conservative, default value. It is also possible to raise this limit in userspace. This is easily done, for example, in the use-case of a network daemon: systemd, for instance, provides for this via LimitMEMLOCK in the service file; OpenRC via the rc_ulimit variables. However, there is no established userspace facility for configuring this outside of daemons: end-user applications do not presently have access to a convenient means of raising their limits. The buck, as it were, stops with the kernel. It's much easier to address it here than it is to bring it to hundreds of distributions, and it can only realistically be relied upon to be high-enough by end-user software if it is more-or-less ubiquitous. Most distros don't change this particular rlimit from the kernel-supplied default value, so a change here will easily provide that ubiquity. Link: https://lkml.kernel.org/r/20211028080813.15966-1-sir@cmpwn.com Signed-off-by: Drew DeVault Acked-by: Jens Axboe Acked-by: Cyril Hrubis Acked-by: Johannes Weiner Cc: Pavel Begunkov Cc: David Hildenbrand Cc: Jason Gunthorpe Cc: Andrew Dona-Couch Cc: Ammar Faizi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/resource.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h index 74ef57b38f9f..ac5d6a3031db 100644 --- a/include/uapi/linux/resource.h +++ b/include/uapi/linux/resource.h @@ -66,10 +66,17 @@ struct rlimit64 { #define _STK_LIM (8*1024*1024) /* - * GPG2 wants 64kB of mlocked memory, to make sure pass phrases - * and other sensitive information are never written to disk. + * Limit the amount of locked memory by some sane default: + * root can always increase this limit if needed. + * + * The main use-cases are (1) preventing sensitive memory + * from being swapped; (2) real-time operations; (3) via + * IOURING_REGISTER_BUFFERS. + * + * The first two don't need much. The latter will take as + * much as it can get. 8MB is a reasonably sane default. */ -#define MLOCK_LIMIT ((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024) +#define MLOCK_LIMIT (8*1024*1024) /* * Due to binary compatibility, the actual resource numbers From e943d28db257cc771f193bd75443f75ec8e8d978 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 10 Dec 2021 14:46:12 -0800 Subject: [PATCH 386/868] MAINTAINERS: update kdump maintainers Remove myself from kdump maintainers as I have no enough time to maintain it now. But I can review patches on demand though. Link: https://lkml.kernel.org/r/YZyKilzKFsWJYdgn@dhcp-128-65.nay.redhat.com Signed-off-by: Dave Young Acked-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8691c531e297..cf77835aea35 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10279,9 +10279,9 @@ F: lib/Kconfig.kcsan F: scripts/Makefile.kcsan KDUMP -M: Dave Young M: Baoquan He R: Vivek Goyal +R: Dave Young L: kexec@lists.infradead.org S: Maintained W: http://lse.sourceforge.net/kdump/ From d020d9e63d5396fbcc3a2c01cee38e28c7d20a3d Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Fri, 10 Dec 2021 14:46:15 -0800 Subject: [PATCH 387/868] mailmap: update email address for Guo Ren The ren_guo@c-sky.com would be deprecated and use guoren@kernel.org as the main email address. Link: https://lkml.kernel.org/r/20211123022741.545541-1-guoren@kernel.org Signed-off-by: Guo Ren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 6277bb27b4bf..b344067e0acb 100644 --- a/.mailmap +++ b/.mailmap @@ -126,6 +126,8 @@ Greg Kroah-Hartman Greg Kroah-Hartman Greg Kurz Gregory CLEMENT +Guo Ren +Guo Ren Gustavo Padovan Gustavo Padovan Hanjun Guo From 0c941cf30b913d4a684d3f8d9eee60e0daffdc79 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 10 Dec 2021 14:46:18 -0800 Subject: [PATCH 388/868] filemap: remove PageHWPoison check from next_uptodate_page() Pages are individually marked as suffering from hardware poisoning. Checking that the head page is not hardware poisoned doesn't make sense; we might be after a subpage. We check each page individually before we use it, so this was an optimisation gone wrong. It will cause us to fall back to the slow path when there was no need to do that Link: https://lkml.kernel.org/r/20211120174429.2596303-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Naoya Horiguchi Cc: Yang Shi Cc: "Kirill A . Shutemov" Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index daa0e23a6ee6..39c4c46c6133 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3253,8 +3253,6 @@ static struct page *next_uptodate_page(struct page *page, goto skip; if (!PageUptodate(page) || PageReadahead(page)) goto skip; - if (PageHWPoison(page)) - goto skip; if (!trylock_page(page)) goto skip; if (page->mapping != mapping) From e4779015fd5d2fb8390c258268addff24d6077c7 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:22 -0800 Subject: [PATCH 389/868] timers: implement usleep_idle_range() Patch series "mm/damon: Fix fake /proc/loadavg reports", v3. This patchset fixes DAMON's fake load report issue. The first patch makes yet another variant of usleep_range() for this fix, and the second patch fixes the issue of DAMON by making it using the newly introduced function. This patch (of 2): Some kernel threads such as DAMON could need to repeatedly sleep in micro seconds level. Because usleep_range() sleeps in uninterruptible state, however, such threads would make /proc/loadavg reports fake load. To help such cases, this commit implements a variant of usleep_range() called usleep_idle_range(). It is same to usleep_range() but sets the state of the current task as TASK_IDLE while sleeping. Link: https://lkml.kernel.org/r/20211126145015.15862-1-sj@kernel.org Link: https://lkml.kernel.org/r/20211126145015.15862-2-sj@kernel.org Signed-off-by: SeongJae Park Suggested-by: Andrew Morton Reviewed-by: Thomas Gleixner Tested-by: Oleksandr Natalenko Cc: John Stultz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delay.h | 14 +++++++++++++- kernel/time/timer.c | 16 +++++++++------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/include/linux/delay.h b/include/linux/delay.h index 8eacf67eb212..039e7e0c7378 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -20,6 +20,7 @@ */ #include +#include extern unsigned long loops_per_jiffy; @@ -58,7 +59,18 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -void usleep_range(unsigned long min, unsigned long max); +void usleep_range_state(unsigned long min, unsigned long max, + unsigned int state); + +static inline void usleep_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); +} + +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} static inline void ssleep(unsigned int seconds) { diff --git a/kernel/time/timer.c b/kernel/time/timer.c index e3d2c23c413d..85f1021ad459 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2054,26 +2054,28 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); /** - * usleep_range - Sleep for an approximate time - * @min: Minimum time in usecs to sleep - * @max: Maximum time in usecs to sleep + * usleep_range_state - Sleep for an approximate time in a given state + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * @state: State of the current task that will be while sleeping * * In non-atomic context where the exact wakeup time is flexible, use - * usleep_range() instead of udelay(). The sleep improves responsiveness + * usleep_range_state() instead of udelay(). The sleep improves responsiveness * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces * power usage by allowing hrtimers to take advantage of an already- * scheduled interrupt instead of scheduling a new one just for this sleep. */ -void __sched usleep_range(unsigned long min, unsigned long max) +void __sched usleep_range_state(unsigned long min, unsigned long max, + unsigned int state) { ktime_t exp = ktime_add_us(ktime_get(), min); u64 delta = (u64)(max - min) * NSEC_PER_USEC; for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(state); /* Do not return before the requested sleep time has elapsed */ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) break; } } -EXPORT_SYMBOL(usleep_range); +EXPORT_SYMBOL(usleep_range_state); From 70e9274805fccfd175d0431a947bfd11ee7df40e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:25 -0800 Subject: [PATCH 390/868] mm/damon/core: fix fake load reports due to uninterruptible sleeps Because DAMON sleeps in uninterruptible mode, /proc/loadavg reports fake load while DAMON is turned on, though it is doing nothing. This can confuse users[1]. To avoid the case, this commit makes DAMON sleeps in idle mode. [1] https://lore.kernel.org/all/11868371.O9o76ZdvQC@natalenko.name/ Link: https://lkml.kernel.org/r/20211126145015.15862-3-sj@kernel.org Fixes: 2224d8485492 ("mm: introduce Data Access MONitor (DAMON)") Reported-by: Oleksandr Natalenko Signed-off-by: SeongJae Park Tested-by: Oleksandr Natalenko Cc: John Stultz Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index c381b3c525d0..2daffd5820fe 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -981,9 +981,9 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme) static void kdamond_usleep(unsigned long usecs) { if (usecs > 100 * 1000) - schedule_timeout_interruptible(usecs_to_jiffies(usecs)); + schedule_timeout_idle(usecs_to_jiffies(usecs)); else - usleep_range(usecs, usecs + 1); + usleep_idle_range(usecs, usecs + 1); } /* Returns negative error code if it's not activated but should return */ @@ -1038,7 +1038,7 @@ static int kdamond_fn(void *data) ctx->callback.after_sampling(ctx)) done = true; - usleep_range(ctx->sample_interval, ctx->sample_interval + 1); + kdamond_usleep(ctx->sample_interval); if (ctx->primitive.check_accesses) max_nr_accesses = ctx->primitive.check_accesses(ctx); From 4de46a30b9929d3d1b29e481d48e9c25f8ac7919 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:28 -0800 Subject: [PATCH 391/868] mm/damon/core: use better timer mechanisms selection threshold Patch series "mm/damon: Trivial fixups and improvements". This patchset contains trivial fixups and improvements for DAMON and its kunit/kselftest tests. This patch (of 11): DAMON is using hrtimer if requested sleep time is <=100ms, while the suggested threshold[1] is <=20ms. This commit applies the threshold. [1] Documentation/timers/timers-howto.rst Link: https://lkml.kernel.org/r/20211201150440.1088-2-sj@kernel.org Fixes: ee801b7dd7822 ("mm/damon/schemes: activate schemes based on a watermarks mechanism") Signed-off-by: SeongJae Park Cc: Shuah Khan Cc: Brendan Higgins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 2daffd5820fe..eefb2ada67ca 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -980,7 +980,8 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme) static void kdamond_usleep(unsigned long usecs) { - if (usecs > 100 * 1000) + /* See Documentation/timers/timers-howto.rst for the thresholds */ + if (usecs > 20 * USEC_PER_MSEC) schedule_timeout_idle(usecs_to_jiffies(usecs)); else usleep_idle_range(usecs, usecs + 1); From 0bceffa236af401f5206feaf3538526cbc427209 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:31 -0800 Subject: [PATCH 392/868] mm/damon/dbgfs: remove an unnecessary error message When wrong scheme action is requested via the debugfs interface, DAMON prints an error message. Because the function returns error code, this is not really needed. Because the code path is triggered by the user specified input, this can result in kernel log mistakenly being messy. To avoid the case, this commit removes the message. Link: https://lkml.kernel.org/r/20211201150440.1088-3-sj@kernel.org Fixes: af122dd8f3c0 ("mm/damon/dbgfs: support DAMON-based Operation Schemes") Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/dbgfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 9b520bb4a3e7..1efac0022e9a 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -210,10 +210,8 @@ static struct damos **str_to_schemes(const char *str, ssize_t len, &wmarks.low, &parsed); if (ret != 18) break; - if (!damos_action_valid(action)) { - pr_err("wrong action %d\n", action); + if (!damos_action_valid(action)) goto fail; - } pos += parsed; scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, From 1afaf5cb687de85c5e00ac70f6eea5597077cbc5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:34 -0800 Subject: [PATCH 393/868] mm/damon/core: remove unnecessary error messages DAMON core prints error messages when damon_target object creation is failed or wrong monitoring attributes are given. Because appropriate error code is returned for each case, the messages are not essential. Also, because the code path can be triggered with user-specified input, this could result in kernel log mistakenly being messy. To avoid the case, this commit removes the messages. Link: https://lkml.kernel.org/r/20211201150440.1088-4-sj@kernel.org Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface") Fixes: b9a6ac4e4ede ("mm/damon: adaptively adjust regions") Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: kernel test robot Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/core.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index eefb2ada67ca..e92497895202 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -282,7 +282,6 @@ int damon_set_targets(struct damon_ctx *ctx, for (i = 0; i < nr_ids; i++) { t = damon_new_target(ids[i]); if (!t) { - pr_err("Failed to alloc damon_target\n"); /* The caller should do cleanup of the ids itself */ damon_for_each_target_safe(t, next, ctx) damon_destroy_target(t); @@ -312,16 +311,10 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, unsigned long aggr_int, unsigned long primitive_upd_int, unsigned long min_nr_reg, unsigned long max_nr_reg) { - if (min_nr_reg < 3) { - pr_err("min_nr_regions (%lu) must be at least 3\n", - min_nr_reg); + if (min_nr_reg < 3) return -EINVAL; - } - if (min_nr_reg > max_nr_reg) { - pr_err("invalid nr_regions. min (%lu) > max (%lu)\n", - min_nr_reg, max_nr_reg); + if (min_nr_reg > max_nr_reg) return -EINVAL; - } ctx->sample_interval = sample_int; ctx->aggr_interval = aggr_int; From 09e12289cc044afa484e70c0b379d579d52caf9a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:37 -0800 Subject: [PATCH 394/868] mm/damon/vaddr: remove an unnecessary warning message The DAMON virtual address space monitoring primitive prints a warning message for wrong DAMOS action. However, it is not essential as the code returns appropriate failure in the case. This commit removes the message to make the log clean. Link: https://lkml.kernel.org/r/20211201150440.1088-5-sj@kernel.org Fixes: 6dea8add4d28 ("mm/damon/vaddr: support DAMON-based Operation Schemes") Signed-off-by: SeongJae Park Reviewed-by: Muchun Song Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/vaddr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 47f47f60440e..20a9a9d69eb1 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -627,7 +627,6 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t, case DAMOS_STAT: return 0; default: - pr_warn("Wrong action %d\n", scheme->action); return -EINVAL; } From 044cd9750fe010170f5dc812e4824d98f5ea928c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:40 -0800 Subject: [PATCH 395/868] mm/damon/vaddr-test: split a test function having >1024 bytes frame size On some configuration[1], 'damon_test_split_evenly()' kunit test function has >1024 bytes frame size, so below build warning is triggered: CC mm/damon/vaddr.o In file included from mm/damon/vaddr.c:672: mm/damon/vaddr-test.h: In function 'damon_test_split_evenly': mm/damon/vaddr-test.h:309:1: warning: the frame size of 1064 bytes is larger than 1024 bytes [-Wframe-larger-than=] 309 | } | ^ This commit fixes the warning by separating the common logic in the function. [1] https://lore.kernel.org/linux-mm/202111182146.OV3C4uGr-lkp@intel.com/ Link: https://lkml.kernel.org/r/20211201150440.1088-6-sj@kernel.org Fixes: 17ccae8bb5c9 ("mm/damon: add kunit tests") Signed-off-by: SeongJae Park Reported-by: kernel test robot Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/vaddr-test.h | 93 ++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h index ecfd0b2ed222..3097ef9c662a 100644 --- a/mm/damon/vaddr-test.h +++ b/mm/damon/vaddr-test.h @@ -252,59 +252,62 @@ static void damon_test_apply_three_regions4(struct kunit *test) new_three_regions, expected, ARRAY_SIZE(expected)); } +static void damon_test_split_evenly_fail(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); + + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, start); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + } + + damon_free_target(t); +} + +static void damon_test_split_evenly_succ(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + unsigned long expected_width = (end - start) / nr_pieces; + unsigned long i = 0; + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces); + + damon_for_each_region(r, t) { + if (i == nr_pieces - 1) + break; + KUNIT_EXPECT_EQ(test, + r->ar.start, start + i++ * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width); + } + KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + damon_free_target(t); +} + static void damon_test_split_evenly(struct kunit *test) { struct damon_ctx *c = damon_new_ctx(); - struct damon_target *t; - struct damon_region *r; - unsigned long i; KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), -EINVAL); - t = damon_new_target(42); - r = damon_new_region(0, 100); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 0), -EINVAL); + damon_test_split_evenly_fail(test, 0, 100, 0); + damon_test_split_evenly_succ(test, 0, 100, 10); + damon_test_split_evenly_succ(test, 5, 59, 5); + damon_test_split_evenly_fail(test, 5, 6, 2); - damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 10), 0); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 10u); - - i = 0; - damon_for_each_region(r, t) { - KUNIT_EXPECT_EQ(test, r->ar.start, i++ * 10); - KUNIT_EXPECT_EQ(test, r->ar.end, i * 10); - } - damon_free_target(t); - - t = damon_new_target(42); - r = damon_new_region(5, 59); - damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 5), 0); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u); - - i = 0; - damon_for_each_region(r, t) { - if (i == 4) - break; - KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i++); - KUNIT_EXPECT_EQ(test, r->ar.end, 5 + 10 * i); - } - KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i); - KUNIT_EXPECT_EQ(test, r->ar.end, 59ul); - damon_free_target(t); - - t = damon_new_target(42); - r = damon_new_region(5, 6); - damon_add_region(r, t); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 2), -EINVAL); - KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); - - damon_for_each_region(r, t) { - KUNIT_EXPECT_EQ(test, r->ar.start, 5ul); - KUNIT_EXPECT_EQ(test, r->ar.end, 6ul); - } - damon_free_target(t); damon_destroy_ctx(c); } From 9f86d624292c238203b3687cdb870a2cde1a6f9b Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:43 -0800 Subject: [PATCH 396/868] mm/damon/vaddr-test: remove unnecessary variables A couple of test functions in DAMON virtual address space monitoring primitives implementation has unnecessary damon_ctx variables. This commit removes those. Link: https://lkml.kernel.org/r/20211201150440.1088-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/vaddr-test.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h index 3097ef9c662a..6a1b9272ea12 100644 --- a/mm/damon/vaddr-test.h +++ b/mm/damon/vaddr-test.h @@ -135,7 +135,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test, struct damon_addr_range *three_regions, unsigned long *expected, int nr_expected) { - struct damon_ctx *ctx = damon_new_ctx(); struct damon_target *t; struct damon_region *r; int i; @@ -145,7 +144,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test, r = damon_new_region(regions[i * 2], regions[i * 2 + 1]); damon_add_region(r, t); } - damon_add_target(ctx, t); damon_va_apply_three_regions(t, three_regions); @@ -154,8 +152,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test, KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); } - - damon_destroy_ctx(ctx); } /* @@ -298,8 +294,6 @@ static void damon_test_split_evenly_succ(struct kunit *test, static void damon_test_split_evenly(struct kunit *test) { - struct damon_ctx *c = damon_new_ctx(); - KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), -EINVAL); @@ -307,8 +301,6 @@ static void damon_test_split_evenly(struct kunit *test) damon_test_split_evenly_succ(test, 0, 100, 10); damon_test_split_evenly_succ(test, 5, 59, 5); damon_test_split_evenly_fail(test, 5, 6, 2); - - damon_destroy_ctx(c); } static struct kunit_case damon_test_cases[] = { From 964e17016cf99902c79a5de095cc5e57e7d58248 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:46 -0800 Subject: [PATCH 397/868] selftests/damon: skip test if DAMON is running Testing the DAMON debugfs files while DAMON is running makes no sense, as any write to the debugfs files will fail. This commit makes the test be skipped in this case. Link: https://lkml.kernel.org/r/20211201150440.1088-8-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/damon/debugfs_attrs.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 196b6640bf37..fc80380c59f0 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -44,6 +44,15 @@ test_content() { source ./_chk_dependency.sh +ksft_skip=4 + +damon_onoff="$DBGFS/monitor_on" +if [ $(cat "$damon_onoff") = "on" ] +then + echo "monitoring is on" + exit $ksft_skip +fi + # Test attrs file # =============== From c6980e30af356e85699f37142ae435a6aa483ceb Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:49 -0800 Subject: [PATCH 398/868] selftests/damon: test DAMON enabling with empty target_ids case DAMON debugfs didn't check empty targets when starting monitoring, and the issue is fixed with commit b5ca3e83ddb0 ("mm/damon/dbgfs: add adaptive_targets list check before enable monitor_on"). To avoid future regression, this commit adds a test case for that in DAMON selftests. Link: https://lkml.kernel.org/r/20211201150440.1088-9-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/damon/debugfs_attrs.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index fc80380c59f0..d0916373f310 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -94,4 +94,13 @@ test_write_succ "$file" "" "$orig_content" "empty input" test_content "$file" "$orig_content" "" "empty input written" echo "$orig_content" > "$file" +# Test empty targets case +# ======================= + +orig_target_ids=$(cat "$DBGFS/target_ids") +echo "" > "$DBGFS/target_ids" +orig_monitor_on=$(cat "$DBGFS/monitor_on") +test_write_fail "$DBGFS/monitor_on" "on" "orig_monitor_on" "empty target ids" +echo "$orig_target_ids" > "$DBGFS/target_ids" + echo "PASS" From d85570c655cc2c257b7da37a6d1fa4c59443c055 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:52 -0800 Subject: [PATCH 399/868] selftests/damon: test wrong DAMOS condition ranges input A patch titled "mm/damon/schemes: add the validity judgment of thresholds"[1] makes DAMON debugfs interface to validate DAMON scheme inputs. This commit adds a test case for the validation logic in DAMON selftests. [1] https://lore.kernel.org/linux-mm/d78360e52158d786fcbf20bc62c96785742e76d3.1637239568.git.xhao@linux.alibaba.com/ Link: https://lkml.kernel.org/r/20211201150440.1088-10-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/damon/debugfs_attrs.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index d0916373f310..1ef118617167 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -77,6 +77,8 @@ test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ test_write_fail "$file" "1 2 3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" test_write_succ "$file" "" "$orig_content" "disabling" +test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \ + "$orig_content" "wrong condition ranges" echo "$orig_content" > "$file" # Test target_ids file From b4a002889d24979295ed3c2bf1d5fcfb3901026a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:55 -0800 Subject: [PATCH 400/868] selftests/damon: test debugfs file reads/writes with huge count DAMON debugfs interface users were able to trigger warning by writing some files with arbitrarily large 'count' parameter. The issue is fixed with commit db7a347b26fe ("mm/damon/dbgfs: use '__GFP_NOWARN' for user-specified size buffer allocation"). This commit adds a test case for the issue in DAMON selftests to avoid future regressions. Link: https://lkml.kernel.org/r/20211201150440.1088-11-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/damon/.gitignore | 2 + tools/testing/selftests/damon/Makefile | 2 + .../testing/selftests/damon/debugfs_attrs.sh | 18 +++++++++ .../selftests/damon/huge_count_read_write.c | 39 +++++++++++++++++++ 4 files changed, 61 insertions(+) create mode 100644 tools/testing/selftests/damon/.gitignore create mode 100644 tools/testing/selftests/damon/huge_count_read_write.c diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore new file mode 100644 index 000000000000..c6c2965a6607 --- /dev/null +++ b/tools/testing/selftests/damon/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +huge_count_read_write diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 8a3f2cd9fec0..f0aa954b5d13 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for damon selftests +TEST_GEN_FILES += huge_count_read_write + TEST_FILES = _chk_dependency.sh TEST_PROGS = debugfs_attrs.sh diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 1ef118617167..23a7b48ca7d3 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -105,4 +105,22 @@ orig_monitor_on=$(cat "$DBGFS/monitor_on") test_write_fail "$DBGFS/monitor_on" "on" "orig_monitor_on" "empty target ids" echo "$orig_target_ids" > "$DBGFS/target_ids" +# Test huge count read write +# ========================== + +dmesg -C + +for file in "$DBGFS/"* +do + ./huge_count_read_write "$file" +done + +if dmesg | grep -q WARNING +then + dmesg + exit 1 +else + exit 0 +fi + echo "PASS" diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c new file mode 100644 index 000000000000..ad7a6b4cf338 --- /dev/null +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: SeongJae Park + */ + +#include +#include +#include +#include + +void write_read_with_huge_count(char *file) +{ + int filedesc = open(file, O_RDWR); + char buf[25]; + int ret; + + printf("%s %s\n", __func__, file); + if (filedesc < 0) { + fprintf(stderr, "failed opening %s\n", file); + exit(1); + } + + write(filedesc, "", 0xfffffffful); + perror("after write: "); + ret = read(filedesc, buf, 0xfffffffful); + perror("after read: "); + close(filedesc); +} + +int main(int argc, char *argv[]) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + exit(1); + } + write_read_with_huge_count(argv[1]); + + return 0; +} From 9ab3b0c8ef629f60ef25cb7634ad305315ae94d1 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 10 Dec 2021 14:46:59 -0800 Subject: [PATCH 401/868] selftests/damon: split test cases Currently, the single test program, debugfs.sh, contains all test cases for DAMON. When one of the cases fails, finding which case is failed from the test log is not so easy, and all remaining tests will be skipped. To improve the situation, this commit splits the single program into small test programs having their own names. Link: https://lkml.kernel.org/r/20211201150440.1088-12-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/damon/Makefile | 5 +- .../selftests/damon/_debugfs_common.sh | 52 ++++++++ .../testing/selftests/damon/debugfs_attrs.sh | 111 +----------------- .../selftests/damon/debugfs_empty_targets.sh | 13 ++ .../damon/debugfs_huge_count_read_write.sh | 22 ++++ .../selftests/damon/debugfs_schemes.sh | 19 +++ .../selftests/damon/debugfs_target_ids.sh | 19 +++ 7 files changed, 129 insertions(+), 112 deletions(-) create mode 100644 tools/testing/selftests/damon/_debugfs_common.sh create mode 100644 tools/testing/selftests/damon/debugfs_empty_targets.sh create mode 100644 tools/testing/selftests/damon/debugfs_huge_count_read_write.sh create mode 100644 tools/testing/selftests/damon/debugfs_schemes.sh create mode 100644 tools/testing/selftests/damon/debugfs_target_ids.sh diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index f0aa954b5d13..937d36ae9a69 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -3,7 +3,8 @@ TEST_GEN_FILES += huge_count_read_write -TEST_FILES = _chk_dependency.sh -TEST_PROGS = debugfs_attrs.sh +TEST_FILES = _chk_dependency.sh _debugfs_common.sh +TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh +TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh new file mode 100644 index 000000000000..48989d4813ae --- /dev/null +++ b/tools/testing/selftests/damon/_debugfs_common.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +test_write_result() { + file=$1 + content=$2 + orig_content=$3 + expect_reason=$4 + expected=$5 + + echo "$content" > "$file" + if [ $? -ne "$expected" ] + then + echo "writing $content to $file doesn't return $expected" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +test_write_succ() { + test_write_result "$1" "$2" "$3" "$4" 0 +} + +test_write_fail() { + test_write_result "$1" "$2" "$3" "$4" 1 +} + +test_content() { + file=$1 + orig_content=$2 + expected=$3 + expect_reason=$4 + + content=$(cat "$file") + if [ "$content" != "$expected" ] + then + echo "reading $file expected $expected but $content" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +source ./_chk_dependency.sh + +damon_onoff="$DBGFS/monitor_on" +if [ $(cat "$damon_onoff") = "on" ] +then + echo "monitoring is on" + exit $ksft_skip +fi diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 23a7b48ca7d3..902e312bca89 100644 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -1,57 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -test_write_result() { - file=$1 - content=$2 - orig_content=$3 - expect_reason=$4 - expected=$5 - - echo "$content" > "$file" - if [ $? -ne "$expected" ] - then - echo "writing $content to $file doesn't return $expected" - echo "expected because: $expect_reason" - echo "$orig_content" > "$file" - exit 1 - fi -} - -test_write_succ() { - test_write_result "$1" "$2" "$3" "$4" 0 -} - -test_write_fail() { - test_write_result "$1" "$2" "$3" "$4" 1 -} - -test_content() { - file=$1 - orig_content=$2 - expected=$3 - expect_reason=$4 - - content=$(cat "$file") - if [ "$content" != "$expected" ] - then - echo "reading $file expected $expected but $content" - echo "expected because: $expect_reason" - echo "$orig_content" > "$file" - exit 1 - fi -} - -source ./_chk_dependency.sh - -ksft_skip=4 - -damon_onoff="$DBGFS/monitor_on" -if [ $(cat "$damon_onoff") = "on" ] -then - echo "monitoring is on" - exit $ksft_skip -fi +source _debugfs_common.sh # Test attrs file # =============== @@ -65,62 +15,3 @@ test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ "min_nr_regions > max_nr_regions" test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" echo "$orig_content" > "$file" - -# Test schemes file -# ================= - -file="$DBGFS/schemes" -orig_content=$(cat "$file") - -test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ - "$orig_content" "valid input" -test_write_fail "$file" "1 2 -3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" -test_write_succ "$file" "" "$orig_content" "disabling" -test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \ - "$orig_content" "wrong condition ranges" -echo "$orig_content" > "$file" - -# Test target_ids file -# ==================== - -file="$DBGFS/target_ids" -orig_content=$(cat "$file") - -test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" -test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" -test_content "$file" "$orig_content" "1 2" "non-integer was there" -test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" -test_content "$file" "$orig_content" "" "wrong input written" -test_write_succ "$file" "" "$orig_content" "empty input" -test_content "$file" "$orig_content" "" "empty input written" -echo "$orig_content" > "$file" - -# Test empty targets case -# ======================= - -orig_target_ids=$(cat "$DBGFS/target_ids") -echo "" > "$DBGFS/target_ids" -orig_monitor_on=$(cat "$DBGFS/monitor_on") -test_write_fail "$DBGFS/monitor_on" "on" "orig_monitor_on" "empty target ids" -echo "$orig_target_ids" > "$DBGFS/target_ids" - -# Test huge count read write -# ========================== - -dmesg -C - -for file in "$DBGFS/"* -do - ./huge_count_read_write "$file" -done - -if dmesg | grep -q WARNING -then - dmesg - exit 1 -else - exit 0 -fi - -echo "PASS" diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh new file mode 100644 index 000000000000..87aff8083822 --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_empty_targets.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test empty targets case +# ======================= + +orig_target_ids=$(cat "$DBGFS/target_ids") +echo "" > "$DBGFS/target_ids" +orig_monitor_on=$(cat "$DBGFS/monitor_on") +test_write_fail "$DBGFS/monitor_on" "on" "orig_monitor_on" "empty target ids" +echo "$orig_target_ids" > "$DBGFS/target_ids" diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh new file mode 100644 index 000000000000..922cadac2950 --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test huge count read write +# ========================== + +dmesg -C + +for file in "$DBGFS/"* +do + ./huge_count_read_write "$file" +done + +if dmesg | grep -q WARNING +then + dmesg + exit 1 +else + exit 0 +fi diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh new file mode 100644 index 000000000000..5b39ab44731c --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_schemes.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test schemes file +# ================= + +file="$DBGFS/schemes" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ + "$orig_content" "valid input" +test_write_fail "$file" "1 2 +3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" +test_write_succ "$file" "" "$orig_content" "disabling" +test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \ + "$orig_content" "wrong condition ranges" +echo "$orig_content" > "$file" diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh new file mode 100644 index 000000000000..49aeabdb0aae --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_target_ids.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source _debugfs_common.sh + +# Test target_ids file +# ==================== + +file="$DBGFS/target_ids" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" +test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" +test_content "$file" "$orig_content" "1 2" "non-integer was there" +test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" +test_content "$file" "$orig_content" "" "wrong input written" +test_write_succ "$file" "" "$orig_content" "empty input" +test_content "$file" "$orig_content" "" "empty input written" +echo "$orig_content" > "$file" From 005a79e5c254c3f60ec269a459cc41b55028c798 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 10 Dec 2021 14:47:02 -0800 Subject: [PATCH 402/868] mm/slub: fix endianness bug for alloc/free_traces attributes On big-endian s390, the alloc/free_traces attributes produce endless output, because of always 0 idx in slab_debugfs_show(). idx is de-referenced from *v, which points to a loff_t value, with unsigned int idx = *(unsigned int *)v; This will only give the upper 32 bits on big-endian, which remain 0. Instead of only fixing this de-reference, during discussion it seemed more appropriate to change the seq_ops so that they use an explicit iterator in private loc_track struct. This patch adds idx to loc_track, which will also fix the endianness bug. Link: https://lore.kernel.org/r/20211117193932.4049412-1-gerald.schaefer@linux.ibm.com Link: https://lkml.kernel.org/r/20211126171848.17534-1-gerald.schaefer@linux.ibm.com Fixes: 64dd68497be7 ("mm: slub: move sysfs slab alloc/free interfaces to debugfs") Signed-off-by: Gerald Schaefer Reported-by: Steffen Maier Acked-by: Vlastimil Babka Cc: Faiyaz Mohammed Cc: Greg Kroah-Hartman Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a8626825a829..abe7db581d68 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5081,6 +5081,7 @@ struct loc_track { unsigned long max; unsigned long count; struct location *loc; + loff_t idx; }; static struct dentry *slab_debugfs_root; @@ -6052,11 +6053,11 @@ __initcall(slab_sysfs_init); #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) static int slab_debugfs_show(struct seq_file *seq, void *v) { - - struct location *l; - unsigned int idx = *(unsigned int *)v; struct loc_track *t = seq->private; + struct location *l; + unsigned long idx; + idx = (unsigned long) t->idx; if (idx < t->count) { l = &t->loc[idx]; @@ -6105,16 +6106,18 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos) { struct loc_track *t = seq->private; - v = ppos; - ++*ppos; + t->idx = ++(*ppos); if (*ppos <= t->count) - return v; + return ppos; return NULL; } static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos) { + struct loc_track *t = seq->private; + + t->idx = *ppos; return ppos; } From a7ebf564de325e1d7d52cd85b12721c424338bcc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 10 Dec 2021 14:47:05 -0800 Subject: [PATCH 403/868] mm/memcg: relocate mod_objcg_mlstate(), get_obj_stock() and put_obj_stock() All the calls to mod_objcg_mlstate(), get_obj_stock() and put_obj_stock() are done by functions defined within the same "#ifdef CONFIG_MEMCG_KMEM" compilation block. When CONFIG_MEMCG_KMEM isn't defined, the following compilation warnings will be issued [1] and [2]. mm/memcontrol.c:785:20: warning: unused function 'mod_objcg_mlstate' mm/memcontrol.c:2113:33: warning: unused function 'get_obj_stock' Fix these warning by moving those functions to under the same CONFIG_MEMCG_KMEM compilation block. There is no functional change. [1] https://lore.kernel.org/lkml/202111272014.WOYNLUV6-lkp@intel.com/ [2] https://lore.kernel.org/lkml/202111280551.LXsWYt1T-lkp@intel.com/ Link: https://lkml.kernel.org/r/20211129161140.306488-1-longman@redhat.com Fixes: 559271146efc ("mm/memcg: optimize user context object stock access") Fixes: 68ac5b3c8db2 ("mm/memcg: cache vmstat data in percpu memcg_stock_pcp") Signed-off-by: Waiman Long Reported-by: kernel test robot Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Reviewed-by: Muchun Song Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 106 ++++++++++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6863a834ed42..2ed5f2a0879d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -776,24 +776,6 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) rcu_read_unlock(); } -/* - * mod_objcg_mlstate() may be called with irq enabled, so - * mod_memcg_lruvec_state() should be used. - */ -static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, - struct pglist_data *pgdat, - enum node_stat_item idx, int nr) -{ - struct mem_cgroup *memcg; - struct lruvec *lruvec; - - rcu_read_lock(); - memcg = obj_cgroup_memcg(objcg); - lruvec = mem_cgroup_lruvec(memcg, pgdat); - mod_memcg_lruvec_state(lruvec, idx, nr); - rcu_read_unlock(); -} - /** * __count_memcg_events - account VM events in a cgroup * @memcg: the memory cgroup @@ -2137,41 +2119,6 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, } #endif -/* - * Most kmem_cache_alloc() calls are from user context. The irq disable/enable - * sequence used in this case to access content from object stock is slow. - * To optimize for user context access, there are now two object stocks for - * task context and interrupt context access respectively. - * - * The task context object stock can be accessed by disabling preemption only - * which is cheap in non-preempt kernel. The interrupt context object stock - * can only be accessed after disabling interrupt. User context code can - * access interrupt object stock, but not vice versa. - */ -static inline struct obj_stock *get_obj_stock(unsigned long *pflags) -{ - struct memcg_stock_pcp *stock; - - if (likely(in_task())) { - *pflags = 0UL; - preempt_disable(); - stock = this_cpu_ptr(&memcg_stock); - return &stock->task_obj; - } - - local_irq_save(*pflags); - stock = this_cpu_ptr(&memcg_stock); - return &stock->irq_obj; -} - -static inline void put_obj_stock(unsigned long flags) -{ - if (likely(in_task())) - preempt_enable(); - else - local_irq_restore(flags); -} - /** * consume_stock: Try to consume stocked charge on this cpu. * @memcg: memcg to consume from. @@ -2816,6 +2763,59 @@ retry: */ #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT) +/* + * Most kmem_cache_alloc() calls are from user context. The irq disable/enable + * sequence used in this case to access content from object stock is slow. + * To optimize for user context access, there are now two object stocks for + * task context and interrupt context access respectively. + * + * The task context object stock can be accessed by disabling preemption only + * which is cheap in non-preempt kernel. The interrupt context object stock + * can only be accessed after disabling interrupt. User context code can + * access interrupt object stock, but not vice versa. + */ +static inline struct obj_stock *get_obj_stock(unsigned long *pflags) +{ + struct memcg_stock_pcp *stock; + + if (likely(in_task())) { + *pflags = 0UL; + preempt_disable(); + stock = this_cpu_ptr(&memcg_stock); + return &stock->task_obj; + } + + local_irq_save(*pflags); + stock = this_cpu_ptr(&memcg_stock); + return &stock->irq_obj; +} + +static inline void put_obj_stock(unsigned long flags) +{ + if (likely(in_task())) + preempt_enable(); + else + local_irq_restore(flags); +} + +/* + * mod_objcg_mlstate() may be called with irq enabled, so + * mod_memcg_lruvec_state() should be used. + */ +static inline void mod_objcg_mlstate(struct obj_cgroup *objcg, + struct pglist_data *pgdat, + enum node_stat_item idx, int nr) +{ + struct mem_cgroup *memcg; + struct lruvec *lruvec; + + rcu_read_lock(); + memcg = obj_cgroup_memcg(objcg); + lruvec = mem_cgroup_lruvec(memcg, pgdat); + mod_memcg_lruvec_state(lruvec, idx, nr); + rcu_read_unlock(); +} + int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, gfp_t gfp, bool new_page) { From 4178158ef8cadeb0ee86639749ce2b33ad75f770 Mon Sep 17 00:00:00 2001 From: Zhenguo Yao Date: Fri, 10 Dec 2021 14:47:08 -0800 Subject: [PATCH 404/868] hugetlbfs: fix issue of preallocation of gigantic pages can't work Preallocation of gigantic pages can't work bacause of commit b5389086ad7b ("hugetlbfs: extend the definition of hugepages parameter to support node allocation"). When nid is NUMA_NO_NODE(-1), alloc_bootmem_huge_page will always return without doing allocation. Fix this by adding more check. Link: https://lkml.kernel.org/r/20211129133803.15653-1-yaozhenguo1@gmail.com Fixes: b5389086ad7b ("hugetlbfs: extend the definition of hugepages parameter to support node allocation") Signed-off-by: Zhenguo Yao Reviewed-by: Mike Kravetz Tested-by: Maxim Levitsky Reviewed-by: Muchun Song Reviewed-by: Baolin Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index abcd1785c629..a1baa198519a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2973,7 +2973,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) struct huge_bootmem_page *m = NULL; /* initialize for clang */ int nr_nodes, node; - if (nid >= nr_online_nodes) + if (nid != NUMA_NO_NODE && nid >= nr_online_nodes) return 0; /* do node specific alloc */ if (nid != NUMA_NO_NODE) { From 3c376dfafbf7a8ea0dea212d095ddd83e93280bb Mon Sep 17 00:00:00 2001 From: Manjong Lee Date: Fri, 10 Dec 2021 14:47:11 -0800 Subject: [PATCH 405/868] mm: bdi: initialize bdi_min_ratio when bdi is unregistered Initialize min_ratio if it is set during bdi unregistration. This can prevent problems that may occur a when bdi is removed without resetting min_ratio. For example. 1) insert external sdcard 2) set external sdcard's min_ratio 70 3) remove external sdcard without setting min_ratio 0 4) insert external sdcard 5) set external sdcard's min_ratio 70 << error occur(can't set) Because when an sdcard is removed, the present bdi_min_ratio value will remain. Currently, the only way to reset bdi_min_ratio is to reboot. [akpm@linux-foundation.org: tweak comment and coding style] Link: https://lkml.kernel.org/r/20211021161942.5983-1-mj0123.lee@samsung.com Signed-off-by: Manjong Lee Acked-by: Peter Zijlstra (Intel) Cc: Changheun Lee Cc: Jens Axboe Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Cc: Cc: Cc: Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/backing-dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1eead4761011..eae96dfe0261 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -945,6 +945,13 @@ void bdi_unregister(struct backing_dev_info *bdi) wb_shutdown(&bdi->wb); cgwb_bdi_unregister(bdi); + /* + * If this BDI's min ratio has been set, use bdi_set_min_ratio() to + * update the global bdi_min_ratio. + */ + if (bdi->min_ratio) + bdi_set_min_ratio(bdi, 0); + if (bdi->dev) { bdi_debug_unregister(bdi); device_unregister(bdi->dev); From bcd0f93353326954817a4f9fa55ec57fb38acbb0 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 9 Dec 2021 16:28:39 +0800 Subject: [PATCH 406/868] phonet: refcount leak in pep_sock_accep sock_hold(sk) is invoked in pep_sock_accept(), but __sock_put(sk) is not invoked in subsequent failure branches(pep_accept_conn() != 0). Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20211209082839.33985-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski --- net/phonet/pep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index a1525916885a..b4f90afb0638 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -868,6 +868,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, err = pep_accept_conn(newsk, skb); if (err) { + __sock_put(sk); sock_put(newsk); newsk = NULL; goto drop; From 71ddeac8cd1d217744a0e060ff520e147c9328d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Dec 2021 10:50:58 -0800 Subject: [PATCH 407/868] inet_diag: fix kernel-infoleak for UDP sockets KMSAN reported a kernel-infoleak [1], that can exploited by unpriv users. After analysis it turned out UDP was not initializing r->idiag_expires. Other users of inet_sk_diag_fill() might make the same mistake in the future, so fix this in inet_sk_diag_fill(). [1] BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:156 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x69d/0x25c0 lib/iov_iter.c:670 instrument_copy_to_user include/linux/instrumented.h:121 [inline] copyout lib/iov_iter.c:156 [inline] _copy_to_iter+0x69d/0x25c0 lib/iov_iter.c:670 copy_to_iter include/linux/uio.h:155 [inline] simple_copy_to_iter+0xf3/0x140 net/core/datagram.c:519 __skb_datagram_iter+0x2cb/0x1280 net/core/datagram.c:425 skb_copy_datagram_iter+0xdc/0x270 net/core/datagram.c:533 skb_copy_datagram_msg include/linux/skbuff.h:3657 [inline] netlink_recvmsg+0x660/0x1c60 net/netlink/af_netlink.c:1974 sock_recvmsg_nosec net/socket.c:944 [inline] sock_recvmsg net/socket.c:962 [inline] sock_read_iter+0x5a9/0x630 net/socket.c:1035 call_read_iter include/linux/fs.h:2156 [inline] new_sync_read fs/read_write.c:400 [inline] vfs_read+0x1631/0x1980 fs/read_write.c:481 ksys_read+0x28c/0x520 fs/read_write.c:619 __do_sys_read fs/read_write.c:629 [inline] __se_sys_read fs/read_write.c:627 [inline] __x64_sys_read+0xdb/0x120 fs/read_write.c:627 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was created at: slab_post_alloc_hook mm/slab.h:524 [inline] slab_alloc_node mm/slub.c:3251 [inline] __kmalloc_node_track_caller+0xe0c/0x1510 mm/slub.c:4974 kmalloc_reserve net/core/skbuff.c:354 [inline] __alloc_skb+0x545/0xf90 net/core/skbuff.c:426 alloc_skb include/linux/skbuff.h:1126 [inline] netlink_dump+0x3d5/0x16a0 net/netlink/af_netlink.c:2245 __netlink_dump_start+0xd1c/0xee0 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:254 [inline] inet_diag_handler_cmd+0x2e7/0x400 net/ipv4/inet_diag.c:1343 sock_diag_rcv_msg+0x24a/0x620 netlink_rcv_skb+0x447/0x800 net/netlink/af_netlink.c:2491 sock_diag_rcv+0x63/0x80 net/core/sock_diag.c:276 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x1095/0x1360 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x16f3/0x1870 net/netlink/af_netlink.c:1916 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] sock_write_iter+0x594/0x690 net/socket.c:1057 do_iter_readv_writev+0xa7f/0xc70 do_iter_write+0x52c/0x1500 fs/read_write.c:851 vfs_writev fs/read_write.c:924 [inline] do_writev+0x63f/0xe30 fs/read_write.c:967 __do_sys_writev fs/read_write.c:1040 [inline] __se_sys_writev fs/read_write.c:1037 [inline] __x64_sys_writev+0xe5/0x120 fs/read_write.c:1037 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Bytes 68-71 of 312 are uninitialized Memory access of size 312 starts at ffff88812ab54000 Data copied to user address 0000000020001440 CPU: 1 PID: 6365 Comm: syz-executor801 Not tainted 5.16.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 3c4d05c80567 ("inet_diag: Introduce the inet socket dumping routine") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211209185058.53917-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_diag.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c8fa6e7f7d12..581b5b2d72a5 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -261,6 +261,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; + r->idiag_expires = 0; if (inet_diag_msg_attrs_fill(sk, skb, r, ext, sk_user_ns(NETLINK_CB(cb->skb).sk), @@ -314,9 +315,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { From 94f2a444f28a649926c410eb9a38afb13a83ebe0 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 10 Dec 2021 10:57:22 +0100 Subject: [PATCH 408/868] net: usb: qmi_wwan: add Telit 0x1070 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit FN990 composition: 0x1070: tty, adb, rmnet, tty, tty, tty, tty Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20211210095722.22269-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 86b814e99224..f510e8219470 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1358,6 +1358,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990 */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ From ee60e626d536da4c710b3634afe68fe7c6d69b59 Mon Sep 17 00:00:00 2001 From: Filip Pokryvka Date: Fri, 10 Dec 2021 18:50:32 +0100 Subject: [PATCH 409/868] netdevsim: don't overwrite read only ethtool parms Ethtool ring feature has _max_pending attributes read-only. Set only read-write attributes in nsim_set_ringparam. This patch is useful, if netdevsim device is set-up using NetworkManager, because NetworkManager sends 0 as MAX values, as it is pointless to retrieve them in extra call, because they should be read-only. Then, the device is left in incosistent state (value > MAX). Fixes: a7fc6db099b5 ("netdevsim: support ethtool ring and coalesce settings") Signed-off-by: Filip Pokryvka Link: https://lore.kernel.org/r/20211210175032.411872-1-fpokryvk@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/ethtool.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 0ab6a40be611..a6a713b31aad 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -77,7 +77,10 @@ static int nsim_set_ringparam(struct net_device *dev, { struct netdevsim *ns = netdev_priv(dev); - memcpy(&ns->ethtool.ring, ring, sizeof(ns->ethtool.ring)); + ns->ethtool.ring.rx_pending = ring->rx_pending; + ns->ethtool.ring.rx_jumbo_pending = ring->rx_jumbo_pending; + ns->ethtool.ring.rx_mini_pending = ring->rx_mini_pending; + ns->ethtool.ring.tx_pending = ring->tx_pending; return 0; } From c897899752478d4c905c56f2b54b99ba82b34e13 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Wed, 1 Dec 2021 12:33:29 +0000 Subject: [PATCH 410/868] perf tools: Prevent out-of-bounds access to registers The size of the cache of register values is arch-dependant (PERF_REGS_MAX). This has the potential of causing an out-of-bounds access in the function "perf_reg_value" if the local architecture contains less registers than the one the perf.data file was recorded on. Since the maximum number of registers is bound by the bitmask "u64 cache_mask", and the size of the cache when running under x86 systems is 64 already, fix the size to 64 and add a range-check to the function "perf_reg_value" to prevent out-of-bounds access. Reported-by: Alexandre Truong Reviewed-by: Kajol Jain Signed-off-by: German Gomez Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-csky@vger.kernel.org Cc: linux-riscv@lists.infradead.org Link: https://lore.kernel.org/r/20211201123334.679131-2-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 5 ++++- tools/perf/util/perf_regs.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 95ffed66369c..c59331eea1d9 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -44,13 +44,16 @@ struct perf_event_attr; /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) +/* number of register is bound by the number of bits in regs_dump::mask (64) */ +#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) + struct regs_dump { u64 abi; u64 mask; u64 *regs; /* Cached values/mask filled by first register access. */ - u64 cache_regs[PERF_REGS_MAX]; + u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; u64 cache_mask; }; diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 5ee47ae1509c..06a7461ba864 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -25,6 +25,9 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) int i, idx = 0; u64 mask = regs->mask; + if ((u64)id >= PERF_SAMPLE_REGS_CACHE_SIZE) + return -EINVAL; + if (regs->cache_mask & (1ULL << id)) goto out; From 057ae59f5a1d924511beb1b09f395bdb316cfd03 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Dec 2021 18:22:57 +0200 Subject: [PATCH 411/868] perf intel-pt: Fix some PGE (packet generation enable/control flow packets) usage Packet generation enable (PGE) refers to whether control flow (COFI) packets are being produced. PGE may be false even when branch-tracing is enabled, due to being out-of-context, or outside a filter address range. Fix some missing PGE usage. Fixes: 7c1b16ba0e26e6 ("perf intel-pt: Add support for decoding FUP/TIP only") Fixes: 839598176b0554 ("perf intel-pt: Allow decoding with branch tracing disabled") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.15+ Link: https://lore.kernel.org/r/20211210162303.2288710-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5f83937bf8f3..6f6f163161a9 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2678,6 +2678,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in return HOP_IGNORE; case INTEL_PT_TIP_PGD: + decoder->pge = false; if (!decoder->packet.count) { intel_pt_set_nr(decoder); return HOP_IGNORE; @@ -2707,7 +2708,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in intel_pt_set_ip(decoder); if (intel_pt_fup_event(decoder)) return HOP_RETURN; - if (!decoder->branch_enable) + if (!decoder->branch_enable || !decoder->pge) *no_tip = true; if (*no_tip) { decoder->state.type = INTEL_PT_INSTRUCTION; @@ -2897,7 +2898,7 @@ static bool intel_pt_psb_with_fup(struct intel_pt_decoder *decoder, int *err) { struct intel_pt_psb_info data = { .fup = false }; - if (!decoder->branch_enable || !decoder->pge) + if (!decoder->branch_enable) return false; intel_pt_pkt_lookahead(decoder, intel_pt_psb_lookahead_cb, &data); @@ -2999,7 +3000,7 @@ next: break; } intel_pt_set_last_ip(decoder); - if (!decoder->branch_enable) { + if (!decoder->branch_enable || !decoder->pge) { decoder->ip = decoder->last_ip; if (intel_pt_fup_event(decoder)) return 0; From ad106a26aef3a95ac7ca88d033b431661ba346ce Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Dec 2021 18:22:58 +0200 Subject: [PATCH 412/868] perf intel-pt: Fix sync state when a PSB (synchronization) packet is found When syncing, it may be that branch packet generation is not enabled at that point, in which case there will not immediately be a control-flow packet, so some packets before a control flow packet turns up, get ignored. However, the decoder is in sync as soon as a PSB is found, so the state should be set accordingly. Fixes: f4aa081949e7b6 ("perf tools: Add Intel PT decoder") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.15+ Link: https://lore.kernel.org/r/20211210162303.2288710-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 6f6f163161a9..bddf98123dc3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -3608,7 +3608,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) } decoder->have_last_ip = true; - decoder->pkt_state = INTEL_PT_STATE_NO_IP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_psb(decoder); if (err) From 4c761d805bb2d2ead1b9baaba75496152b394c80 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Dec 2021 18:22:59 +0200 Subject: [PATCH 413/868] perf intel-pt: Fix intel_pt_fup_event() assumptions about setting state type intel_pt_fup_event() assumes it can overwrite the state type if there has been an FUP event, but this is an unnecessary and unexpected constraint on callers. Fix by touching only the state type flags that are affected by an FUP event. Fixes: a472e65fc490a ("perf intel-pt: Add decoder support for ptwrite and power event packets") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.15+ Link: https://lore.kernel.org/r/20211210162303.2288710-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index bddf98123dc3..16fbbf07e367 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1205,61 +1205,55 @@ out_no_progress: static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) { + enum intel_pt_sample_type type = decoder->state.type; bool ret = false; + decoder->state.type &= ~INTEL_PT_BRANCH; + if (decoder->set_fup_tx_flags) { decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.type |= INTEL_PT_TRANSACTION; if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) decoder->state.type |= INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; - return true; + ret = true; } if (decoder->set_fup_ptw) { decoder->set_fup_ptw = false; - decoder->state.type = INTEL_PT_PTW; + decoder->state.type |= INTEL_PT_PTW; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.ptw_payload = decoder->fup_ptw_payload; - return true; + ret = true; } if (decoder->set_fup_mwait) { decoder->set_fup_mwait = false; - decoder->state.type = INTEL_PT_MWAIT_OP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_MWAIT_OP; decoder->state.mwait_payload = decoder->fup_mwait_payload; ret = true; } if (decoder->set_fup_pwre) { decoder->set_fup_pwre = false; decoder->state.type |= INTEL_PT_PWR_ENTRY; - decoder->state.type &= ~INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.pwre_payload = decoder->fup_pwre_payload; ret = true; } if (decoder->set_fup_exstop) { decoder->set_fup_exstop = false; decoder->state.type |= INTEL_PT_EX_STOP; - decoder->state.type &= ~INTEL_PT_BRANCH; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; ret = true; } if (decoder->set_fup_bep) { decoder->set_fup_bep = false; decoder->state.type |= INTEL_PT_BLK_ITEMS; - decoder->state.type &= ~INTEL_PT_BRANCH; + ret = true; + } + if (ret) { decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; - ret = true; + } else { + decoder->state.type = type; } return ret; } From c79ee2b2160909889df67c8801352d3e69d43a1a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Dec 2021 18:23:00 +0200 Subject: [PATCH 414/868] perf intel-pt: Fix state setting when receiving overflow (OVF) packet An overflow (OVF packet) is treated as an error because it represents a loss of trace data, but there is no loss of synchronization, so the packet state should be INTEL_PT_STATE_IN_SYNC not INTEL_PT_STATE_ERR_RESYNC. To support that, some additional variables must be reset, and the FUP packet that may follow OVF is treated as an FUP event. Fixes: f4aa081949e7b6 ("perf tools: Add Intel PT decoder") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.15+ Link: https://lore.kernel.org/r/20211210162303.2288710-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../util/intel-pt-decoder/intel-pt-decoder.c | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 16fbbf07e367..845b0ca866a4 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1249,6 +1249,20 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) decoder->state.type |= INTEL_PT_BLK_ITEMS; ret = true; } + if (decoder->overflow) { + decoder->overflow = false; + if (!ret && !decoder->pge) { + if (decoder->hop) { + decoder->state.type = 0; + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + } + decoder->pge = true; + decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return true; + } + } if (ret) { decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; @@ -1602,7 +1616,16 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_clear_tx_flags(decoder); intel_pt_set_nr(decoder); decoder->timestamp_insn_cnt = 0; - decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->ip = 0; + decoder->pge = false; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + decoder->set_fup_bep = false; decoder->overflow = true; return -EOVERFLOW; } @@ -2957,6 +2980,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; + decoder->overflow = false; intel_pt_mtc_cyc_cnt_pge(decoder); intel_pt_set_nr(decoder); if (decoder->packet.count == 0) { @@ -3462,10 +3486,10 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->set_fup_pwre = false; decoder->set_fup_exstop = false; decoder->set_fup_bep = false; + decoder->overflow = false; if (!decoder->branch_enable) { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.type = 0; /* Do not have a sample */ return 0; } @@ -3480,7 +3504,6 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; @@ -3699,7 +3722,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err) { decoder->state.err = intel_pt_ext_err(err); - decoder->state.from_ip = decoder->ip; + if (err != -EOVERFLOW) + decoder->state.from_ip = decoder->ip; intel_pt_update_sample_time(decoder); decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; intel_pt_set_nr(decoder); From a32e6c5da599dbf49e60622a4dfb5b9b40ece029 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Dec 2021 18:23:01 +0200 Subject: [PATCH 415/868] perf intel-pt: Fix next 'err' value, walking trace Code after label 'next:' in intel_pt_walk_trace() assumes 'err' is zero, but it may not be, if arrived at via a 'goto'. Ensure it is zero. Fixes: 7c1b16ba0e26e6 ("perf intel-pt: Add support for decoding FUP/TIP only") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.15+ Link: https://lore.kernel.org/r/20211210162303.2288710-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 845b0ca866a4..75b504aed7f4 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2942,6 +2942,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + err = 0; if (decoder->cyc_threshold) { if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) decoder->sample_cyc = false; From a882cc94971093e146ffa1163b140ad956236754 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Dec 2021 18:23:02 +0200 Subject: [PATCH 416/868] perf intel-pt: Fix missing 'instruction' events with 'q' option FUP packets contain IP information, which makes them also an 'instruction' event in 'hop' mode i.e. the itrace 'q' option. That wasn't happening, so restructure the logic so that FUP events are added along with appropriate 'instruction' and 'branch' events. Fixes: 7c1b16ba0e26e6 ("perf intel-pt: Add support for decoding FUP/TIP only") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.15+ Link: https://lore.kernel.org/r/20211210162303.2288710-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 75b504aed7f4..0e013c2d9eb4 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -2683,6 +2683,8 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder); /* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) { + *err = 0; + /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { *err = intel_pt_scan_for_psb(decoder); @@ -2723,18 +2725,21 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in if (!decoder->packet.count) return HOP_IGNORE; intel_pt_set_ip(decoder); - if (intel_pt_fup_event(decoder)) - return HOP_RETURN; + if (decoder->set_fup_mwait || decoder->set_fup_pwre) + *no_tip = true; if (!decoder->branch_enable || !decoder->pge) *no_tip = true; if (*no_tip) { decoder->state.type = INTEL_PT_INSTRUCTION; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; + intel_pt_fup_event(decoder); return HOP_RETURN; } + intel_pt_fup_event(decoder); + decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH; *err = intel_pt_walk_fup_tip(decoder); - if (!*err) + if (!*err && decoder->state.to_ip) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; return HOP_RETURN; From 6665b8e4836caa8023cbc7e53733acd234969c8c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 10 Dec 2021 18:23:03 +0200 Subject: [PATCH 417/868] perf intel-pt: Fix error timestamp setting on the decoder error path An error timestamp shows the last known timestamp for the queue, but this is not updated on the error path. Fix by setting it. Fixes: f4aa081949e7b6 ("perf tools: Add Intel PT decoder") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org # v5.15+ Link: https://lore.kernel.org/r/20211210162303.2288710-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 556a893508da..10c3187e4c5a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2565,6 +2565,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } + ptq->timestamp = state->est_timestamp; if (pt->synth_opts.errors) { err = intel_ptq_synth_error(ptq, state); if (err) From 9937e8daab29d9e20de6b7bc56c76db7a4eeda69 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sat, 11 Dec 2021 05:38:53 +0000 Subject: [PATCH 418/868] perf python: Fix NULL vs IS_ERR_OR_NULL() checking The function trace_event__tp_format_id may return ERR_PTR(-ENOMEM). Use IS_ERR_OR_NULL to check tp_format. Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Link: http://lore.kernel.org/lkml/20211211053856.19827-1-linmq006@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 563a9ba8954f..7f782a31bda3 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -461,7 +461,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) struct tep_event *tp_format; tp_format = trace_event__tp_format_id(evsel->core.attr.config); - if (!tp_format) + if (IS_ERR_OR_NULL(tp_format)) return NULL; evsel->tp_format = tp_format; From 153a2d7e3350cc89d406ba2d35be8793a64c2038 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Dec 2021 18:59:27 +0100 Subject: [PATCH 419/868] USB: gadget: detect too-big endpoint 0 requests Sometimes USB hosts can ask for buffers that are too large from endpoint 0, which should not be allowed. If this happens for OUT requests, stall the endpoint, but for IN requests, trim the request size to the endpoint buffer size. Co-developed-by: Szymon Heidrich Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 12 ++++++++++++ drivers/usb/gadget/legacy/dbgp.c | 13 +++++++++++++ drivers/usb/gadget/legacy/inode.c | 16 +++++++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 504c1cbc255d..1ef7922b57b6 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1679,6 +1679,18 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) struct usb_function *f = NULL; u8 endp; + if (w_length > USB_COMP_EP0_BUFSIZ) { + if (ctrl->bRequestType == USB_DIR_OUT) { + goto done; + } else { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); + w_length = USB_COMP_EP0_BUFSIZ; + } + } + /* partial re-init of the response message; the function or the * gadget might need to intercept e.g. a control-OUT completion * when we delegate to it. diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index e1d566c9918a..e567afcb2794 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -345,6 +345,19 @@ static int dbgp_setup(struct usb_gadget *gadget, void *data = NULL; u16 len = 0; + if (length > DBGP_REQ_LEN) { + if (ctrl->bRequestType == USB_DIR_OUT) { + return err; + } else { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(DBGP_REQ_LEN); + length = DBGP_REQ_LEN; + } + } + + if (request == USB_REQ_GET_DESCRIPTOR) { switch (value>>8) { case USB_DT_DEVICE: diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 78be94750232..63150e3889ef 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -110,6 +110,8 @@ enum ep0_state { /* enough for the whole queue: most events invalidate others */ #define N_EVENT 5 +#define RBUF_SIZE 256 + struct dev_data { spinlock_t lock; refcount_t count; @@ -144,7 +146,7 @@ struct dev_data { struct dentry *dentry; /* except this scratch i/o buffer for ep0 */ - u8 rbuf [256]; + u8 rbuf[RBUF_SIZE]; }; static inline void get_dev (struct dev_data *data) @@ -1331,6 +1333,18 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); + if (w_length > RBUF_SIZE) { + if (ctrl->bRequestType == USB_DIR_OUT) { + return value; + } else { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(RBUF_SIZE); + w_length = RBUF_SIZE; + } + } + spin_lock (&dev->lock); dev->setup_abort = 0; if (dev->state == STATE_DEV_UNCONNECTED) { From 86ebbc11bb3f60908a51f3e41a17e3f477c2eaa3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Dec 2021 19:02:15 +0100 Subject: [PATCH 420/868] USB: gadget: zero allocate endpoint 0 buffers Under some conditions, USB gadget devices can show allocated buffer contents to a host. Fix this up by zero-allocating them so that any extra data will all just be zeros. Reported-by: Szymon Heidrich Tested-by: Szymon Heidrich Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 2 +- drivers/usb/gadget/legacy/dbgp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 1ef7922b57b6..284eea9f6e4d 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2221,7 +2221,7 @@ int composite_dev_prepare(struct usb_composite_driver *composite, if (!cdev->req) return -ENOMEM; - cdev->req->buf = kmalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); + cdev->req->buf = kzalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); if (!cdev->req->buf) goto fail; diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index e567afcb2794..355bc7dab9d5 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -137,7 +137,7 @@ static int dbgp_enable_ep_req(struct usb_ep *ep) goto fail_1; } - req->buf = kmalloc(DBGP_REQ_LEN, GFP_KERNEL); + req->buf = kzalloc(DBGP_REQ_LEN, GFP_KERNEL); if (!req->buf) { err = -ENOMEM; stp = 2; From 1a3910c80966e4a76b25ce812f6bea0ef1b1d530 Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Fri, 10 Dec 2021 09:52:18 +0100 Subject: [PATCH 421/868] usb: core: config: fix validation of wMaxPacketValue entries The checks performed by commit aed9d65ac327 ("USB: validate wMaxPacketValue entries in endpoint descriptors") require that initial value of the maxp variable contains both maximum packet size bits (10..0) and multiple-transactions bits (12..11). However, the existing code assings only the maximum packet size bits. This patch assigns all bits of wMaxPacketSize to the variable. Fixes: aed9d65ac327 ("USB: validate wMaxPacketValue entries in endpoint descriptors") Cc: stable Acked-by: Alan Stern Signed-off-by: Pavel Hofman Link: https://lore.kernel.org/r/20211210085219.16796-1-pavel.hofman@ivitera.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 16b1fd9dc60c..e3c3a73e1ed8 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -406,7 +406,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0 * (see the end of section 5.6.3), so don't warn about them. */ - maxp = usb_endpoint_maxp(&endpoint->desc); + maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize); if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", cfgno, inum, asnum, d->bEndpointAddress); From ca5737396927afd4d57b133fd2874bbcf3421cdb Mon Sep 17 00:00:00 2001 From: Pavel Hofman Date: Fri, 10 Dec 2021 09:52:19 +0100 Subject: [PATCH 422/868] usb: core: config: using bit mask instead of individual bits Using standard USB_EP_MAXP_MULT_MASK instead of individual bits for extracting multiple-transactions bits from wMaxPacketSize value. Acked-by: Alan Stern Signed-off-by: Pavel Hofman Link: https://lore.kernel.org/r/20211210085219.16796-2-pavel.hofman@ivitera.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index e3c3a73e1ed8..48bc8a4814ac 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -422,9 +422,9 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, maxpacket_maxes = full_speed_maxpacket_maxes; break; case USB_SPEED_HIGH: - /* Bits 12..11 are allowed only for HS periodic endpoints */ + /* Multiple-transactions bits are allowed only for HS periodic endpoints */ if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) { - i = maxp & (BIT(12) | BIT(11)); + i = maxp & USB_EP_MAXP_MULT_MASK; maxp &= ~i; } fallthrough; From 3748939bce3fc7a15ef07161826507fbe410bb7a Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 10 Dec 2021 15:25:23 +0800 Subject: [PATCH 423/868] selftests: icmp_redirect: pass xfail=0 to log_test() If any sub-test in this icmp_redirect.sh is failing but not expected to fail. The script will complain: ./icmp_redirect.sh: line 72: [: 1: unary operator expected This is because when the sub-test is not expected to fail, we won't pass any value for the xfail local variable in log_test() and thus it's empty. Fix this by passing 0 as the 4th variable to log_test() for non-xfail cases. v2: added fixes tag Fixes: 0a36a75c6818 ("selftests: icmp_redirect: support expected failures") Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/icmp_redirect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index ecbf57f264ed..7b9d6e31b8e7 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -311,7 +311,7 @@ check_exception() ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -E -v 'mtu|redirected' | grep -q "cache" fi - log_test $? 0 "IPv4: ${desc}" + log_test $? 0 "IPv4: ${desc}" 0 # No PMTU info for test "redirect" and "mtu exception plus redirect" if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then From 27cbf64a766e86f068ce6214f04c00ceb4db1af4 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Fri, 10 Dec 2021 21:09:33 +0800 Subject: [PATCH 424/868] net: hns3: fix use-after-free bug in hclgevf_send_mbx_msg Currently, the hns3_remove function firstly uninstall client instance, and then uninstall acceletion engine device. The netdevice is freed in client instance uninstall process, but acceletion engine device uninstall process still use it to trace runtime information. This causes a use after free problem. So fixes it by check the instance register state to avoid use after free. Fixes: d8355240cf8f ("net: hns3: add trace event support for PF/VF mailbox") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index fdc66fae0960..c5ac6ecf36e1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -114,7 +114,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, memcpy(&req->msg, send_msg, sizeof(struct hclge_vf_to_pf_msg)); - trace_hclge_vf_mbx_send(hdev, req); + if (test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state)) + trace_hclge_vf_mbx_send(hdev, req); /* synchronous send */ if (need_resp) { From 6dde452bceca3f2ed2b33bc46a16ff5682a03a2e Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Fri, 10 Dec 2021 21:09:34 +0800 Subject: [PATCH 425/868] net: hns3: fix race condition in debugfs When multiple threads concurrently access the debugfs content, data and pointer exceptions may occur. Therefore, mutex lock protection is added for debugfs. Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 ++ .../ethernet/hisilicon/hns3/hns3_debugfs.c | 20 +++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3f7a9a4c59d5..63f5abcc6bf4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -839,6 +839,8 @@ struct hnae3_handle { u8 netdev_flags; struct dentry *hnae3_dbgfs; + /* protects concurrent contention between debugfs commands */ + struct mutex dbgfs_lock; /* Network interface message level enabled bits */ u32 msg_enable; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 081295bff765..c381f8af67f0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -1226,6 +1226,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, if (ret) return ret; + mutex_lock(&handle->dbgfs_lock); save_buf = &hns3_dbg_cmd[index].buf; if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) || @@ -1238,15 +1239,15 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, read_buf = *save_buf; } else { read_buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL); - if (!read_buf) - return -ENOMEM; + if (!read_buf) { + ret = -ENOMEM; + goto out; + } /* save the buffer addr until the last read operation */ *save_buf = read_buf; - } - /* get data ready for the first time to read */ - if (!*ppos) { + /* get data ready for the first time to read */ ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd, read_buf, hns3_dbg_cmd[index].buf_len); if (ret) @@ -1255,8 +1256,10 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, size = simple_read_from_buffer(buffer, count, ppos, read_buf, strlen(read_buf)); - if (size > 0) + if (size > 0) { + mutex_unlock(&handle->dbgfs_lock); return size; + } out: /* free the buffer for the last read operation */ @@ -1265,6 +1268,7 @@ out: *save_buf = NULL; } + mutex_unlock(&handle->dbgfs_lock); return ret; } @@ -1337,6 +1341,8 @@ int hns3_dbg_init(struct hnae3_handle *handle) debugfs_create_dir(hns3_dbg_dentry[i].name, handle->hnae3_dbgfs); + mutex_init(&handle->dbgfs_lock); + for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) { if ((hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES && ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) || @@ -1363,6 +1369,7 @@ int hns3_dbg_init(struct hnae3_handle *handle) return 0; out: + mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; return ret; @@ -1378,6 +1385,7 @@ void hns3_dbg_uninit(struct hnae3_handle *handle) hns3_dbg_cmd[i].buf = NULL; } + mutex_destroy(&handle->dbgfs_lock); debugfs_remove_recursive(handle->hnae3_dbgfs); handle->hnae3_dbgfs = NULL; } From 7e0147592b5c4f9e2eb8c54a7857a56d4863f74e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 10:11:30 -0700 Subject: [PATCH 426/868] selftests: Add duplicate config only for MD5 VRF tests Commit referenced below added configuration in the default VRF that duplicates a VRF to check MD5 passwords are properly used and fail when expected. That config should not be added all the time as it can cause tests to pass that should not (by matching on default VRF setup when it should not). Move the duplicate setup to a function that is only called for the MD5 tests and add a cleanup function to remove it after the MD5 tests. Fixes: 5cad8bce26e0 ("fcnal-test: Add TCP MD5 tests for VRF") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 29cc72d7c3d0..dd7437dd2680 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -455,6 +455,22 @@ cleanup() ip netns del ${NSC} >/dev/null 2>&1 } +cleanup_vrf_dup() +{ + ip link del ${NSA_DEV2} >/dev/null 2>&1 + ip netns pids ${NSC} | xargs kill 2>/dev/null + ip netns del ${NSC} >/dev/null 2>&1 +} + +setup_vrf_dup() +{ + # some VRF tests use ns-C which has the same config as + # ns-B but for a device NOT in the VRF + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ + ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 +} + setup() { local with_vrf=${1} @@ -484,12 +500,6 @@ setup() ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV} ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV} - - # some VRF tests use ns-C which has the same config as - # ns-B but for a device NOT in the VRF - create_ns ${NSC} "-" "-" - connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ - ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 else ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV} ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV} @@ -1240,7 +1250,9 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv4_tcp_md5 + cleanup_vrf_dup # # enable VRF global server @@ -2719,7 +2731,9 @@ ipv6_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv6_tcp_md5 + cleanup_vrf_dup # # enable VRF global server From 0f108ae4452025fef529671998f6c7f1c4526790 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 10:21:08 -0700 Subject: [PATCH 427/868] selftests: Fix raw socket bind tests with VRF Commit referenced below added negative socket bind tests for VRF. The socket binds should fail since the address to bind to is in a VRF yet the socket is not bound to the VRF or a device within it. Update the expected return code to check for 1 (bind failure) so the test passes when the bind fails as expected. Add a 'show_hint' comment to explain why the bind is expected to fail. Fixes: 75b2b2b3db4c ("selftests: Add ipv4 address bind tests to fcnal-test") Reported-by: Li Zhijian Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index dd7437dd2680..4340477863d3 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -1810,8 +1810,9 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start + show_hint "Socket not bound to VRF, but address is in VRF" run_cmd nettest -s -R -P icmp -l ${a} -b - log_test_addr ${a} $? 0 "Raw socket bind to local address" + log_test_addr ${a} $? 1 "Raw socket bind to local address" log_start run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b From 28a2686c185e84b6aa6a4d9c9a972360eb7ca266 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sat, 11 Dec 2021 11:26:16 -0700 Subject: [PATCH 428/868] selftests: Fix IPv6 address bind tests IPv6 allows binding a socket to a device then binding to an address not on the device (__inet6_bind -> ipv6_chk_addr with strict flag not set). Update the bind tests to reflect legacy behavior. Fixes: 34d0302ab861 ("selftests: Add ipv6 address bind tests to fcnal-test") Reported-by: Li Zhijian Signed-off-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 4340477863d3..ad2982b72e02 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -3429,11 +3429,14 @@ ipv6_addr_bind_novrf() run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. So this test passes + # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Should fail with 'Cannot assign requested address'" + show_hint "Tecnically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" + log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } ipv6_addr_bind_vrf() @@ -3474,10 +3477,15 @@ ipv6_addr_bind_vrf() run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. The only restriction + # is that the address is valid in the L3 domain. So this test + # passes when it really should not a=${VRF_IP6} log_start + show_hint "Tecnically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" + log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start From c9b12b59e2ea4c3c7cedec7efb071b649652f3a9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 6 Dec 2021 11:50:16 +0100 Subject: [PATCH 429/868] s390/entry: fix duplicate tracking of irq nesting level In the current code, when exiting from idle, rcu_irq_enter() is called twice during irq entry: irq_entry_enter()-> rcu_irq_enter() irq_enter() -> rcu_irq_enter() This may lead to wrong results from rcu_is_cpu_rrupt_from_idle() because of a wrong dynticks nmi nesting count. Fix this by only calling irq_enter_rcu(). Cc: # 5.12+ Reported-by: Mark Rutland Fixes: 56e62a737028 ("s390: convert to generic entry") Signed-off-by: Sven Schnelle Signed-off-by: Heiko Carstens --- arch/s390/kernel/irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0df83ecaa2e0..cb7099682340 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -138,7 +138,7 @@ void noinstr do_io_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); int from_idle; - irq_enter(); + irq_enter_rcu(); if (user_mode(regs)) { update_timer_sys(); @@ -158,7 +158,8 @@ void noinstr do_io_irq(struct pt_regs *regs) do_irq_async(regs, IO_INTERRUPT); } while (MACHINE_IS_LPAR && irq_pending(regs)); - irq_exit(); + irq_exit_rcu(); + set_irq_regs(old_regs); irqentry_exit(regs, state); @@ -172,7 +173,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); int from_idle; - irq_enter(); + irq_enter_rcu(); if (user_mode(regs)) { update_timer_sys(); @@ -190,7 +191,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) do_irq_async(regs, EXT_INTERRUPT); - irq_exit(); + irq_exit_rcu(); set_irq_regs(old_regs); irqentry_exit(regs, state); From 85bf17b28f97ca2749968d8786dc423db320d9c2 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 10 Dec 2021 10:38:27 +0100 Subject: [PATCH 430/868] recordmcount.pl: look for jgnop instruction as well as bcrl on s390 On s390, recordmcount.pl is looking for "bcrl 0," instructions in the objdump -d outpout. However since binutils 2.37, objdump -d display "jgnop " for the same instruction. Update the mcount_regex so that it accepts both. Signed-off-by: Jerome Marchand Reviewed-by: Miroslav Benes Acked-by: Steven Rostedt (VMware) Cc: Link: https://lore.kernel.org/r/20211210093827.1623286-1-jmarchan@redhat.com Signed-off-by: Heiko Carstens --- scripts/recordmcount.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 7d631aaa0ae1..52a000b057a5 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -219,7 +219,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 64) { if ($cc =~ /-DCC_USING_HOTPATCH/) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; } $alignment = 8; From 2585cf9dfaaddf00b069673f27bb3f8530e2039c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Dec 2021 14:53:01 -0800 Subject: [PATCH 431/868] Linux 5.16-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bce58275257a..765115c99655 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Gobble Gobble # *DOCUMENTATION* From fce15c45d3fbd9fc1feaaf3210d8e3f8b33dfd3a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Nov 2021 10:02:44 -0700 Subject: [PATCH 432/868] hwmon: (lm90) Fix usage of CONFIG2 register in detect function The detect function had a comment "Make compiler happy" when id did not read the second configuration register. As it turns out, the code was checking the contents of this register for manufacturer ID 0xA1 (NXP Semiconductor/Philips), but never actually read the register. So it wasn't surprising that the compiler complained, and it indeed had a point. Fix the code to read the register contents for manufacturer ID 0xa1. At the same time, the code was reading the register for manufacturer ID 0x41 (Analog Devices), but it was not using the results. In effect it was just checking if reading the register returned an error. That doesn't really add much if any value, so stop doing that. Fixes: f90be42fb383 ("hwmon: (lm90) Refactor reading of config2 register") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 618052c6cdb6..b05d73c4fbe2 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1465,12 +1465,11 @@ static int lm90_detect(struct i2c_client *client, if (man_id < 0 || chip_id < 0 || config1 < 0 || convrate < 0) return -ENODEV; - if (man_id == 0x01 || man_id == 0x5C || man_id == 0x41) { + if (man_id == 0x01 || man_id == 0x5C || man_id == 0xA1) { config2 = i2c_smbus_read_byte_data(client, LM90_REG_R_CONFIG2); if (config2 < 0) return -ENODEV; - } else - config2 = 0; /* Make compiler happy */ + } if ((address == 0x4C || address == 0x4D) && man_id == 0x01) { /* National Semiconductor */ From 55840b9eae5367b5d5b29619dc2fb7e4596dba46 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 17 Nov 2021 09:51:47 -0800 Subject: [PATCH 433/868] hwmon: (lm90) Prevent integer overflow/underflow in hysteresis calculations Commit b50aa49638c7 ("hwmon: (lm90) Prevent integer underflows of temperature calculations") addressed a number of underflow situations when writing temperature limits. However, it missed one situation, seen when an attempt is made to set the hysteresis value to MAX_LONG and the critical temperature limit is negative. Use clamp_val() when setting the hysteresis temperature to ensure that the provided value can never overflow or underflow. Fixes: b50aa49638c7 ("hwmon: (lm90) Prevent integer underflows of temperature calculations") Cc: Dmitry Osipenko Reviewed-by: Dmitry Osipenko Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index b05d73c4fbe2..72969ea83d82 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -1160,8 +1160,8 @@ static int lm90_set_temphyst(struct lm90_data *data, long val) else temp = temp_from_s8(data->temp8[LOCAL_CRIT]); - /* prevent integer underflow */ - val = max(val, -128000l); + /* prevent integer overflow/underflow */ + val = clamp_val(val, -128000l, 255000l); data->temp_hyst = hyst_to_reg(temp - val); err = i2c_smbus_write_byte_data(client, LM90_REG_W_TCRIT_HYST, From 16ba51b5dcd3f6dde2e51d5ccc86313119dcf889 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 13 Nov 2021 08:55:06 -0800 Subject: [PATCH 434/868] hwmon: (lm90) Drop critical attribute support for MAX6654 Tests with a real chip and a closer look into the datasheet show that MAX6654 does not support CRIT/THERM/OVERTEMP limits, so drop support of the respective attributes for this chip. Introduce LM90_HAVE_CRIT flag and use it to instantiate critical limit attributes to solve the problem. Cc: Josh Lehan Fixes: 229d495d8189 ("hwmon: (lm90) Add max6654 support to lm90 driver") Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 86 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 72969ea83d82..6597d055e09d 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -35,13 +35,14 @@ * explicitly as max6659, or if its address is not 0x4c. * These chips lack the remote temperature offset feature. * - * This driver also supports the MAX6654 chip made by Maxim. This chip can - * be at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is - * otherwise similar to MAX6657/MAX6658/MAX6659. Extended range is available - * by setting the configuration register accordingly, and is done during - * initialization. Extended precision is only available at conversion rates - * of 1 Hz and slower. Note that extended precision is not enabled by - * default, as this driver initializes all chips to 2 Hz by design. + * This driver also supports the MAX6654 chip made by Maxim. This chip can be + * at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is similar + * to MAX6657/MAX6658/MAX6659, but does not support critical temperature + * limits. Extended range is available by setting the configuration register + * accordingly, and is done during initialization. Extended precision is only + * available at conversion rates of 1 Hz and slower. Note that extended + * precision is not enabled by default, as this driver initializes all chips + * to 2 Hz by design. * * This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and * MAX6692 chips made by Maxim. These are again similar to the LM86, @@ -188,6 +189,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_BROKEN_ALERT (1 << 7) /* Broken alert */ #define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ #define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ +#define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -354,38 +356,43 @@ struct lm90_params { static const struct lm90_params lm90_params[] = { [adm1032] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [adt7461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP + | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [g781] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [lm86] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm90] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm99] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [max6646] = { + .flags = LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, @@ -396,50 +403,50 @@ static const struct lm90_params lm90_params[] = { .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6657] = { - .flags = LM90_PAUSE_FOR_CONFIG, + .flags = LM90_PAUSE_FOR_CONFIG | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6659] = { - .flags = LM90_HAVE_EMERGENCY, + .flags = LM90_HAVE_EMERGENCY | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 7, }, [max6696] = { .flags = LM90_HAVE_EMERGENCY - | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3, + | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3 | LM90_HAVE_CRIT, .alert_alarms = 0x1c7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [w83l771] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [sa56004] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, .reg_local_ext = SA56004_REG_R_LOCAL_TEMPL, }, [tmp451] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, }, [tmp461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, @@ -668,20 +675,22 @@ static int lm90_update_limits(struct device *dev) struct i2c_client *client = data->client; int val; - val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); - if (val < 0) - return val; - data->temp8[LOCAL_CRIT] = val; + if (data->flags & LM90_HAVE_CRIT) { + val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); + if (val < 0) + return val; + data->temp8[LOCAL_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); - if (val < 0) - return val; - data->temp8[REMOTE_CRIT] = val; + val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); + if (val < 0) + return val; + data->temp8[REMOTE_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); - if (val < 0) - return val; - data->temp_hyst = val; + val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); + if (val < 0) + return val; + data->temp_hyst = val; + } val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH); if (val < 0) @@ -1902,11 +1911,14 @@ static int lm90_probe(struct i2c_client *client) info->config = data->channel_config; data->channel_config[0] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM; data->channel_config[1] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM | HWMON_T_FAULT; + + if (data->flags & LM90_HAVE_CRIT) { + data->channel_config[0] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + data->channel_config[1] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + } if (data->flags & LM90_HAVE_OFFSET) data->channel_config[1] |= HWMON_T_OFFSET; From da7dc0568491104c7acb632e9d41ddce9aaabbb1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 26 Nov 2021 22:43:39 -0800 Subject: [PATCH 435/868] hwmom: (lm90) Fix citical alarm status for MAX6680/MAX6681 Tests with a real chip and a closer look into the datasheet reveals that the local and remote critical alarm status bits are swapped for MAX6680/MAX6681. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 6597d055e09d..dd8612a9d536 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -190,6 +190,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ #define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ #define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ +#define LM90_HAVE_CRIT_ALRM_SWP (1 << 11)/* critical alarm bits swapped */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -415,7 +416,8 @@ static const struct lm90_params lm90_params[] = { .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT + | LM90_HAVE_CRIT_ALRM_SWP, .alert_alarms = 0x7c, .max_convrate = 7, }, @@ -1201,6 +1203,7 @@ static const u8 lm90_temp_emerg_index[3] = { static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; +static const u8 lm90_crit_alarm_bits_swapped[3] = { 1, 0, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; @@ -1226,7 +1229,10 @@ static int lm90_temp_read(struct device *dev, u32 attr, int channel, long *val) *val = (data->alarms >> lm90_max_alarm_bits[channel]) & 1; break; case hwmon_temp_crit_alarm: - *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; + if (data->flags & LM90_HAVE_CRIT_ALRM_SWP) + *val = (data->alarms >> lm90_crit_alarm_bits_swapped[channel]) & 1; + else + *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; break; case hwmon_temp_emergency_alarm: *val = (data->alarms >> lm90_emergency_alarm_bits[channel]) & 1; From cdc5287acad9ede121924a9c9313544b80d15842 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 3 Dec 2021 13:42:22 -0800 Subject: [PATCH 436/868] hwmon: (lm90) Do not report 'busy' status bit as alarm Bit 7 of the status register indicates that the chip is busy doing a conversion. It does not indicate an alarm status. Stop reporting it as alarm status bit. Signed-off-by: Guenter Roeck --- drivers/hwmon/lm90.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index dd8612a9d536..74019dff2550 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -200,6 +200,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_STATUS_RHIGH (1 << 4) /* remote high temp limit tripped */ #define LM90_STATUS_LLOW (1 << 5) /* local low temp limit tripped */ #define LM90_STATUS_LHIGH (1 << 6) /* local high temp limit tripped */ +#define LM90_STATUS_BUSY (1 << 7) /* conversion is ongoing */ #define MAX6696_STATUS2_R2THRM (1 << 1) /* remote2 THERM limit tripped */ #define MAX6696_STATUS2_R2OPEN (1 << 2) /* remote2 is an open circuit */ @@ -820,7 +821,7 @@ static int lm90_update_device(struct device *dev) val = lm90_read_reg(client, LM90_REG_R_STATUS); if (val < 0) return val; - data->alarms = val; /* lower 8 bit of alarms */ + data->alarms = val & ~LM90_STATUS_BUSY; if (data->kind == max6696) { val = lm90_select_remote_channel(data, 1); From 12f247ab590a08856441efdbd351cf2cc8f60a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Sun, 12 Dec 2021 21:01:49 -0800 Subject: [PATCH 437/868] Input: atmel_mxt_ts - fix double free in mxt_read_info_block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "id_buf" buffer is stored in "data->raw_info_block" and freed by "mxt_free_object_table" in case of error. Return instead of jumping to avoid a double free. Addresses-Coverity-ID: 1474582 ("Double free") Fixes: 068bdb67ef74 ("Input: atmel_mxt_ts - fix the firmware update") Signed-off-by: José Expósito Link: https://lore.kernel.org/r/20211212194257.68879-1-jose.exposito89@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 05de92c0293b..eb66cd2689b7 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1882,7 +1882,7 @@ static int mxt_read_info_block(struct mxt_data *data) if (error) { dev_err(&client->dev, "Error %d parsing object table\n", error); mxt_free_object_table(data); - goto err_free_mem; + return error; } data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START); From 80936d68665be88dc3bf60884a71f2694eb6b1f1 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Thu, 9 Dec 2021 23:39:56 +0530 Subject: [PATCH 438/868] dmaengine: ti: k3-udma: Fix smatch warnings Smatch reports below warnings [1] wrt dereferencing rm_res when it can potentially be ERR_PTR(). This is possible when entire range is allocated to Linux Fix this case by making sure, there is no deference of rm_res when its ERR_PTR(). [1]: drivers/dma/ti/k3-udma.c:4524 udma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4537 udma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4681 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4696 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4711 bcdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4848 pktdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() drivers/dma/ti/k3-udma.c:4861 pktdma_setup_resources() error: 'rm_res' dereferencing possible ERR_PTR() Reported-by: Nishanth Menon Signed-off-by: Vignesh Raghavendra Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20211209180957.29036-1-vigneshr@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 153 +++++++++++++++++++++++++++------------ 1 file changed, 105 insertions(+), 48 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 041d8e32d630..6e56d1cef5ee 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4534,45 +4534,60 @@ static int udma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets = 1; } else { bitmap_fill(ud->tchan_map, ud->tchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tchan_map, &rm_res->desc[i], "tchan"); + irq_res.sets = rm_res->sets; } - irq_res.sets = rm_res->sets; /* rchan and matching default flow ranges */ rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets++; } else { bitmap_fill(ud->rchan_map, ud->rchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rchan_map, &rm_res->desc[i], "rchan"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start; - irq_res.desc[i].num = rm_res->desc[i].num; - irq_res.desc[i].start_sec = rm_res->desc[i].start_sec; - irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = 0; + irq_res.desc[0].num = ud->tchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start; + irq_res.desc[i].num = rm_res->desc[i].num; + irq_res.desc[i].start_sec = rm_res->desc[i].start_sec; + irq_res.desc[i].num_sec = rm_res->desc[i].num_sec; + } } rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; - for (j = 0; j < rm_res->sets; j++, i++) { - if (rm_res->desc[j].num) { - irq_res.desc[i].start = rm_res->desc[j].start + - ud->soc_data->oes.udma_rchan; - irq_res.desc[i].num = rm_res->desc[j].num; - } - if (rm_res->desc[j].num_sec) { - irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + - ud->soc_data->oes.udma_rchan; - irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = 0; + irq_res.desc[i].num = ud->rchan_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + if (rm_res->desc[j].num) { + irq_res.desc[i].start = rm_res->desc[j].start + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num = rm_res->desc[j].num; + } + if (rm_res->desc[j].num_sec) { + irq_res.desc[i].start_sec = rm_res->desc[j].start_sec + + ud->soc_data->oes.udma_rchan; + irq_res.desc[i].num_sec = rm_res->desc[j].num_sec; + } } } ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); @@ -4690,14 +4705,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->bchan_map, ud->bchan_cnt); + irq_res.sets++; } else { bitmap_fill(ud->bchan_map, ud->bchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->bchan_map, &rm_res->desc[i], "bchan"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; } /* tchan ranges */ @@ -4705,14 +4721,15 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->tchan_map, ud->tchan_cnt); + irq_res.sets += 2; } else { bitmap_fill(ud->tchan_map, ud->tchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tchan_map, &rm_res->desc[i], "tchan"); + irq_res.sets += rm_res->sets * 2; } - irq_res.sets += rm_res->sets * 2; } /* rchan ranges */ @@ -4720,47 +4737,72 @@ static int bcdma_setup_resources(struct udma_dev *ud) rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; if (IS_ERR(rm_res)) { bitmap_zero(ud->rchan_map, ud->rchan_cnt); + irq_res.sets += 2; } else { bitmap_fill(ud->rchan_map, ud->rchan_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rchan_map, &rm_res->desc[i], "rchan"); + irq_res.sets += rm_res->sets * 2; } - irq_res.sets += rm_res->sets * 2; } irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; if (ud->bchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_BCHAN]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start + - oes->bcdma_bchan_ring; - irq_res.desc[i].num = rm_res->desc[i].num; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->bcdma_bchan_ring; + irq_res.desc[0].num = ud->bchan_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->bcdma_bchan_ring; + irq_res.desc[i].num = rm_res->desc[i].num; + } } } if (ud->tchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_TCHAN]; - for (j = 0; j < rm_res->sets; j++, i += 2) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->bcdma_tchan_data; - irq_res.desc[i].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_tchan_data; + irq_res.desc[i].num = ud->tchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = ud->tchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_tchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; - irq_res.desc[i + 1].start = rm_res->desc[j].start + - oes->bcdma_tchan_ring; - irq_res.desc[i + 1].num = rm_res->desc[j].num; + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_tchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } } } if (ud->rchan_cnt) { rm_res = tisci_rm->rm_ranges[RM_RANGE_RCHAN]; - for (j = 0; j < rm_res->sets; j++, i += 2) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->bcdma_rchan_data; - irq_res.desc[i].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->bcdma_rchan_data; + irq_res.desc[i].num = ud->rchan_cnt; + irq_res.desc[i + 1].start = oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = ud->rchan_cnt; + i += 2; + } else { + for (j = 0; j < rm_res->sets; j++, i += 2) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->bcdma_rchan_data; + irq_res.desc[i].num = rm_res->desc[j].num; - irq_res.desc[i + 1].start = rm_res->desc[j].start + - oes->bcdma_rchan_ring; - irq_res.desc[i + 1].num = rm_res->desc[j].num; + irq_res.desc[i + 1].start = rm_res->desc[j].start + + oes->bcdma_rchan_ring; + irq_res.desc[i + 1].num = rm_res->desc[j].num; + } } } @@ -4858,39 +4900,54 @@ static int pktdma_setup_resources(struct udma_dev *ud) if (IS_ERR(rm_res)) { /* all rflows are assigned exclusively to Linux */ bitmap_zero(ud->rflow_in_use, ud->rflow_cnt); + irq_res.sets = 1; } else { bitmap_fill(ud->rflow_in_use, ud->rflow_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->rflow_in_use, &rm_res->desc[i], "rflow"); + irq_res.sets = rm_res->sets; } - irq_res.sets = rm_res->sets; /* tflow ranges */ rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; if (IS_ERR(rm_res)) { /* all tflows are assigned exclusively to Linux */ bitmap_zero(ud->tflow_map, ud->tflow_cnt); + irq_res.sets++; } else { bitmap_fill(ud->tflow_map, ud->tflow_cnt); for (i = 0; i < rm_res->sets; i++) udma_mark_resource_ranges(ud, ud->tflow_map, &rm_res->desc[i], "tflow"); + irq_res.sets += rm_res->sets; } - irq_res.sets += rm_res->sets; irq_res.desc = kcalloc(irq_res.sets, sizeof(*irq_res.desc), GFP_KERNEL); + if (!irq_res.desc) + return -ENOMEM; rm_res = tisci_rm->rm_ranges[RM_RANGE_TFLOW]; - for (i = 0; i < rm_res->sets; i++) { - irq_res.desc[i].start = rm_res->desc[i].start + - oes->pktdma_tchan_flow; - irq_res.desc[i].num = rm_res->desc[i].num; + if (IS_ERR(rm_res)) { + irq_res.desc[0].start = oes->pktdma_tchan_flow; + irq_res.desc[0].num = ud->tflow_cnt; + i = 1; + } else { + for (i = 0; i < rm_res->sets; i++) { + irq_res.desc[i].start = rm_res->desc[i].start + + oes->pktdma_tchan_flow; + irq_res.desc[i].num = rm_res->desc[i].num; + } } rm_res = tisci_rm->rm_ranges[RM_RANGE_RFLOW]; - for (j = 0; j < rm_res->sets; j++, i++) { - irq_res.desc[i].start = rm_res->desc[j].start + - oes->pktdma_rchan_flow; - irq_res.desc[i].num = rm_res->desc[j].num; + if (IS_ERR(rm_res)) { + irq_res.desc[i].start = oes->pktdma_rchan_flow; + irq_res.desc[i].num = ud->rflow_cnt; + } else { + for (j = 0; j < rm_res->sets; j++, i++) { + irq_res.desc[i].start = rm_res->desc[j].start + + oes->pktdma_rchan_flow; + irq_res.desc[i].num = rm_res->desc[j].num; + } } ret = ti_sci_inta_msi_domain_alloc_irqs(ud->dev, &irq_res); kfree(irq_res.desc); From 8affd8a4b5ce356c8900cfb037674f3a4a11fbdb Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 8 Dec 2021 10:01:27 -0700 Subject: [PATCH 439/868] dmaengine: idxd: fix missed completion on abort path Ming reported that with the abort path of the descriptor submission, there can be a window where a completed descriptor can be missed to be completed by the irq completion thread: CPU A CPU B Submit (successful) Submit (fail) irq_process_work_list() // empty llist_abort_desc() // remove all descs from pending list irq_process_pending_llist() // empty exit idxd_wq_thread() with no processing Add opportunistic descriptor completion in the abort path in order to remove the missed completion. Fixes: 6b4b87f2c31a ("dmaengine: idxd: fix submission race window") Reported-by: Ming Li Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/163898288714.443911.16084982766671976640.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/submit.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c index de76fb4abac2..83452fbbb168 100644 --- a/drivers/dma/idxd/submit.c +++ b/drivers/dma/idxd/submit.c @@ -106,6 +106,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, { struct idxd_desc *d, *t, *found = NULL; struct llist_node *head; + LIST_HEAD(flist); desc->completion->status = IDXD_COMP_DESC_ABORT; /* @@ -120,7 +121,11 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, found = desc; continue; } - list_add_tail(&desc->list, &ie->work_list); + + if (d->completion->status) + list_add_tail(&d->list, &flist); + else + list_add_tail(&d->list, &ie->work_list); } } @@ -130,6 +135,17 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie, if (found) complete_desc(found, IDXD_COMPLETE_ABORT); + + /* + * complete_desc() will return desc to allocator and the desc can be + * acquired by a different process and the desc->list can be modified. + * Delete desc from list so the list trasversing does not get corrupted + * by the other process. + */ + list_for_each_entry_safe(d, t, &flist, list) { + list_del_init(&d->list); + complete_desc(d, IDXD_COMPLETE_NORMAL); + } } int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc) From 822c9f2b833c53fc67e8adf6f63ecc3ea24d502c Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Thu, 25 Nov 2021 15:44:38 +0000 Subject: [PATCH 440/868] dmaengine: st_fdma: fix MODULE_ALIAS modprobe can't handle spaces in aliases. Fixes: 6b4cd727eaf1 ("dmaengine: st_fdma: Add STMicroelectronics FDMA engine driver support") Signed-off-by: Alyssa Ross Link: https://lore.kernel.org/r/20211125154441.2626214-1-hi@alyssa.is Signed-off-by: Vinod Koul --- drivers/dma/st_fdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c index 962b6e05287b..d95c421877fb 100644 --- a/drivers/dma/st_fdma.c +++ b/drivers/dma/st_fdma.c @@ -874,4 +874,4 @@ MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver"); MODULE_AUTHOR("Ludovic.barre "); MODULE_AUTHOR("Peter Griffin "); -MODULE_ALIAS("platform: " DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); From 2dee54b289fbc810669a1b2b8a0887fa1c9a14d7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 12 Dec 2021 17:20:25 +0000 Subject: [PATCH 441/868] ALSA: drivers: opl3: Fix incorrect use of vp->state Static analysis with scan-build has found an assignment to vp2 that is never used. It seems that the check on vp->state > 0 should be actually on vp2->state instead. Fix this. This dates back to 2002, I found the offending commit from the git history git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git, commit 91e39521bbf6 ("[PATCH] ALSA patch for 2.5.4") Signed-off-by: Colin Ian King Cc: Link: https://lore.kernel.org/r/20211212172025.470367-1-colin.i.king@gmail.com Signed-off-by: Takashi Iwai --- sound/drivers/opl3/opl3_midi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c index e1b69c65c3c8..e2b7be67f0e3 100644 --- a/sound/drivers/opl3/opl3_midi.c +++ b/sound/drivers/opl3/opl3_midi.c @@ -397,7 +397,7 @@ void snd_opl3_note_on(void *p, int note, int vel, struct snd_midi_channel *chan) } if (instr_4op) { vp2 = &opl3->voices[voice + 3]; - if (vp->state > 0) { + if (vp2->state > 0) { opl3_reg = reg_side | (OPL3_REG_KEYON_BLOCK + voice_offset + 3); reg_val = vp->keyon_reg & ~OPL3_KEYON_BIT; From c01c1db1dc632edafb0dff32d40daf4f9c1a4e19 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Mon, 13 Dec 2021 15:39:31 +0800 Subject: [PATCH 442/868] ALSA: jack: Check the return value of kstrdup() kstrdup() can return NULL, it is better to check the return value of it. Signed-off-by: Xiaoke Wang Cc: Link: https://lore.kernel.org/r/tencent_094816F3522E0DC704056C789352EBBF0606@qq.com Signed-off-by: Takashi Iwai --- sound/core/jack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/jack.c b/sound/core/jack.c index 32350c6aba84..537df1e98f8a 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -509,6 +509,10 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, return -ENOMEM; jack->id = kstrdup(id, GFP_KERNEL); + if (jack->id == NULL) { + kfree(jack); + return -ENOMEM; + } /* don't creat input device for phantom jack */ if (!phantom_jack) { From 5cf06065bd1f7b94fbb80e7eeb033899f77ab5ba Mon Sep 17 00:00:00 2001 From: Alejandro Concepcion-Rodriguez Date: Sun, 12 Dec 2021 16:06:02 +0000 Subject: [PATCH 443/868] drm: simpledrm: fix wrong unit with pixel clock Pixel clock has to be set in kHz. Signed-off-by: Alejandro Concepcion-Rodriguez Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/6f8554ef-1305-0dda-821c-f7d2e5644a48@acoro.eu --- drivers/gpu/drm/tiny/simpledrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 481b48bde047..5a6e89825bc2 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -458,7 +458,7 @@ static struct drm_display_mode simpledrm_mode(unsigned int width, { struct drm_display_mode mode = { SIMPLEDRM_MODE(width, height) }; - mode.clock = 60 /* Hz */ * mode.hdisplay * mode.vdisplay; + mode.clock = mode.hdisplay * mode.vdisplay * 60 / 1000 /* kHz */; drm_mode_set_name(&mode); return mode; From c062f2a0b04d86c5b8c9d973bea43493eaca3d32 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 10 Dec 2021 17:42:47 +0100 Subject: [PATCH 444/868] net/sched: sch_ets: don't remove idle classes from the round-robin list Shuang reported that the following script: 1) tc qdisc add dev ddd0 handle 10: parent 1: ets bands 8 strict 4 priomap 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 2) mausezahn ddd0 -A 10.10.10.1 -B 10.10.10.2 -c 0 -a own -b 00:c1:a0:c1:a0:00 -t udp & 3) tc qdisc change dev ddd0 handle 10: ets bands 4 strict 2 quanta 2500 2500 priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 crashes systematically when line 2) is commented: list_del corruption, ffff8e028404bd30->next is LIST_POISON1 (dead000000000100) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:47! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 954 Comm: tc Not tainted 5.16.0-rc4+ #478 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 RIP: 0010:__list_del_entry_valid.cold.1+0x12/0x47 Code: fe ff 0f 0b 48 89 c1 4c 89 c6 48 c7 c7 08 42 1b 87 e8 1d c5 fe ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 98 42 1b 87 e8 09 c5 fe ff <0f> 0b 48 c7 c7 48 43 1b 87 e8 fb c4 fe ff 0f 0b 48 89 f2 48 89 fe RSP: 0018:ffffae46807a3888 EFLAGS: 00010246 RAX: 000000000000004e RBX: 0000000000000007 RCX: 0000000000000202 RDX: 0000000000000000 RSI: ffffffff871ac536 RDI: 00000000ffffffff RBP: ffffae46807a3a10 R08: 0000000000000000 R09: c0000000ffff7fff R10: 0000000000000001 R11: ffffae46807a36a8 R12: ffff8e028404b800 R13: ffff8e028404bd30 R14: dead000000000100 R15: ffff8e02fafa2400 FS: 00007efdc92e4480(0000) GS:ffff8e02fb600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000682f48 CR3: 00000001058be000 CR4: 0000000000350ef0 Call Trace: ets_qdisc_change+0x58b/0xa70 [sch_ets] tc_modify_qdisc+0x323/0x880 rtnetlink_rcv_msg+0x169/0x4a0 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x1a5/0x280 netlink_sendmsg+0x257/0x4d0 sock_sendmsg+0x5b/0x60 ____sys_sendmsg+0x1f2/0x260 ___sys_sendmsg+0x7c/0xc0 __sys_sendmsg+0x57/0xa0 do_syscall_64+0x3a/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7efdc8031338 Code: 89 02 48 c7 c0 ff ff ff ff eb b5 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 43 2c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 41 89 d4 55 RSP: 002b:00007ffdf1ce9828 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000061b37a97 RCX: 00007efdc8031338 RDX: 0000000000000000 RSI: 00007ffdf1ce9890 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000078a940 R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000688880 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: sch_ets sch_tbf dummy rfkill iTCO_wdt iTCO_vendor_support intel_rapl_msr intel_rapl_common joydev pcspkr i2c_i801 virtio_balloon i2c_smbus lpc_ich ip_tables xfs libcrc32c crct10dif_pclmul crc32_pclmul crc32c_intel serio_raw ghash_clmulni_intel ahci libahci libata virtio_blk virtio_console virtio_net net_failover failover sunrpc dm_mirror dm_region_hash dm_log dm_mod [last unloaded: sch_ets] ---[ end trace f35878d1912655c2 ]--- RIP: 0010:__list_del_entry_valid.cold.1+0x12/0x47 Code: fe ff 0f 0b 48 89 c1 4c 89 c6 48 c7 c7 08 42 1b 87 e8 1d c5 fe ff 0f 0b 48 89 fe 48 89 c2 48 c7 c7 98 42 1b 87 e8 09 c5 fe ff <0f> 0b 48 c7 c7 48 43 1b 87 e8 fb c4 fe ff 0f 0b 48 89 f2 48 89 fe RSP: 0018:ffffae46807a3888 EFLAGS: 00010246 RAX: 000000000000004e RBX: 0000000000000007 RCX: 0000000000000202 RDX: 0000000000000000 RSI: ffffffff871ac536 RDI: 00000000ffffffff RBP: ffffae46807a3a10 R08: 0000000000000000 R09: c0000000ffff7fff R10: 0000000000000001 R11: ffffae46807a36a8 R12: ffff8e028404b800 R13: ffff8e028404bd30 R14: dead000000000100 R15: ffff8e02fafa2400 FS: 00007efdc92e4480(0000) GS:ffff8e02fb600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000682f48 CR3: 00000001058be000 CR4: 0000000000350ef0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x4e00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- we can remove 'q->classes[i].alist' only if DRR class 'i' was part of the active list. In the ETS scheduler DRR classes belong to that list only if the queue length is greater than zero: we need to test for non-zero value of 'q->classes[i].qdisc->q.qlen' before removing from the list, similarly to what has been done elsewhere in the ETS code. Fixes: de6d25924c2a ("net/sched: sch_ets: don't peek at classes beyond 'nbands'") Reported-by: Shuang Li Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/sch_ets.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index e007fc75ef2f..d73393493553 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -666,9 +666,9 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, } } for (i = q->nbands; i < oldbands; i++) { - qdisc_tree_flush_backlog(q->classes[i].qdisc); - if (i >= q->nstrict) + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) list_del(&q->classes[i].alist); + qdisc_tree_flush_backlog(q->classes[i].qdisc); } q->nstrict = nstrict; memcpy(q->prio2band, priomap, sizeof(priomap)); From 3b8e19a0aa3933a785be9f1541afd8d398c4ec69 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 28 Oct 2021 09:43:11 +0200 Subject: [PATCH 445/868] drm/mediatek: hdmi: Perform NULL pointer check for mtk_hdmi_conf In commit 41ca9caaae0b ("drm/mediatek: hdmi: Add check for CEA modes only") a check for CEA modes was added to function mtk_hdmi_bridge_mode_valid() in order to address possible issues on MT8167; moreover, with commit c91026a938c2 ("drm/mediatek: hdmi: Add optional limit on maximal HDMI mode clock") another similar check was introduced. Unfortunately though, at the time of writing, MT8173 does not provide any mtk_hdmi_conf structure and this is crashing the kernel with NULL pointer upon entering mtk_hdmi_bridge_mode_valid(), which happens as soon as a HDMI cable gets plugged in. To fix this regression, add a NULL pointer check for hdmi->conf in the said function, restoring HDMI functionality and avoiding NULL pointer kernel panics. Fixes: 41ca9caaae0b ("drm/mediatek: hdmi: Add check for CEA modes only") Fixes: c91026a938c2 ("drm/mediatek: hdmi: Add optional limit on maximal HDMI mode clock") Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 5838c44cbf6f..3196189429bc 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1224,12 +1224,14 @@ static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge, return MODE_BAD; } - if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode)) - return MODE_BAD; + if (hdmi->conf) { + if (hdmi->conf->cea_modes_only && !drm_match_cea_mode(mode)) + return MODE_BAD; - if (hdmi->conf->max_mode_clock && - mode->clock > hdmi->conf->max_mode_clock) - return MODE_CLOCK_HIGH; + if (hdmi->conf->max_mode_clock && + mode->clock > hdmi->conf->max_mode_clock) + return MODE_CLOCK_HIGH; + } if (mode->clock < 27000) return MODE_CLOCK_LOW; From 4bc5e64e6cf37007e436970024e5998ee0935651 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Fri, 26 Nov 2021 01:13:32 +0100 Subject: [PATCH 446/868] efi: Move efifb_setup_from_dmi() prototype from arch headers Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") made the Generic System Framebuffers (sysfb) driver able to be built on non-x86 architectures. But it left the efifb_setup_from_dmi() function prototype declaration in the architecture specific headers. This could lead to the following compiler warning as reported by the kernel test robot: drivers/firmware/efi/sysfb_efi.c:70:6: warning: no previous prototype for function 'efifb_setup_from_dmi' [-Wmissing-prototypes] void efifb_setup_from_dmi(struct screen_info *si, const char *opt) ^ drivers/firmware/efi/sysfb_efi.c:70:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void efifb_setup_from_dmi(struct screen_info *si, const char *opt) Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Reported-by: kernel test robot Cc: # 5.15.x Signed-off-by: Javier Martinez Canillas Acked-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20211126001333.555514-1-javierm@redhat.com Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/efi.h | 1 - arch/arm64/include/asm/efi.h | 1 - arch/riscv/include/asm/efi.h | 1 - arch/x86/include/asm/efi.h | 2 -- include/linux/efi.h | 6 ++++++ 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index a6f3b179e8a9..27218eabbf9a 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -17,7 +17,6 @@ #ifdef CONFIG_EFI void efi_init(void); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d3e1825337be..ad55079abe47 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,7 +14,6 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #else #define efi_init() #endif diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 49b398fe99f1..cc4f6787f937 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -13,7 +13,6 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); #else #define efi_init() #endif diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 4d0b126835b8..63158fd55856 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -197,8 +197,6 @@ static inline bool efi_runtime_supported(void) extern void parse_efi_setup(u64 phys_addr, u32 data_len); -extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); - extern void efi_thunk_runtime_setup(void); efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, diff --git a/include/linux/efi.h b/include/linux/efi.h index dbd39b20e034..ef8dbc0a1522 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1283,4 +1283,10 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( } #endif +#ifdef CONFIG_SYSFB +extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt); +#else +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) { } +#endif + #endif /* _LINUX_EFI_H */ From 865ed67ab955428b9aa771d8b4f1e4fb7fd08945 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 9 Dec 2021 12:04:56 +0000 Subject: [PATCH 447/868] firmware: arm_scpi: Fix string overflow in SCPI genpd driver Without the bound checks for scpi_pd->name, it could result in the buffer overflow when copying the SCPI device name from the corresponding device tree node as the name string is set at maximum size of 30. Let us fix it by using devm_kasprintf so that the string buffer is allocated dynamically. Fixes: 8bec4337ad40 ("firmware: scpi: add device power domain support using genpd") Reported-by: Pedro Batista Signed-off-by: Sudeep Holla Cc: stable@vger.kernel.org Cc: Cristian Marussi Link: https://lore.kernel.org/r/20211209120456.696879-1-sudeep.holla@arm.com' Signed-off-by: Arnd Bergmann --- drivers/firmware/scpi_pm_domain.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c index 51201600d789..800673910b51 100644 --- a/drivers/firmware/scpi_pm_domain.c +++ b/drivers/firmware/scpi_pm_domain.c @@ -16,7 +16,6 @@ struct scpi_pm_domain { struct generic_pm_domain genpd; struct scpi_ops *ops; u32 domain; - char name[30]; }; /* @@ -110,8 +109,13 @@ static int scpi_pm_domain_probe(struct platform_device *pdev) scpi_pd->domain = i; scpi_pd->ops = scpi_ops; - sprintf(scpi_pd->name, "%pOFn.%d", np, i); - scpi_pd->genpd.name = scpi_pd->name; + scpi_pd->genpd.name = devm_kasprintf(dev, GFP_KERNEL, + "%pOFn.%d", np, i); + if (!scpi_pd->genpd.name) { + dev_err(dev, "Failed to allocate genpd name:%pOFn.%d\n", + np, i); + continue; + } scpi_pd->genpd.power_off = scpi_pd_power_off; scpi_pd->genpd.power_on = scpi_pd_power_on; From 890d5b40908bfd1a79be018d2d297cf9df60f4ee Mon Sep 17 00:00:00 2001 From: Marian Postevca Date: Sat, 4 Dec 2021 23:49:12 +0200 Subject: [PATCH 448/868] usb: gadget: u_ether: fix race in setting MAC address in setup phase When listening for notifications through netlink of a new interface being registered, sporadically, it is possible for the MAC to be read as zero. The zero MAC address lasts a short period of time and then switches to a valid random MAC address. This causes problems for netd in Android, which assumes that the interface is malfunctioning and will not use it. In the good case we get this log: InterfaceController::getCfg() ifName usb0 hwAddr 92:a8:f0:73:79:5b ipv4Addr 0.0.0.0 flags 0x1002 In the error case we get these logs: InterfaceController::getCfg() ifName usb0 hwAddr 00:00:00:00:00:00 ipv4Addr 0.0.0.0 flags 0x1002 netd : interfaceGetCfg("usb0") netd : interfaceSetCfg() -> ServiceSpecificException (99, "[Cannot assign requested address] : ioctl() failed") The reason for the issue is the order in which the interface is setup, it is first registered through register_netdev() and after the MAC address is set. Fixed by first setting the MAC address of the net_device and after that calling register_netdev(). Fixes: bcd4a1c40bee885e ("usb: gadget: u_ether: construct with default values and add setters/getters") Cc: stable@vger.kernel.org Signed-off-by: Marian Postevca Link: https://lore.kernel.org/r/20211204214912.17627-1-posteuca@mutex.one Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index e0ad5aed6ac9..6f5d45ef2e39 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "u_ether.h" @@ -863,19 +864,23 @@ int gether_register_netdev(struct net_device *net) { struct eth_dev *dev; struct usb_gadget *g; - struct sockaddr sa; int status; if (!net->dev.parent) return -EINVAL; dev = netdev_priv(net); g = dev->gadget; + + net->addr_assign_type = NET_ADDR_RANDOM; + eth_hw_addr_set(net, dev->dev_mac); + status = register_netdev(net); if (status < 0) { dev_dbg(&g->dev, "register_netdev failed, %d\n", status); return status; } else { INFO(dev, "HOST MAC %pM\n", dev->host_mac); + INFO(dev, "MAC %pM\n", dev->dev_mac); /* two kinds of host-initiated state changes: * - iff DATA transfer is active, carrier is "on" @@ -883,15 +888,6 @@ int gether_register_netdev(struct net_device *net) */ netif_carrier_off(net); } - sa.sa_family = net->type; - memcpy(sa.sa_data, dev->dev_mac, ETH_ALEN); - rtnl_lock(); - status = dev_set_mac_address(net, &sa, NULL); - rtnl_unlock(); - if (status) - pr_warn("cannot set self ethernet address: %d\n", status); - else - INFO(dev, "MAC %pM\n", dev->dev_mac); return status; } From ccc14c6cfd346e85c3ecb970975afd5132763437 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Thu, 9 Dec 2021 10:54:22 +0800 Subject: [PATCH 449/868] usb: xhci-mtk: fix list_del warning when enable list debug There is warning of 'list_del corruption' when enable list debug (CONFIG_DEBUG_LIST=y), fix it by using list_del_init() Fixes: 4ce186665e7c ("usb: xhci-mtk: Do not use xhci's virt_dev in drop_endpoint") Cc: stable Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211209025422.17108-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mtk-sch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 1edef7527c11..edbfa82c6565 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -781,7 +781,7 @@ int xhci_mtk_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) ret = xhci_check_bandwidth(hcd, udev); if (!ret) - INIT_LIST_HEAD(&mtk->bw_ep_chk_list); + list_del_init(&mtk->bw_ep_chk_list); return ret; } From 16f00d969afe60e233c1a91af7ac840df60d3536 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Fri, 10 Dec 2021 12:29:45 +0100 Subject: [PATCH 450/868] usb: cdnsp: Fix incorrect calling of cdnsp_died function Patch restrict calling of cdnsp_died function during removing modules or software disconnect. This function was called because after transition controller to HALT state the driver starts handling the deferred interrupt. In this case such interrupt can be simple ignored. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211210112945.660-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 1b1438457fb0..e1ac6c398bd3 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1523,7 +1523,14 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data) spin_lock_irqsave(&pdev->lock, flags); if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) { - cdnsp_died(pdev); + /* + * While removing or stopping driver there may still be deferred + * not handled interrupt which should not be treated as error. + * Driver should simply ignore it. + */ + if (pdev->gadget_driver) + cdnsp_died(pdev); + spin_unlock_irqrestore(&pdev->lock, flags); return IRQ_HANDLED; } From 50931ba27d1665c8b038cd1d16c5869301f32fd6 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 13 Dec 2021 06:06:09 +0100 Subject: [PATCH 451/868] usb: cdnsp: Fix issue in cdnsp_log_ep trace event Patch fixes incorrect order of __entry->stream_id and __entry->state parameters in TP_printk macro. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211213050609.22640-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-trace.h b/drivers/usb/cdns3/cdnsp-trace.h index 6a2571c6aa9e..5983dfb99653 100644 --- a/drivers/usb/cdns3/cdnsp-trace.h +++ b/drivers/usb/cdns3/cdnsp-trace.h @@ -57,9 +57,9 @@ DECLARE_EVENT_CLASS(cdnsp_log_ep, __entry->first_prime_det = pep->stream_info.first_prime_det; __entry->drbls_count = pep->stream_info.drbls_count; ), - TP_printk("%s: SID: %08x ep state: %x stream: enabled: %d num %d " + TP_printk("%s: SID: %08x, ep state: %x, stream: enabled: %d num %d " "tds %d, first prime: %d drbls %d", - __get_str(name), __entry->state, __entry->stream_id, + __get_str(name), __entry->stream_id, __entry->state, __entry->enabled, __entry->num_streams, __entry->td_count, __entry->first_prime_det, __entry->drbls_count) ); From 99ea221f2e2f2743314e348b25c1e2574b467528 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 7 Dec 2021 10:18:38 +0100 Subject: [PATCH 452/868] usb: cdnsp: Fix incorrect status for control request Patch fixes incorrect status for control request. Without this fix all usb_request objects were returned to upper drivers with usb_reqest->status field set to -EINPROGRESS. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") cc: Reported-by: Ken (Jian) He Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20211207091838.39572-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index e1ac6c398bd3..e45c3d6e1536 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1029,6 +1029,8 @@ static void cdnsp_process_ctrl_td(struct cdnsp_device *pdev, return; } + *status = 0; + cdnsp_finish_td(pdev, td, event, pep, status); } From ab8eb798ddabddb2944401bf31ead9671cb97d95 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sat, 11 Dec 2021 14:01:53 +0000 Subject: [PATCH 453/868] net: bcmgenet: Fix NULL vs IS_ERR() checking The phy_attach() function does not return NULL. It returns error pointers. Signed-off-by: Miaoqian Lin Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 5f259641437a..c888ddee1fc4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -589,9 +589,9 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) * Internal or external PHY with MDIO access */ phydev = phy_attach(priv->dev, phy_name, pd->phy_interface); - if (!phydev) { + if (IS_ERR(phydev)) { dev_err(kdev, "failed to register PHY device\n"); - return -ENODEV; + return PTR_ERR(phydev); } } else { /* From a8d13611b4a7b1b20d17bf2b9a89a3efcabde56c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 11 Dec 2021 14:30:31 -0500 Subject: [PATCH 454/868] selftests/net: toeplitz: fix udp option Tiny fix. Option -u ("use udp") does not take an argument. It can cause the next argument to silently be ignored. Fixes: 5ebfb4cc3048 ("selftests/net: toeplitz test") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/toeplitz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c index 710ac956bdb3..c5489341cfb8 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/net/toeplitz.c @@ -498,7 +498,7 @@ static void parse_opts(int argc, char **argv) bool have_toeplitz = false; int index, c; - while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) { + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { switch (c) { case '4': cfg_family = AF_INET; From 9d591fc028b6bddb38c6585874f331267cbdadae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Sat, 11 Dec 2021 23:51:41 +0100 Subject: [PATCH 455/868] net: dsa: mv88e6xxx: Unforce speed & duplex in mac_link_down() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 64d47d50be7a ("net: dsa: mv88e6xxx: configure interface settings in mac_config") removed forcing of speed and duplex from mv88e6xxx_mac_config(), where the link is forced down, and left it only in mv88e6xxx_mac_link_up(), by which time link is unforced. It seems that (at least on 88E6190) when changing cmode to 2500base-x, if the link is not forced down, but the speed or duplex are still forced, the forcing of new settings for speed & duplex doesn't take in mv88e6xxx_mac_link_up(). Fix this by unforcing speed & duplex in mv88e6xxx_mac_link_down(). Fixes: 64d47d50be7a ("net: dsa: mv88e6xxx: configure interface settings in mac_config") Signed-off-by: Marek Behún Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 14f87f6ac479..cd8462d1e27c 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -768,6 +768,10 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, if ((!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) && ops->port_sync_link) err = ops->port_sync_link(chip, port, mode, false); + + if (!err && ops->port_set_speed_duplex) + err = ops->port_set_speed_duplex(chip, port, SPEED_UNFORCED, + DUPLEX_UNFORCED); mv88e6xxx_reg_unlock(chip); if (err) From 71da1aec215290e249d09c44c768df859f3a3bba Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 13 Dec 2021 16:36:00 +0800 Subject: [PATCH 456/868] selftest/net/forwarding: declare NETIFS p9 p10 The recent GRE selftests defined NUM_NETIFS=10. If the users copy forwarding.config.sample to forwarding.config directly, they will get error "Command line is not complete" when run the GRE tests, because create_netif_veth() failed with no interface name defined. Fix it by extending the NETIFS with p9 and p10. Fixes: 2800f2485417 ("selftests: forwarding: Test multipath hashing on inner IP pkts for GRE tunnel") Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/forwarding.config.sample | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index bf17e485684f..b0980a2efa31 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -13,6 +13,8 @@ NETIFS[p5]=veth4 NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +NETIFS[p9]=veth8 +NETIFS[p10]=veth9 # Port that does not have a cable connected. NETIF_NO_CABLE=eth8 From be565ec71d1d59438bed0c7ed0a252a327e0b0ef Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 13 Dec 2021 01:44:36 -0800 Subject: [PATCH 457/868] net: ethernet: ti: add missing of_node_put before return Fix following coccicheck warning: WARNING: Function "for_each_child_of_node" should have of_node_put() before return. Early exits from for_each_child_of_node should decrement the node reference counter. Signed-off-by: Wang Qing Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 29 ++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c092cb61416a..ffbbda8f4d41 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1844,13 +1844,14 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) if (ret < 0) { dev_err(dev, "%pOF error reading port_id %d\n", port_np, ret); - return ret; + goto of_node_put; } if (!port_id || port_id > common->port_num) { dev_err(dev, "%pOF has invalid port_id %u %s\n", port_np, port_id, port_np->name); - return -EINVAL; + ret = -EINVAL; + goto of_node_put; } port = am65_common_get_port(common, port_id); @@ -1866,8 +1867,10 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) (AM65_CPSW_NU_FRAM_PORT_OFFSET * (port_id - 1)); port->slave.mac_sl = cpsw_sl_get("am65", dev, port->port_base); - if (IS_ERR(port->slave.mac_sl)) - return PTR_ERR(port->slave.mac_sl); + if (IS_ERR(port->slave.mac_sl)) { + ret = PTR_ERR(port->slave.mac_sl); + goto of_node_put; + } port->disabled = !of_device_is_available(port_np); if (port->disabled) { @@ -1880,7 +1883,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) ret = PTR_ERR(port->slave.ifphy); dev_err(dev, "%pOF error retrieving port phy: %d\n", port_np, ret); - return ret; + goto of_node_put; } port->slave.mac_only = @@ -1889,10 +1892,12 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) /* get phy/link info */ if (of_phy_is_fixed_link(port_np)) { ret = of_phy_register_fixed_link(port_np); - if (ret) - return dev_err_probe(dev, ret, + if (ret) { + ret = dev_err_probe(dev, ret, "failed to register fixed-link phy %pOF\n", port_np); + goto of_node_put; + } port->slave.phy_node = of_node_get(port_np); } else { port->slave.phy_node = @@ -1902,14 +1907,15 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) if (!port->slave.phy_node) { dev_err(dev, "slave[%d] no phy found\n", port_id); - return -ENODEV; + ret = -ENODEV; + goto of_node_put; } ret = of_get_phy_mode(port_np, &port->slave.phy_if); if (ret) { dev_err(dev, "%pOF read phy-mode err %d\n", port_np, ret); - return ret; + goto of_node_put; } ret = of_get_mac_address(port_np, port->slave.mac_addr); @@ -1932,6 +1938,11 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) } return 0; + +of_node_put: + of_node_put(port_np); + of_node_put(node); + return ret; } static void am65_cpsw_pcpu_stats_free(void *data) From d33dae51645c0d837e587000f3131118fcd6bf5e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 13 Dec 2021 11:05:13 +0000 Subject: [PATCH 458/868] net: phy: add a note about refcounting Recently, a patch has been submitted to "fix" the refcounting for a DT node in of_mdiobus_link_mdiodev(). This is not a leaked refcount. The refcount is passed to the new device. Sadly, coccicheck identifies this location as a leaked refcount, which means we're likely to keep getting patches to "fix" this. However, fixing this will cause breakage. Add a comment to state that the lack of of_node_put() here is intentional. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c204067f1890..c198722e4871 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -460,6 +460,9 @@ static void of_mdiobus_link_mdiodev(struct mii_bus *bus, if (addr == mdiodev->addr) { device_set_node(dev, of_fwnode_handle(child)); + /* The refcount on "child" is passed to the mdio + * device. Do _not_ use of_node_put(child) here. + */ return; } } From 884d2b845477cd0a18302444dc20fe2d9a01743e Mon Sep 17 00:00:00 2001 From: David Wu Date: Mon, 13 Dec 2021 19:15:15 +0800 Subject: [PATCH 459/868] net: stmmac: Add GFP_DMA32 for rx buffers if no 64 capability Use page_pool_alloc_pages instead of page_pool_dev_alloc_pages, which can give the gfp parameter, in the case of not supporting 64-bit width, using 32-bit address memory can reduce a copy from swiotlb. Signed-off-by: David Wu Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index da8306f60730..8ded4be08b00 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1461,16 +1461,20 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + if (priv->dma_cap.addr64 <= 32) + gfp |= GFP_DMA32; if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->page) return -ENOMEM; buf->page_offset = stmmac_rx_offset(priv); } if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->sec_page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->sec_page) return -ENOMEM; @@ -4482,6 +4486,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; int dirty = stmmac_rx_dirty(priv, queue); unsigned int entry = rx_q->dirty_rx; + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + if (priv->dma_cap.addr64 <= 32) + gfp |= GFP_DMA32; while (dirty-- > 0) { struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; @@ -4494,13 +4502,13 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) p = rx_q->dma_rx + entry; if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->page) break; } if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); + buf->sec_page = page_pool_alloc_pages(rx_q->page_pool, gfp); if (!buf->sec_page) break; From d800c65c2d4eccebb27ffb7808e842d5b533823c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Dec 2021 09:04:01 -0700 Subject: [PATCH 460/868] io-wq: drop wqe lock before creating new worker We have two io-wq creation paths: - On queue enqueue - When a worker goes to sleep The latter invokes worker creation with the wqe->lock held, but that can run into problems if we end up exiting and need to cancel the queued work. syzbot caught this: ============================================ WARNING: possible recursive locking detected 5.16.0-rc4-syzkaller #0 Not tainted -------------------------------------------- iou-wrk-6468/6471 is trying to acquire lock: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_worker_cancel_cb+0xb7/0x210 fs/io-wq.c:187 but task is already holding lock: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_wq_worker_sleeping+0xb6/0x140 fs/io-wq.c:700 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&wqe->lock); lock(&wqe->lock); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by iou-wrk-6468/6471: #0: ffff88801aa98018 (&wqe->lock){+.+.}-{2:2}, at: io_wq_worker_sleeping+0xb6/0x140 fs/io-wq.c:700 stack backtrace: CPU: 1 PID: 6471 Comm: iou-wrk-6468 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1dc/0x2d8 lib/dump_stack.c:106 print_deadlock_bug kernel/locking/lockdep.c:2956 [inline] check_deadlock kernel/locking/lockdep.c:2999 [inline] validate_chain+0x5984/0x8240 kernel/locking/lockdep.c:3788 __lock_acquire+0x1382/0x2b00 kernel/locking/lockdep.c:5027 lock_acquire+0x19f/0x4d0 kernel/locking/lockdep.c:5637 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:154 io_worker_cancel_cb+0xb7/0x210 fs/io-wq.c:187 io_wq_cancel_tw_create fs/io-wq.c:1220 [inline] io_queue_worker_create+0x3cf/0x4c0 fs/io-wq.c:372 io_wq_worker_sleeping+0xbe/0x140 fs/io-wq.c:701 sched_submit_work kernel/sched/core.c:6295 [inline] schedule+0x67/0x1f0 kernel/sched/core.c:6323 schedule_timeout+0xac/0x300 kernel/time/timer.c:1857 wait_woken+0xca/0x1b0 kernel/sched/wait.c:460 unix_msg_wait_data net/unix/unix_bpf.c:32 [inline] unix_bpf_recvmsg+0x7f9/0xe20 net/unix/unix_bpf.c:77 unix_stream_recvmsg+0x214/0x2c0 net/unix/af_unix.c:2832 sock_recvmsg_nosec net/socket.c:944 [inline] sock_recvmsg net/socket.c:962 [inline] sock_read_iter+0x3a7/0x4d0 net/socket.c:1035 call_read_iter include/linux/fs.h:2156 [inline] io_iter_do_read fs/io_uring.c:3501 [inline] io_read fs/io_uring.c:3558 [inline] io_issue_sqe+0x144c/0x9590 fs/io_uring.c:6671 io_wq_submit_work+0x2d8/0x790 fs/io_uring.c:6836 io_worker_handle_work+0x808/0xdd0 fs/io-wq.c:574 io_wqe_worker+0x395/0x870 fs/io-wq.c:630 ret_from_fork+0x1f/0x30 We can safely drop the lock before doing work creation, making the two contexts the same in that regard. Reported-by: syzbot+b18b8be69df33a3918e9@syzkaller.appspotmail.com Fixes: 71a85387546e ("io-wq: check for wq exit after adding new worker task_work") Signed-off-by: Jens Axboe --- fs/io-wq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 8d2bb818a3bb..5c4f582d6549 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -395,7 +395,9 @@ static void io_wqe_dec_running(struct io_worker *worker) if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { atomic_inc(&acct->nr_running); atomic_inc(&wqe->wq->worker_refs); + raw_spin_unlock(&wqe->lock); io_queue_worker_create(worker, acct, create_worker_cb); + raw_spin_lock(&wqe->lock); } } From bc2f39a6252ee40d9bfc2743d4437d420aec5f6e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 10 Nov 2021 11:13:50 +0300 Subject: [PATCH 461/868] iavf: missing unlocks in iavf_watchdog_task() This code was re-organized and there some unlocks missing now. Fixes: 898ef1cb1cb2 ("iavf: Combine init and watchdog state machines") Signed-off-by: Dan Carpenter Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index cfdbf8c08d18..884a19c51543 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2046,6 +2046,7 @@ static void iavf_watchdog_task(struct work_struct *work) } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; + mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(10)); @@ -2076,9 +2077,8 @@ static void iavf_watchdog_task(struct work_struct *work) iavf_detect_recover_hung(&adapter->vsi); break; case __IAVF_REMOVE: - mutex_unlock(&adapter->crit_lock); - return; default: + mutex_unlock(&adapter->crit_lock); return; } From fe523d7c9a8332855376ad5eb1aa301091129ba4 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 1 Dec 2021 09:14:34 +0100 Subject: [PATCH 462/868] iavf: do not override the adapter state in the watchdog task (again) The watchdog task incorrectly changes the state to __IAVF_RESETTING, instead of letting the reset task take care of that. This was already resolved by commit 22c8fd71d3a5 ("iavf: do not override the adapter state in the watchdog task") but the problem was reintroduced by the recent code refactoring in commit 45eebd62999d ("iavf: Refactor iavf state machine tracking"). Fixes: 45eebd62999d ("iavf: Refactor iavf state machine tracking") Signed-off-by: Stefan Assmann Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 884a19c51543..4e7c04047f91 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2085,7 +2085,6 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; From e386dfc56f837da66d00a078e5314bc8382fab83 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 10 Dec 2021 14:00:15 -0800 Subject: [PATCH 463/868] fget: clarify and improve __fget_files() implementation Commit 054aa8d439b9 ("fget: check that the fd still exists after getting a ref to it") fixed a race with getting a reference to a file just as it was being closed. It was a fairly minimal patch, and I didn't think re-checking the file pointer lookup would be a measurable overhead, since it was all right there and cached. But I was wrong, as pointed out by the kernel test robot. The 'poll2' case of the will-it-scale.per_thread_ops benchmark regressed quite noticeably. Admittedly it seems to be a very artificial test: doing "poll()" system calls on regular files in a very tight loop in multiple threads. That means that basically all the time is spent just looking up file descriptors without ever doing anything useful with them (not that doing 'poll()' on a regular file is useful to begin with). And as a result it shows the extra "re-check fd" cost as a sore thumb. Happily, the regression is fixable by just writing the code to loook up the fd to be better and clearer. There's still a cost to verify the file pointer, but now it's basically in the noise even for that benchmark that does nothing else - and the code is more understandable and has better comments too. [ Side note: this patch is also a classic case of one that looks very messy with the default greedy Myers diff - it's much more legible with either the patience of histogram diff algorithm ] Link: https://lore.kernel.org/lkml/20211210053743.GA36420@xsang-OptiPlex-9020/ Link: https://lore.kernel.org/lkml/20211213083154.GA20853@linux.intel.com/ Reported-by: kernel test robot Tested-by: Carel Si Cc: Jann Horn Cc: Miklos Szeredi Signed-off-by: Linus Torvalds --- fs/file.c | 72 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/fs/file.c b/fs/file.c index ad4a8bf3cf10..97d212a9b814 100644 --- a/fs/file.c +++ b/fs/file.c @@ -841,28 +841,68 @@ void do_close_on_exec(struct files_struct *files) spin_unlock(&files->file_lock); } +static inline struct file *__fget_files_rcu(struct files_struct *files, + unsigned int fd, fmode_t mask, unsigned int refs) +{ + for (;;) { + struct file *file; + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + struct file __rcu **fdentry; + + if (unlikely(fd >= fdt->max_fds)) + return NULL; + + fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); + file = rcu_dereference_raw(*fdentry); + if (unlikely(!file)) + return NULL; + + if (unlikely(file->f_mode & mask)) + return NULL; + + /* + * Ok, we have a file pointer. However, because we do + * this all locklessly under RCU, we may be racing with + * that file being closed. + * + * Such a race can take two forms: + * + * (a) the file ref already went down to zero, + * and get_file_rcu_many() fails. Just try + * again: + */ + if (unlikely(!get_file_rcu_many(file, refs))) + continue; + + /* + * (b) the file table entry has changed under us. + * Note that we don't need to re-check the 'fdt->fd' + * pointer having changed, because it always goes + * hand-in-hand with 'fdt'. + * + * If so, we need to put our refs and try again. + */ + if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || + unlikely(rcu_dereference_raw(*fdentry) != file)) { + fput_many(file, refs); + continue; + } + + /* + * Ok, we have a ref to the file, and checked that it + * still exists. + */ + return file; + } +} + static struct file *__fget_files(struct files_struct *files, unsigned int fd, fmode_t mask, unsigned int refs) { struct file *file; rcu_read_lock(); -loop: - file = files_lookup_fd_rcu(files, fd); - if (file) { - /* File object ref couldn't be taken. - * dup2() atomicity guarantee is the reason - * we loop to catch the new file (or NULL pointer) - */ - if (file->f_mode & mask) - file = NULL; - else if (!get_file_rcu_many(file, refs)) - goto loop; - else if (files_lookup_fd_raw(files, fd) != file) { - fput_many(file, refs); - goto loop; - } - } + file = __fget_files_rcu(files, fd, mask, refs); rcu_read_unlock(); return file; From aa50faff4416c869b52dff68a937c84d29e12f4b Mon Sep 17 00:00:00 2001 From: Sergio Paracuellos Date: Wed, 1 Dec 2021 22:34:02 +0100 Subject: [PATCH 464/868] PCI: mt7621: Convert driver into 'bool' The driver is not ready yet to be compiled as a module since it depends on some symbols not exported on MIPS. We have the following current problems: Building mips:allmodconfig ... failed -------------- Error log: ERROR: modpost: missing MODULE_LICENSE() in drivers/pci/controller/pcie-mt7621.o ERROR: modpost: "mips_cm_unlock_other" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_cpc_base" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_cm_lock_other" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_cm_is64" [drivers/pci/controller/pcie-mt7621.ko] undefined! ERROR: modpost: "mips_gcr_base" [drivers/pci/controller/pcie-mt7621.ko] undefined! Temporarily move from 'tristate' to 'bool' until a better solution is ready. Also RALINK is redundant because SOC_MT7621 already depends on it. Hence, simplify condition. Fixes: 2bdd5238e756 ("PCI: mt7621: Add MediaTek MT7621 PCIe host controller driver"). Signed-off-by: Sergio Paracuellos Reviewed-and-Tested-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/pci/controller/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 93b141110537..7fc5135ffbbf 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -332,8 +332,8 @@ config PCIE_APPLE If unsure, say Y if you have an Apple Silicon system. config PCIE_MT7621 - tristate "MediaTek MT7621 PCIe Controller" - depends on (RALINK && SOC_MT7621) || (MIPS && COMPILE_TEST) + bool "MediaTek MT7621 PCIe Controller" + depends on SOC_MT7621 || (MIPS && COMPILE_TEST) select PHY_MT7621_PCI default SOC_MT7621 help From d341b427c3c3fd6a58263ce01e01700d16861c28 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 12 Dec 2021 02:11:45 +0300 Subject: [PATCH 465/868] ASoC: tegra: Add DAPM switches for headphones and mic jack UCM of Acer Chromebook (Nyan) uses DAPM switches of headphones and mic jack. These switches were lost by accident during unification of the machine drivers, restore them. Cc: Fixes: cc8f70f ("ASoC: tegra: Unify ASoC machine drivers") Reported-by: Thomas Graichen # T124 Nyan Big Tested-by: Thomas Graichen # T124 Nyan Big Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20211211231146.6137-1-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_asoc_machine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index b95438c3dbf7..f3e86bd714b4 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -116,6 +116,8 @@ static const struct snd_kcontrol_new tegra_machine_controls[] = { SOC_DAPM_PIN_SWITCH("Headset Mic"), SOC_DAPM_PIN_SWITCH("Internal Mic 1"), SOC_DAPM_PIN_SWITCH("Internal Mic 2"), + SOC_DAPM_PIN_SWITCH("Headphones"), + SOC_DAPM_PIN_SWITCH("Mic Jack"), }; int tegra_asoc_machine_init(struct snd_soc_pcm_runtime *rtd) From db635ba4fadf3ba676d07537f3b3f58166aa7b0e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 12 Dec 2021 02:11:46 +0300 Subject: [PATCH 466/868] ASoC: tegra: Restore headphones jack name on Nyan Big UCM of Acer Chromebook (Nyan) uses a different name for the headphones jack. The name was changed during unification of the machine drivers and UCM fails now to load because of that. Restore the old jack name. Cc: Fixes: cc8f70f ("ASoC: tegra: Unify ASoC machine drivers") Reported-by: Thomas Graichen # T124 Nyan Big Tested-by: Thomas Graichen # T124 Nyan Big Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20211211231146.6137-2-digetx@gmail.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_asoc_machine.c | 9 ++++++++- sound/soc/tegra/tegra_asoc_machine.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/soc/tegra/tegra_asoc_machine.c b/sound/soc/tegra/tegra_asoc_machine.c index f3e86bd714b4..a73404879aa1 100644 --- a/sound/soc/tegra/tegra_asoc_machine.c +++ b/sound/soc/tegra/tegra_asoc_machine.c @@ -124,10 +124,16 @@ int tegra_asoc_machine_init(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_card *card = rtd->card; struct tegra_machine *machine = snd_soc_card_get_drvdata(card); + const char *jack_name; int err; if (machine->gpiod_hp_det && machine->asoc->add_hp_jack) { - err = snd_soc_card_jack_new(card, "Headphones Jack", + if (machine->asoc->hp_jack_name) + jack_name = machine->asoc->hp_jack_name; + else + jack_name = "Headphones Jack"; + + err = snd_soc_card_jack_new(card, jack_name, SND_JACK_HEADPHONE, &tegra_machine_hp_jack, tegra_machine_hp_jack_pins, @@ -660,6 +666,7 @@ static struct snd_soc_card snd_soc_tegra_max98090 = { static const struct tegra_asoc_data tegra_max98090_data = { .mclk_rate = tegra_machine_mclk_rate_12mhz, .card = &snd_soc_tegra_max98090, + .hp_jack_name = "Headphones", .add_common_dapm_widgets = true, .add_common_controls = true, .add_common_snd_ops = true, diff --git a/sound/soc/tegra/tegra_asoc_machine.h b/sound/soc/tegra/tegra_asoc_machine.h index d6a8d1320551..6f795d7dff7c 100644 --- a/sound/soc/tegra/tegra_asoc_machine.h +++ b/sound/soc/tegra/tegra_asoc_machine.h @@ -14,6 +14,7 @@ struct snd_soc_pcm_runtime; struct tegra_asoc_data { unsigned int (*mclk_rate)(unsigned int srate); const char *codec_dev_name; + const char *hp_jack_name; struct snd_soc_card *card; unsigned int mclk_id; bool hp_jack_gpio_active_low; From b0cdc5dbcf2ba0d99785da5aabf1b17943805b8a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 11 Dec 2021 17:11:12 +0100 Subject: [PATCH 467/868] mptcp: never allow the PM to close a listener subflow Currently, when deleting an endpoint the netlink PM treverses all the local MPTCP sockets, regardless of their status. If an MPTCP listener socket is bound to the IP matching the delete endpoint, the listener TCP socket will be closed. That is unexpected, the PM should only affect data subflows. Additionally, syzbot was able to trigger a NULL ptr dereference due to the above: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] CPU: 1 PID: 6550 Comm: syz-executor122 Not tainted 5.16.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__lock_acquire+0xd7d/0x54a0 kernel/locking/lockdep.c:4897 Code: 0f 0e 41 be 01 00 00 00 0f 86 c8 00 00 00 89 05 69 cc 0f 0e e9 bd 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 da 48 c1 ea 03 <80> 3c 02 00 0f 85 f3 2f 00 00 48 81 3b 20 75 17 8f 0f 84 52 f3 ff RSP: 0018:ffffc90001f2f818 EFLAGS: 00010016 RAX: dffffc0000000000 RBX: 0000000000000018 RCX: 0000000000000000 RDX: 0000000000000003 RSI: 0000000000000000 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: 000000000000000a R12: 0000000000000000 R13: ffff88801b98d700 R14: 0000000000000000 R15: 0000000000000001 FS: 00007f177cd3d700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f177cd1b268 CR3: 000000001dd55000 CR4: 0000000000350ee0 Call Trace: lock_acquire kernel/locking/lockdep.c:5637 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5602 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x39/0x50 kernel/locking/spinlock.c:162 finish_wait+0xc0/0x270 kernel/sched/wait.c:400 inet_csk_wait_for_connect net/ipv4/inet_connection_sock.c:464 [inline] inet_csk_accept+0x7de/0x9d0 net/ipv4/inet_connection_sock.c:497 mptcp_accept+0xe5/0x500 net/mptcp/protocol.c:2865 inet_accept+0xe4/0x7b0 net/ipv4/af_inet.c:739 mptcp_stream_accept+0x2e7/0x10e0 net/mptcp/protocol.c:3345 do_accept+0x382/0x510 net/socket.c:1773 __sys_accept4_file+0x7e/0xe0 net/socket.c:1816 __sys_accept4+0xb0/0x100 net/socket.c:1846 __do_sys_accept net/socket.c:1864 [inline] __se_sys_accept net/socket.c:1861 [inline] __x64_sys_accept+0x71/0xb0 net/socket.c:1861 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f177cd8b8e9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f177cd3d308 EFLAGS: 00000246 ORIG_RAX: 000000000000002b RAX: ffffffffffffffda RBX: 00007f177ce13408 RCX: 00007f177cd8b8e9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f177ce13400 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f177ce1340c R13: 00007f177cde1004 R14: 6d705f706374706d R15: 0000000000022000 Fix the issue explicitly skipping MPTCP socket in TCP_LISTEN status. Reported-and-tested-by: syzbot+e4d843bb96a9431e6331@syzkaller.appspotmail.com Reviewed-by: Mat Martineau Fixes: 740d798e8767 ("mptcp: remove id 0 address") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/ebc7594cdd420d241fb2172ddb8542ba64717657.1639238695.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7b96be1e9f14..f523051f5aef 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -700,6 +700,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, msk_owned_by_me(msk); + if (sk->sk_state == TCP_LISTEN) + return; + if (!rm_list->nr) return; From 2fe24343922e0428fb68674a4fae099171141bc7 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 13 Dec 2021 18:10:48 +0800 Subject: [PATCH 468/868] scsi: pm8001: Fix phys_to_virt() usage on dma_addr_t The driver supports a "direct" mode of operation, where the SMP req frame is directly copied into the command payload (and vice-versa for the SMP resp). To get at the SMP req frame data in the scatterlist the driver uses phys_to_virt() on the DMA mapped memory dma_addr_t . This is broken, and subsequently crashes as follows when an IOMMU is enabled: Unable to handle kernel paging request at virtual address ffff0000fcebfb00 ... pc : pm80xx_chip_smp_req+0x2d0/0x3d0 lr : pm80xx_chip_smp_req+0xac/0x3d0 pm80xx_chip_smp_req+0x2d0/0x3d0 pm8001_task_exec.constprop.0+0x368/0x520 pm8001_queue_command+0x1c/0x30 smp_execute_task_sg+0xdc/0x204 sas_discover_expander.part.0+0xac/0x6cc sas_discover_root_expander+0x8c/0x150 sas_discover_domain+0x3ac/0x6a0 process_one_work+0x1d0/0x354 worker_thread+0x13c/0x470 kthread+0x17c/0x190 ret_from_fork+0x10/0x20 Code: 371806e1 910006d6 6b16033f 54000249 (38766b05) ---[ end trace b91d59aaee98ea2d ]--- note: kworker/u192:0[7] exited with preempt_count 1 Instead use kmap_atomic(). -- Difference to v1: - use kmap_atomic() in both locations Difference to v2: - add whitespace around arithmetic (Damien) Link: https://lore.kernel.org/r/1639390248-213603-1-git-send-email-john.garry@huawei.com Reviewed-by: Damien Le Moal Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/pm8001/pm80xx_hwi.c | 38 ++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index b9f6d83ff380..2101fc5761c3 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3053,7 +3053,6 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) struct smp_completion_resp *psmpPayload; struct task_status_struct *ts; struct pm8001_device *pm8001_dev; - char *pdma_respaddr = NULL; psmpPayload = (struct smp_completion_resp *)(piomb + 4); status = le32_to_cpu(psmpPayload->status); @@ -3080,19 +3079,23 @@ mpi_smp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) if (pm8001_dev) atomic_dec(&pm8001_dev->running_req); if (pm8001_ha->smp_exp_mode == SMP_DIRECT) { + struct scatterlist *sg_resp = &t->smp_task.smp_resp; + u8 *payload; + void *to; + pm8001_dbg(pm8001_ha, IO, "DIRECT RESPONSE Length:%d\n", param); - pdma_respaddr = (char *)(phys_to_virt(cpu_to_le64 - ((u64)sg_dma_address - (&t->smp_task.smp_resp)))); + to = kmap_atomic(sg_page(sg_resp)); + payload = to + sg_resp->offset; for (i = 0; i < param; i++) { - *(pdma_respaddr+i) = psmpPayload->_r_a[i]; + *(payload + i) = psmpPayload->_r_a[i]; pm8001_dbg(pm8001_ha, IO, "SMP Byte%d DMA data 0x%x psmp 0x%x\n", - i, *(pdma_respaddr + i), + i, *(payload + i), psmpPayload->_r_a[i]); } + kunmap_atomic(to); } break; case IO_ABORTED: @@ -4236,14 +4239,14 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, struct sas_task *task = ccb->task; struct domain_device *dev = task->dev; struct pm8001_device *pm8001_dev = dev->lldd_dev; - struct scatterlist *sg_req, *sg_resp; + struct scatterlist *sg_req, *sg_resp, *smp_req; u32 req_len, resp_len; struct smp_req smp_cmd; u32 opc; struct inbound_queue_table *circularQ; - char *preq_dma_addr = NULL; - __le64 tmp_addr; u32 i, length; + u8 *payload; + u8 *to; memset(&smp_cmd, 0, sizeof(smp_cmd)); /* @@ -4280,8 +4283,9 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, pm8001_ha->smp_exp_mode = SMP_INDIRECT; - tmp_addr = cpu_to_le64((u64)sg_dma_address(&task->smp_task.smp_req)); - preq_dma_addr = (char *)phys_to_virt(tmp_addr); + smp_req = &task->smp_task.smp_req; + to = kmap_atomic(sg_page(smp_req)); + payload = to + smp_req->offset; /* INDIRECT MODE command settings. Use DMA */ if (pm8001_ha->smp_exp_mode == SMP_INDIRECT) { @@ -4289,7 +4293,7 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, /* for SPCv indirect mode. Place the top 4 bytes of * SMP Request header here. */ for (i = 0; i < 4; i++) - smp_cmd.smp_req16[i] = *(preq_dma_addr + i); + smp_cmd.smp_req16[i] = *(payload + i); /* exclude top 4 bytes for SMP req header */ smp_cmd.long_smp_req.long_req_addr = cpu_to_le64((u64)sg_dma_address @@ -4320,20 +4324,20 @@ static int pm80xx_chip_smp_req(struct pm8001_hba_info *pm8001_ha, pm8001_dbg(pm8001_ha, IO, "SMP REQUEST DIRECT MODE\n"); for (i = 0; i < length; i++) if (i < 16) { - smp_cmd.smp_req16[i] = *(preq_dma_addr+i); + smp_cmd.smp_req16[i] = *(payload + i); pm8001_dbg(pm8001_ha, IO, "Byte[%d]:%x (DMA data:%x)\n", i, smp_cmd.smp_req16[i], - *(preq_dma_addr)); + *(payload)); } else { - smp_cmd.smp_req[i] = *(preq_dma_addr+i); + smp_cmd.smp_req[i] = *(payload + i); pm8001_dbg(pm8001_ha, IO, "Byte[%d]:%x (DMA data:%x)\n", i, smp_cmd.smp_req[i], - *(preq_dma_addr)); + *(payload)); } } - + kunmap_atomic(to); build_smp_cmd(pm8001_dev->device_id, smp_cmd.tag, &smp_cmd, pm8001_ha->smp_exp_mode, length); rc = pm8001_mpi_build_cmd(pm8001_ha, circularQ, opc, &smp_cmd, From fea3fdf975dd9f3e5248afaab8fe023db313f005 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 14 Dec 2021 09:41:26 +0800 Subject: [PATCH 469/868] drm/ast: potential dereference of null pointer The return value of kzalloc() needs to be checked. To avoid use of null pointer '&ast_state->base' in case of the failure of alloc. Fixes: f0adbc382b8b ("drm/ast: Allocate initial CRTC state of the correct size") Signed-off-by: Jiasheng Jiang Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20211214014126.2211535-1-jiasheng@iscas.ac.cn --- drivers/gpu/drm/ast/ast_mode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 1e30eaeb0e1b..d5c98f79d58d 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1121,7 +1121,10 @@ static void ast_crtc_reset(struct drm_crtc *crtc) if (crtc->state) crtc->funcs->atomic_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + if (ast_state) + __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static struct drm_crtc_state * From 83b67041f3eaf33f98a075249aa7f4c7617c2f85 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 Nov 2021 10:43:48 +0100 Subject: [PATCH 470/868] USB: serial: cp210x: fix CP2105 GPIO registration When generalising GPIO support and adding support for CP2102N, the GPIO registration for some CP2105 devices accidentally broke. Specifically, when all the pins of a port are in "modem" mode, and thus unavailable for GPIO use, the GPIO chip would now be registered without having initialised the number of GPIO lines. This would in turn be rejected by gpiolib and some errors messages would be printed (but importantly probe would still succeed). Fix this by initialising the number of GPIO lines before registering the GPIO chip. Note that as for the other device types, and as when all CP2105 pins are muxed for LED function, the GPIO chip is registered also when no pins are available for GPIO use. Reported-by: Maarten Brock Link: https://lore.kernel.org/r/5eb560c81d2ea1a2b4602a92d9f48a89@vanmierlo.com Fixes: c8acfe0aadbe ("USB: serial: cp210x: implement GPIO support for CP2102N") Cc: stable@vger.kernel.org # 4.19 Cc: Karoly Pados Link: https://lore.kernel.org/r/20211126094348.31698-1-johan@kernel.org Reviewed-by: Greg Kroah-Hartman Tested-by: Maarten Brock Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7705328034ca..8a60c0d56863 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1635,6 +1635,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) /* 2 banks of GPIO - One for the pins taken from each serial port */ if (intf_num == 0) { + priv->gc.ngpio = 2; + if (mode.eci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1645,8 +1647,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_ECI_GPIO_MODE_MASK) >> CP210X_ECI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 2; } else if (intf_num == 1) { + priv->gc.ngpio = 3; + if (mode.sci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1657,7 +1660,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_SCI_GPIO_MODE_MASK) >> CP210X_SCI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 3; } else { return -ENODEV; } From 2b503c8598d1b232e7fc7526bce9326d92331541 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 10 Dec 2021 11:07:14 +0100 Subject: [PATCH 471/868] USB: serial: option: add Telit FN990 compositions Add the following Telit FN990 compositions: 0x1070: tty, adb, rmnet, tty, tty, tty, tty 0x1071: tty, adb, mbim, tty, tty, tty, tty 0x1072: rndis, tty, adb, tty, tty, tty, tty 0x1073: tty, adb, ecm, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20211210100714.22587-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 546fce4617a8..42420bfc983c 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1219,6 +1219,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 53b3495273282aa844c4613d19c3b30558c70c84 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Thu, 9 Dec 2021 20:41:24 -0800 Subject: [PATCH 472/868] drm/i915/display: Fix an unsigned subtraction which can never be negative. smatch warning: drivers/gpu/drm/i915/display/intel_dmc.c:601 parse_dmc_fw() warn: unsigned 'fw->size - offset' is never less than zero Firmware size is size_t and offset is u32. So the subtraction is unsigned which can never be less than zero. Fixes: 3d5928a168a9 ("drm/i915/xelpd: Pipe A DMC plugging") Signed-off-by: Harshit Mogalapalli Reviewed-by: Lucas De Marchi Signed-off-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20211210044129.12422-1-harshit.m.mogalapalli@oracle.com (cherry picked from commit 87bb2a410dcfb617b88e4695edf4beb6336dc314) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 2dc9d632969d..aef69522f0be 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -596,7 +596,7 @@ static void parse_dmc_fw(struct drm_i915_private *dev_priv, continue; offset = readcount + dmc->dmc_info[id].dmc_offset * 4; - if (fw->size - offset < 0) { + if (offset > fw->size) { drm_err(&dev_priv->drm, "Reading beyond the fw_size\n"); continue; } From d296a74b7b59ff9116236c17edb25f26935dbf70 Mon Sep 17 00:00:00 2001 From: Bradley Scott Date: Mon, 13 Dec 2021 10:49:39 -0500 Subject: [PATCH 473/868] ALSA: hda/realtek: Amp init fixup for HP ZBook 15 G6 HP ZBook 15 G6 (SSID 103c:860f) needs the same speaker amplifier initialization as used on several other HP laptops using ALC285. Signed-off-by: Bradley Scott Cc: Link: https://lore.kernel.org/r/20211213154938.503201-1-Bradley.Scott@zebra.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3599f4c85ebf..d162662fe684 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8660,6 +8660,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), + SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), From aa72394667e5cea3547e4c41ddff7ca8c632d764 Mon Sep 17 00:00:00 2001 From: Bradley Scott Date: Mon, 13 Dec 2021 11:22:47 -0500 Subject: [PATCH 474/868] ALSA: hda/realtek: Add new alc285-hp-amp-init model Adds a new "alc285-hp-amp-init" model that can be used to apply the ALC285 HP speaker amplifier initialization fixup to devices that are not already known by passing "hda_model=alc285-hp-amp-init" to the snd-sof-intel-hda-common module or "model=alc285-hp-amp-init" to the snd-hda-intel module, depending on which is being used. Signed-off-by: Bradley Scott Cc: Link: https://lore.kernel.org/r/20211213162246.506838-1-bscott@teksavvy.com Signed-off-by: Takashi Iwai --- Documentation/sound/hd-audio/models.rst | 2 ++ sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 3 insertions(+) diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 0ea967d34583..d25335993e55 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -326,6 +326,8 @@ usi-headset Headset support on USI machines dual-codecs Lenovo laptops with dual codecs +alc285-hp-amp-init + HP laptops which require speaker amplifier initialization (ALC285) ALC680 ====== diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d162662fe684..fc41f3e8ddc3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9124,6 +9124,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, + {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, {} }; #define ALC225_STANDARD_PINS \ From 1fe98f5690c4219d419ea9cc190f94b3401cf324 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 2 Dec 2021 13:45:33 +0100 Subject: [PATCH 475/868] mac80211: send ADDBA requests using the tid/queue of the aggregation session Sending them out on a different queue can cause a race condition where a number of packets in the queue may be discarded by the receiver, because the ADDBA request is sent too early. This affects any driver with software A-MPDU setup which does not allocate packet seqno in hardware on tx, regardless of whether iTXQ is used or not. The only driver I've seen that explicitly deals with this issue internally is mwl8k. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20211202124533.80388-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c1558dd2d244..58761ca7da3c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) From 37d33114240ede043c42463a6347f68ed72d6904 Mon Sep 17 00:00:00 2001 From: Finn Behrens Date: Wed, 1 Dec 2021 13:49:18 +0100 Subject: [PATCH 476/868] nl80211: remove reload flag from regulatory_request This removes the previously unused reload flag, which was introduced in 1eda919126b4. The request is handled as NL80211_REGDOM_SET_BY_CORE, which is parsed unconditionally. Reported-by: kernel test robot Reported-by: Nathan Chancellor Fixes: 1eda919126b4 ("nl80211: reset regdom when reloading regdb") Link: https://lore.kernel.org/all/YaZuKYM5bfWe2Urn@archlinux-ax161/ Signed-off-by: Finn Behrens Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/YadvTolO8rQcNCd/@gimli.kloenk.dev Signed-off-by: Johannes Berg --- include/net/regulatory.h | 1 - net/wireless/reg.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 0cf9335431e0..47f06f6f5a67 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -83,7 +83,6 @@ struct regulatory_request { enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; - bool reload; enum environment_cap country_ie_env; struct list_head list; }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 61f1bf1bc4a7..8148a3b5f607 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1134,9 +1134,8 @@ int reg_reload_regdb(void) request->wiphy_idx = WIPHY_IDX_INVALID; request->alpha2[0] = current_regdomain->alpha2[0]; request->alpha2[1] = current_regdomain->alpha2[1]; - request->initiator = NL80211_USER_REG_HINT_USER; + request->initiator = NL80211_REGDOM_SET_BY_CORE; request->user_reg_hint_type = NL80211_USER_REG_HINT_USER; - request->reload = true; reg_process_hint(request); @@ -2712,8 +2711,7 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - (treatment == REG_REQ_ALREADY_SET && - !user_request->reload)) + treatment == REG_REQ_ALREADY_SET) return REG_REQ_IGNORE; user_request->intersect = treatment == REG_REQ_INTERSECT; From 06c41bda0ea14aa7fba932a9613c4ee239682cf0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Dec 2021 15:26:25 +0200 Subject: [PATCH 477/868] mac80211: agg-tx: don't schedule_and_wake_txq() under sta->lock When we call ieee80211_agg_start_txq(), that will in turn call schedule_and_wake_txq(). Called from ieee80211_stop_tx_ba_cb() this is done under sta->lock, which leads to certain circular lock dependencies, as reported by Chris Murphy: https://lore.kernel.org/r/CAJCQCtSXJ5qA4bqSPY=oLRMbv-irihVvP7A2uGutEbXQVkoNaw@mail.gmail.com In general, ieee80211_agg_start_txq() is usually not called with sta->lock held, only in this one place. But it's always called with sta->ampdu_mlme.mtx held, and that's therefore clearly sufficient. Change ieee80211_stop_tx_ba_cb() to also call it without the sta->lock held, by factoring it out of ieee80211_remove_tid_tx() (which is only called in this one place). This breaks the locking chain and makes it less likely that we'll have similar locking chain problems in the future. Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Reported-by: Chris Murphy Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211202152554.f519884c8784.I555fef8e67d93fff3d9a304886c4a9f8b322e591@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 58761ca7da3c..74a878f213d3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2020 Intel Corporation + * Copyright (C) 2018 - 2021 Intel Corporation */ #include @@ -213,6 +213,8 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + if (!txq) return; @@ -290,7 +292,6 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); - ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -889,6 +890,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; + bool start_txq = false; ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); @@ -906,10 +908,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, send_delba = true; ieee80211_remove_tid_tx(sta, tid); + start_txq = true; unlock_sta: spin_unlock_bh(&sta->lock); + if (start_txq) + ieee80211_agg_start_txq(sta, tid, false); + if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); From e08ebd6d7b90ae81f21425ca39136f5b2272580f Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 2 Dec 2021 15:28:54 +0200 Subject: [PATCH 478/868] cfg80211: Acquire wiphy mutex on regulatory work The function cfg80211_reg_can_beacon_relax() expects wiphy mutex to be held when it is being called. However, when reg_leave_invalid_chans() is called the mutex is not held. Fix it by acquiring the lock before calling the function. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211202152831.527686cda037.I40ad9372a47cbad53b4aae7b5a6ccc0dc3fddf8b@changeid Signed-off-by: Johannes Berg --- net/wireless/reg.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8148a3b5f607..f8f01a3e020b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2359,6 +2359,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) struct cfg80211_chan_def chandef = {}; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; + bool ret; wdev_lock(wdev); iftype = wdev->iftype; @@ -2408,7 +2409,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: - return cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); + wiphy_lock(wiphy); + ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); + wiphy_unlock(wiphy); + + return ret; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: return cfg80211_chandef_usable(wiphy, &chandef, From 768c0b19b50665e337c96858aa2b7928d6dcf756 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 11 Dec 2021 20:10:24 +0100 Subject: [PATCH 479/868] mac80211: validate extended element ID is present Before attempting to parse an extended element, verify that the extended element ID is present. Fixes: 41cbb0f5a295 ("mac80211: add support for HE") Reported-by: syzbot+59bdff68edce82e393b6@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20211211201023.f30a1b128c07.I5cacc176da94ba316877c6e10fe3ceec8b4dbd7d@changeid Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/util.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 43df2f0c5db9..6c2934854d3c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -943,7 +943,12 @@ static void ieee80211_parse_extension_element(u32 *crc, struct ieee802_11_elems *elems) { const void *data = elem->data + 1; - u8 len = elem->datalen - 1; + u8 len; + + if (!elem->datalen) + return; + + len = elem->datalen - 1; switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: From 511ab0c1dfb260a6b17b8771109e8d63474473a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:46 +0200 Subject: [PATCH 480/868] mac80211: fix lookup when adding AddBA extension element We should be doing the HE capabilities lookup based on the full interface type so if P2P doesn't have HE but client has it doesn't get confused. Fix that. Fixes: 2ab45876756f ("mac80211: add support for the ADDBA extension element") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.010fc1d61137.If3a468145f29d670cb00a693bed559d8290ba693@changeid Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 470ff0ce3dc7..7d2925bb966e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ /** @@ -191,7 +191,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, sband = ieee80211_get_sband(sdata); if (!sband) return; - he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); if (!he_cap) return; From f22d981386d12d1513bd2720fb4387b469124d4b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 29 Nov 2021 15:32:45 +0200 Subject: [PATCH 481/868] mac80211: Fix the size used for building probe request Instead of using the hard-coded value of '100' use the correct scan IEs length as calculated during HW registration to mac80211. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.0a82d6891719.I8ded1f2e0bccb9e71222c945666bcd86537f2e35@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6c2934854d3c..fe2903b84ceb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2068,7 +2068,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, chandef.chan = chan; skb = ieee80211_probereq_get(&local->hw, src, ssid, ssid_len, - 100 + ie_len); + local->scan_ies_len + ie_len); if (!skb) return NULL; From 4dde3c3627b52ca515a34f6f4de3898224aa1dd3 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Mon, 29 Nov 2021 15:32:42 +0200 Subject: [PATCH 482/868] mac80211: update channel context before station state Currently channel context is updated only after station got an update about new assoc state, this results in station using the old channel context. Fix this by moving the update channel context before updating station, enabling the driver to immediately use the updated channel context in the new assoc state. Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.1c80c17ffd8a.I94ae31378b363c1182cfdca46c4b7e7165cff984@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 840ad1a860fa..537535a88990 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -667,6 +667,15 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) list_add_tail_rcu(&sta->list, &local->sta_list); + /* update channel context before notifying the driver about state + * change, this enables driver using the updated channel context right away. + */ + if (sta->sta_state >= IEEE80211_STA_ASSOC) { + ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + /* notify driver */ err = sta_info_insert_drv_state(local, sdata, sta); if (err) @@ -674,12 +683,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_INSERTED); - if (sta->sta_state >= IEEE80211_STA_ASSOC) { - ieee80211_recalc_min_chandef(sta->sdata); - if (!sta->sta.support_p2p_ps) - ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); - } - /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); From db7205af049d230e7e0abf61c1e74c1aab40f390 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:39 +0200 Subject: [PATCH 483/868] mac80211: mark TX-during-stop for TX in in_reconfig Mark TXQs as having seen transmit while they were stopped if we bail out of drv_wake_tx_queue() due to reconfig, so that the queue wake after this will make them catch up. This is particularly necessary for when TXQs are used for management packets since those TXQs won't see a lot of traffic that'd make them catch up later. Cc: stable@vger.kernel.org Fixes: 4856bfd23098 ("mac80211: do not call driver wake_tx_queue op during reconfig") Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.4573a221c0e1.I0d1d5daea3089be3fc0dccc92991b0f8c5677f0c@changeid Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index cd3731cbf6c6..c336267f4599 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1219,8 +1219,11 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); - if (local->in_reconfig) + /* In reconfig don't transmit now, but mark for waking later */ + if (local->in_reconfig) { + set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags); return; + } if (!check_sdata_in_driver(sdata)) return; From 13dee10b30c058ee2c58c5da00339cc0d4201aa6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Nov 2021 15:32:40 +0200 Subject: [PATCH 484/868] mac80211: do drv_reconfig_complete() before restarting all When we reconfigure, the driver might do some things to complete the reconfiguration. It's strange and could be broken in some cases because we restart other works (e.g. remain-on-channel and TX) before this happens, yet only start queues later. Change this to do the reconfig complete when reconfiguration is actually complete, not when we've already started doing other things again. For iwlwifi, this should fix a race where the reconfig can race with TX, for ath10k and ath11k that also use this it won't make a difference because they just start queues there, and mac80211 also stopped the queues and will restart them later as before. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211129152938.cab99f22fe19.Iefe494687f15fd85f77c1b989d1149c8efdfdc36@changeid Signed-off-by: Johannes Berg --- net/mac80211/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fe2903b84ceb..0e4e1956bcea 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2651,6 +2651,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->sta_mtx); } + /* + * If this is for hw restart things are still running. + * We may want to change that later, however. + */ + if (local->open_count && (!suspended || reconfig_due_to_wowlan)) + drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); + if (local->in_reconfig) { local->in_reconfig = false; barrier(); @@ -2669,13 +2676,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) IEEE80211_QUEUE_STOP_REASON_SUSPEND, false); - /* - * If this is for hw restart things are still running. - * We may want to change that later, however. - */ - if (local->open_count && (!suspended || reconfig_due_to_wowlan)) - drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART); - if (!suspended) return 0; From 8734b41b3efe0fc6082c1937b0e88556c396dc96 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Tue, 23 Nov 2021 18:15:20 +1000 Subject: [PATCH 485/868] powerpc/module_64: Fix livepatching for RO modules Livepatching a loaded module involves applying relocations through apply_relocate_add(), which attempts to write to read-only memory when CONFIG_STRICT_MODULE_RWX=y. Work around this by performing these writes through the text poke area by using patch_instruction(). R_PPC_REL24 is the only relocation type generated by the kpatch-build userspace tool or klp-convert kernel tree that I observed applying a relocation to a post-init module. A more comprehensive solution is planned, but using patch_instruction() for R_PPC_REL24 on should serve as a sufficient fix. This does have a performance impact, I observed ~15% overhead in module_load() on POWER8 bare metal with checksum verification off. Fixes: c35717c71e98 ("powerpc: Set ARCH_HAS_STRICT_MODULE_RWX") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Joe Lawrence Signed-off-by: Russell Currey Tested-by: Joe Lawrence [mpe: Check return codes from patch_instruction()] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211214121248.777249-1-mpe@ellerman.id.au --- arch/powerpc/kernel/module_64.c | 42 ++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6baa676e7cb6..5d77d3f5fbb5 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -422,11 +422,17 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, const char *name) { long reladdr; + func_desc_t desc; + int i; if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); + for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + if (patch_instruction(&entry->jump[i], + ppc_inst(ppc64_stub_insns[i]))) + return 0; + } /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); @@ -437,10 +443,24 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, } pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); - entry->jump[0] |= PPC_HA(reladdr); - entry->jump[1] |= PPC_LO(reladdr); - entry->funcdata = func_desc(addr); - entry->magic = STUB_MAGIC; + if (patch_instruction(&entry->jump[0], + ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) + return 0; + + if (patch_instruction(&entry->jump[1], + ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) + return 0; + + // func_desc_t is 8 bytes if ABIv2, else 16 bytes + desc = func_desc(addr); + for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { + if (patch_instruction(((u32 *)&entry->funcdata) + i, + ppc_inst(((u32 *)(&desc))[i]))) + return 0; + } + + if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + return 0; return 1; } @@ -495,8 +515,11 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) me->name, *instruction, instruction); return 0; } + /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = PPC_INST_LD_TOC; + if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) + return 0; + return 1; } @@ -636,9 +659,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } /* Only replace bits 2 through 26 */ - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | (value & 0x03fffffc); + + if (patch_instruction((u32 *)location, ppc_inst(value))) + return -EFAULT; + break; case R_PPC64_REL64: From 83dbf898a2d45289be875deb580e93050ba67529 Mon Sep 17 00:00:00 2001 From: Stefan Roese Date: Tue, 14 Dec 2021 12:49:32 +0100 Subject: [PATCH 486/868] PCI/MSI: Mask MSI-X vectors only on success MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Masking all unused MSI-X entries is done to ensure that a crash kernel starts from a clean slate, which correponds to the reset state of the device as defined in the PCI-E specificion 3.0 and later: Vector Control for MSI-X Table Entries -------------------------------------- "00: Mask bit: When this bit is set, the function is prohibited from sending a message using this MSI-X Table entry. ... This bit’s state after reset is 1 (entry is masked)." A Marvell NVME device fails to deliver MSI interrupts after trying to enable MSI-X interrupts due to that masking. It seems to take the MSI-X mask bits into account even when MSI-X is disabled. While not specification compliant, this can be cured by moving the masking into the success path, so that the MSI-X table entries stay in device reset state when the MSI-X setup fails. [ tglx: Move it into the success path, add comment and amend changelog ] Fixes: aa8092c1d1f1 ("PCI/MSI: Mask all unused MSI-X entries") Signed-off-by: Stefan Roese Signed-off-by: Thomas Gleixner Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Cc: Michal Simek Cc: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211210161025.3287927-1-sr@denx.de --- drivers/pci/msi.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 48e3f4e47b29..6748cf9d7d90 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -722,9 +722,6 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, goto out_disable; } - /* Ensure that all table entries are masked. */ - msix_mask_all(base, tsize); - ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) goto out_disable; @@ -751,6 +748,16 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); dev->msix_enabled = 1; + + /* + * Ensure that all table entries are masked to prevent + * stale entries from firing in a crash kernel. + * + * Done late to deal with a broken Marvell NVME device + * which takes the MSI-X mask bits into account even + * when MSI-X is disabled, which prevents MSI delivery. + */ + msix_mask_all(base, tsize); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); pcibios_free_irq(dev); From 94185adbfad56815c2c8401e16d81bdb74a79201 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Dec 2021 12:42:14 +0100 Subject: [PATCH 487/868] PCI/MSI: Clear PCI_MSIX_FLAGS_MASKALL on error PCI_MSIX_FLAGS_MASKALL is set in the MSI-X control register at MSI-X interrupt setup time. It's cleared on success, but the error handling path only clears the PCI_MSIX_FLAGS_ENABLE bit. That's incorrect as the reset state of the PCI_MSIX_FLAGS_MASKALL bit is zero. That can be observed via lspci: Capabilities: [b0] MSI-X: Enable- Count=67 Masked+ Clear the bit in the error path to restore the reset state. Fixes: 438553958ba1 ("PCI/MSI: Enable and mask MSI-X early") Reported-by: Stefan Roese Signed-off-by: Thomas Gleixner Tested-by: Stefan Roese Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Cc: Michal Simek Cc: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87tufevoqx.ffs@tglx --- drivers/pci/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 6748cf9d7d90..d84cf30bb279 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -784,7 +784,7 @@ out_free: free_msi_irqs(dev); out_disable: - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); return ret; } From aeb7c75cb77478fdbf821628e9c95c4baa9adc63 Mon Sep 17 00:00:00 2001 From: Ong Boon Leong Date: Sat, 11 Dec 2021 22:51:34 +0800 Subject: [PATCH 488/868] net: stmmac: fix tc flower deletion for VLAN priority Rx steering To replicate the issue:- 1) Add 1 flower filter for VLAN Priority based frame steering:- $ IFDEVNAME=eth0 $ tc qdisc add dev $IFDEVNAME ingress $ tc qdisc add dev $IFDEVNAME root mqprio num_tc 8 \ map 0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0 \ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0 $ tc filter add dev $IFDEVNAME parent ffff: protocol 802.1Q \ flower vlan_prio 0 hw_tc 0 2) Get the 'pref' id $ tc filter show dev $IFDEVNAME ingress 3) Delete a specific tc flower record (say pref 49151) $ tc filter del dev $IFDEVNAME parent ffff: pref 49151 From dmesg, we will observe kernel NULL pointer ooops [ 197.170464] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 197.171367] #PF: supervisor read access in kernel mode [ 197.171367] #PF: error_code(0x0000) - not-present page [ 197.171367] PGD 0 P4D 0 [ 197.171367] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 197.171367] RIP: 0010:tc_setup_cls+0x20b/0x4a0 [stmmac] [ 197.171367] Call Trace: [ 197.171367] [ 197.171367] ? __stmmac_disable_all_queues+0xa8/0xe0 [stmmac] [ 197.171367] stmmac_setup_tc_block_cb+0x70/0x110 [stmmac] [ 197.171367] tc_setup_cb_destroy+0xb3/0x180 [ 197.171367] fl_hw_destroy_filter+0x94/0xc0 [cls_flower] The above issue is due to previous incorrect implementation of tc_del_vlan_flow(), shown below, that uses flow_cls_offload_flow_rule() to get struct flow_rule *rule which is no longer valid for tc filter delete operation. struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; So, to ensure tc_del_vlan_flow() deletes the right VLAN cls record for earlier configured RX queue (configured by hw_tc) in tc_add_vlan_flow(), this patch introduces stmmac_rfs_entry as driver-side flow_cls_offload record for 'RX frame steering' tc flower, currently used for VLAN priority. The implementation has taken consideration for future extension to include other type RX frame steering such as EtherType based. v2: - Clean up overly extensive backtrace and rewrite git message to better explain the kernel NULL pointer issue. Fixes: 0e039f5cf86c ("net: stmmac: add RX frame steering based on VLAN priority in tc flower") Tested-by: Kurt Kanzenbach Signed-off-by: Ong Boon Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 17 ++++ .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 86 ++++++++++++++++--- 2 files changed, 90 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 5f129733aabd..873b9e3e5da2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -172,6 +172,19 @@ struct stmmac_flow_entry { int is_l4; }; +/* Rx Frame Steering */ +enum stmmac_rfs_type { + STMMAC_RFS_T_VLAN, + STMMAC_RFS_T_MAX, +}; + +struct stmmac_rfs_entry { + unsigned long cookie; + int in_use; + int type; + int tc; +}; + struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames[MTL_MAX_TX_QUEUES]; @@ -289,6 +302,10 @@ struct stmmac_priv { struct stmmac_tc_entry *tc_entries; unsigned int flow_entries_max; struct stmmac_flow_entry *flow_entries; + unsigned int rfs_entries_max[STMMAC_RFS_T_MAX]; + unsigned int rfs_entries_cnt[STMMAC_RFS_T_MAX]; + unsigned int rfs_entries_total; + struct stmmac_rfs_entry *rfs_entries; /* Pulse Per Second output */ struct stmmac_pps_cfg pps[STMMAC_PPS_MAX]; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 1c4ea0b1b845..d0a2b289f460 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -232,11 +232,33 @@ static int tc_setup_cls_u32(struct stmmac_priv *priv, } } +static int tc_rfs_init(struct stmmac_priv *priv) +{ + int i; + + priv->rfs_entries_max[STMMAC_RFS_T_VLAN] = 8; + + for (i = 0; i < STMMAC_RFS_T_MAX; i++) + priv->rfs_entries_total += priv->rfs_entries_max[i]; + + priv->rfs_entries = devm_kcalloc(priv->device, + priv->rfs_entries_total, + sizeof(*priv->rfs_entries), + GFP_KERNEL); + if (!priv->rfs_entries) + return -ENOMEM; + + dev_info(priv->device, "Enabled RFS Flow TC (entries=%d)\n", + priv->rfs_entries_total); + + return 0; +} + static int tc_init(struct stmmac_priv *priv) { struct dma_features *dma_cap = &priv->dma_cap; unsigned int count; - int i; + int ret, i; if (dma_cap->l3l4fnum) { priv->flow_entries_max = dma_cap->l3l4fnum; @@ -250,10 +272,14 @@ static int tc_init(struct stmmac_priv *priv) for (i = 0; i < priv->flow_entries_max; i++) priv->flow_entries[i].idx = i; - dev_info(priv->device, "Enabled Flow TC (entries=%d)\n", + dev_info(priv->device, "Enabled L3L4 Flow TC (entries=%d)\n", priv->flow_entries_max); } + ret = tc_rfs_init(priv); + if (ret) + return -ENOMEM; + if (!priv->plat->fpe_cfg) { priv->plat->fpe_cfg = devm_kzalloc(priv->device, sizeof(*priv->plat->fpe_cfg), @@ -607,16 +633,45 @@ static int tc_del_flow(struct stmmac_priv *priv, return ret; } +static struct stmmac_rfs_entry *tc_find_rfs(struct stmmac_priv *priv, + struct flow_cls_offload *cls, + bool get_free) +{ + int i; + + for (i = 0; i < priv->rfs_entries_total; i++) { + struct stmmac_rfs_entry *entry = &priv->rfs_entries[i]; + + if (entry->cookie == cls->cookie) + return entry; + if (get_free && entry->in_use == false) + return entry; + } + + return NULL; +} + #define VLAN_PRIO_FULL_MASK (0x07) static int tc_add_vlan_flow(struct stmmac_priv *priv, struct flow_cls_offload *cls) { + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; int tc = tc_classid_to_hwtc(priv->dev, cls->classid); struct flow_match_vlan match; + if (!entry) { + entry = tc_find_rfs(priv, cls, true); + if (!entry) + return -ENOENT; + } + + if (priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN] >= + priv->rfs_entries_max[STMMAC_RFS_T_VLAN]) + return -ENOENT; + /* Nothing to do here */ if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) return -EINVAL; @@ -638,6 +693,12 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv, prio = BIT(match.key->vlan_priority); stmmac_rx_queue_prio(priv, priv->hw, prio, tc); + + entry->in_use = true; + entry->cookie = cls->cookie; + entry->tc = tc; + entry->type = STMMAC_RFS_T_VLAN; + priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]++; } return 0; @@ -646,20 +707,19 @@ static int tc_add_vlan_flow(struct stmmac_priv *priv, static int tc_del_vlan_flow(struct stmmac_priv *priv, struct flow_cls_offload *cls) { - struct flow_rule *rule = flow_cls_offload_flow_rule(cls); - struct flow_dissector *dissector = rule->match.dissector; - int tc = tc_classid_to_hwtc(priv->dev, cls->classid); + struct stmmac_rfs_entry *entry = tc_find_rfs(priv, cls, false); - /* Nothing to do here */ - if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) - return -EINVAL; + if (!entry || !entry->in_use || entry->type != STMMAC_RFS_T_VLAN) + return -ENOENT; - if (tc < 0) { - netdev_err(priv->dev, "Invalid traffic class\n"); - return -EINVAL; - } + stmmac_rx_queue_prio(priv, priv->hw, 0, entry->tc); - stmmac_rx_queue_prio(priv, priv->hw, 0, tc); + entry->in_use = false; + entry->cookie = 0; + entry->tc = 0; + entry->type = 0; + + priv->rfs_entries_cnt[STMMAC_RFS_T_VLAN]--; return 0; } From 4fc7261dbab139d3c64c3b618262504e16cfe7ee Mon Sep 17 00:00:00 2001 From: Prathamesh Shete Date: Tue, 14 Dec 2021 17:06:53 +0530 Subject: [PATCH 489/868] mmc: sdhci-tegra: Fix switch to HS400ES mode When CMD13 is sent after switching to HS400ES mode, the bus is operating at either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host controller CAR clock and the interface clock are rate matched. Signed-off-by: Prathamesh Shete Acked-by: Adrian Hunter Fixes: dfc9700cef77 ("mmc: tegra: Implement HS400 enhanced strobe") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211214113653.4631-1-pshete@nvidia.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-tegra.c | 43 ++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index a5001875876b..9762ffab2e23 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -356,23 +356,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap) } } -static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, - struct mmc_ios *ios) -{ - struct sdhci_host *host = mmc_priv(mmc); - u32 val; - - val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - - if (ios->enhanced_strobe) - val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - else - val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - - sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - -} - static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -793,6 +776,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) } } +static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + u32 val; + + val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); + + if (ios->enhanced_strobe) { + val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + /* + * When CMD13 is sent from mmc_select_hs400es() after + * switching to HS400ES mode, the bus is operating at + * either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. + * To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI + * interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host + * controller CAR clock and the interface clock are rate matched. + */ + tegra_sdhci_set_clock(host, MMC_HS200_MAX_DTR); + } else { + val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + } + + sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); +} + static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); From 166b6a46b78bf8b9559a6620c3032f9fe492e082 Mon Sep 17 00:00:00 2001 From: Baowen Zheng Date: Mon, 13 Dec 2021 15:46:04 +0100 Subject: [PATCH 490/868] flow_offload: return EOPNOTSUPP for the unsupported mpls action type We need to return EOPNOTSUPP for the unsupported mpls action type when setup the flow action. In the original implement, we will return 0 for the unsupported mpls action type, actually we do not setup it and the following actions to the flow action entry. Fixes: 9838b20a7fb2 ("net: sched: take rtnl lock in tc_setup_flow_action()") Signed-off-by: Baowen Zheng Signed-off-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ef8f5a6205a..e54f0a42270c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3687,6 +3687,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; default: + err = -EOPNOTSUPP; goto err_out_locked; } } else if (is_tcf_skbedit_ptype(act)) { From 5f9562ebe710c307adc5f666bf1a2162ee7977c0 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Tue, 14 Dec 2021 18:46:59 +0800 Subject: [PATCH 491/868] rds: memory leak in __rds_conn_create() __rds_conn_create() did not release conn->c_path when loop_trans != 0 and trans->t_prefer_loopback != 0 and is_outgoing == 0. Fixes: aced3ce57cd3 ("RDS tcp loopback connection can hang") Signed-off-by: Hangyu Hua Reviewed-by: Sharath Srinivasan Signed-off-by: David S. Miller --- net/rds/connection.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/connection.c b/net/rds/connection.c index a3bc4b54d491..b4cc699c5fad 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,6 +253,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, * should end up here, but if it * does, reset/destroy the connection. */ + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(-EOPNOTSUPP); goto out; From b442f2ea84624873d538e4c5986d7c0d40883a47 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 14 Dec 2021 12:21:36 +0200 Subject: [PATCH 492/868] mlxsw: spectrum_router: Consolidate MAC profiles when possible Currently, when setting a router interface (RIF) MAC address while the MAC profile is not shared with other RIFs, the profile is edited so that the new MAC address is assigned to it. This does not take into account a situation in which the new MAC address already matches an existing MAC profile. In that situation, two MAC profiles will be occupied even though they hold MAC addresses from the same profile. In order to prevent that, add a check to ensure that editing a MAC profile takes place only when the new MAC address does not match an existing profile. Fixes: 605d25cd782a6 ("mlxsw: spectrum_router: Add RIF MAC profiles support") Reported-by: Maksym Yaremchuk Tested-by: Maksym Yaremchuk Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 217e3b351dfe..c34833ff1dde 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8494,7 +8494,8 @@ mlxsw_sp_rif_mac_profile_replace(struct mlxsw_sp *mlxsw_sp, u8 mac_profile; int err; - if (!mlxsw_sp_rif_mac_profile_is_shared(rif)) + if (!mlxsw_sp_rif_mac_profile_is_shared(rif) && + !mlxsw_sp_rif_mac_profile_find(mlxsw_sp, new_mac)) return mlxsw_sp_rif_mac_profile_edit(rif, new_mac); err = mlxsw_sp_rif_mac_profile_get(mlxsw_sp, new_mac, From 20617717cd219d3c1f798cd13dbce1bcd86a6ece Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 14 Dec 2021 12:21:37 +0200 Subject: [PATCH 493/868] selftests: mlxsw: Add a test case for MAC profiles consolidation Add a test case to cover the bug fixed by the previous patch. Edit the MAC address of one netdev so that it matches the MAC address of the second netdev. Verify that the two MAC profiles were consolidated by testing that the MAC profiles occupancy decreased by one. Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/rif_mac_profiles_occ.sh | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh index b513f64d9092..026a126f584d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh @@ -72,6 +72,35 @@ rif_mac_profile_replacement_test() ip link set $h1.10 address $h1_10_mac } +rif_mac_profile_consolidation_test() +{ + local count=$1; shift + local h1_20_mac + + RET=0 + + if [[ $count -eq 1 ]]; then + return + fi + + h1_20_mac=$(mac_get $h1.20) + + # Set the MAC of $h1.20 to that of $h1.10 and confirm that they are + # using the same MAC profile. + ip link set $h1.20 address 00:11:11:11:11:11 + check_err $? + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]') + + [[ $occ -eq $((count - 1)) ]] + check_err $? "MAC profile occupancy did not decrease" + + log_test "RIF MAC profile consolidation" + + ip link set $h1.20 address $h1_20_mac +} + rif_mac_profile_shared_replacement_test() { local count=$1; shift @@ -104,6 +133,7 @@ rif_mac_profile_edit_test() create_max_rif_mac_profiles $count rif_mac_profile_replacement_test + rif_mac_profile_consolidation_test $count rif_mac_profile_shared_replacement_test $count } From 8deb34a90f06374fd26f722c2a79e15160f66be7 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Tue, 14 Dec 2021 18:50:33 +0800 Subject: [PATCH 494/868] ASoC: rt5682: fix the wrong jack type detected Some powers were changed during the jack insert detection and clk's enable/disable in CCF. If in parallel, the influence has a chance to detect the wrong jack type, so add a lock. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/20211214105033.471-1-derek.fang@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 5224123d0d3b..b34a8542077d 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -929,6 +929,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) unsigned int val, count; if (jack_insert) { + snd_soc_dapm_mutex_lock(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2 | RT5682_PWR_MB, RT5682_PWR_VREF2 | RT5682_PWR_MB); @@ -979,6 +981,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) snd_soc_component_update_bits(component, RT5682_MICBIAS_2, RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK, RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU); + + snd_soc_dapm_mutex_unlock(dapm); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, From edaa26334c117a584add6053f48d63a988d25a6e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 13 Dec 2021 14:14:43 -1000 Subject: [PATCH 495/868] iocost: Fix divide-by-zero on donation from low hweight cgroup The donation calculation logic assumes that the donor has non-zero after-donation hweight, so the lowest active hweight a donating cgroup can have is 2 so that it can donate 1 while keeping the other 1 for itself. Earlier, we only donated from cgroups with sizable surpluses so this condition was always true. However, with the precise donation algorithm implemented, f1de2439ec43 ("blk-iocost: revamp donation amount determination") made the donation amount calculation exact enabling even low hweight cgroups to donate. This means that in rare occasions, a cgroup with active hweight of 1 can enter donation calculation triggering the following warning and then a divide-by-zero oops. WARNING: CPU: 4 PID: 0 at block/blk-iocost.c:1928 transfer_surpluses.cold+0x0/0x53 [884/94867] ... RIP: 0010:transfer_surpluses.cold+0x0/0x53 Code: 92 ff 48 c7 c7 28 d1 ab b5 65 48 8b 34 25 00 ae 01 00 48 81 c6 90 06 00 00 e8 8b 3f fe ff 48 c7 c0 ea ff ff ff e9 95 ff 92 ff <0f> 0b 48 c7 c7 30 da ab b5 e8 71 3f fe ff 4c 89 e8 4d 85 ed 74 0 4 ... Call Trace: ioc_timer_fn+0x1043/0x1390 call_timer_fn+0xa1/0x2c0 __run_timers.part.0+0x1ec/0x2e0 run_timer_softirq+0x35/0x70 ... iocg: invalid donation weights in /a/b: active=1 donating=1 after=0 Fix it by excluding cgroups w/ active hweight < 2 from donating. Excluding these extreme low hweight donations shouldn't affect work conservation in any meaningful way. Signed-off-by: Tejun Heo Fixes: f1de2439ec43 ("blk-iocost: revamp donation amount determination") Cc: stable@vger.kernel.org # v5.10+ Link: https://lore.kernel.org/r/Ybfh86iSvpWKxhVM@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-iocost.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a5b37cc65b17..769b64394298 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2311,7 +2311,14 @@ static void ioc_timer_fn(struct timer_list *timer) hwm = current_hweight_max(iocg); new_hwi = hweight_after_donation(iocg, old_hwi, hwm, usage, &now); - if (new_hwi < hwm) { + /* + * Donation calculation assumes hweight_after_donation + * to be positive, a condition that a donor w/ hwa < 2 + * can't meet. Don't bother with donation if hwa is + * below 2. It's not gonna make a meaningful difference + * anyway. + */ + if (new_hwi < hwm && hwa >= 2) { iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); From bd0687c18e635b63233dc87f38058cd728802ab4 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 14 Dec 2021 11:26:07 +0100 Subject: [PATCH 496/868] xsk: Do not sleep in poll() when need_wakeup set Do not sleep in poll() when the need_wakeup flag is set. When this flag is set, the application needs to explicitly wake up the driver with a syscall (poll, recvmsg, sendmsg, etc.) to guarantee that Rx and/or Tx processing will be processed promptly. But the current code in poll(), sleeps first then wakes up the driver. This means that no driver processing will occur (baring any interrupts) until the timeout has expired. Fix this by checking the need_wakeup flag first and if set, wake the driver and return to the application. Only if need_wakeup is not set should the process sleep if there is a timeout set in the poll() call. Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") Reported-by: Keith Wiles Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20211214102607.7677-1-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f16074eb53c7..7a466ea962c5 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -677,8 +677,6 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; - sock_poll_wait(file, sock, wait); - if (unlikely(!xsk_is_bound(xs))) return mask; @@ -690,6 +688,8 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, else /* Poll needs to drive Tx also in copy mode */ __xsk_sendmsg(sk); + } else { + sock_poll_wait(file, sock, wait); } if (xs->rx && !xskq_prod_is_empty(xs->rx)) From f7abc4c8df8c7930d0b9c56d9abee9a1fca635e9 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 14 Dec 2021 07:18:00 +0530 Subject: [PATCH 497/868] selftests/bpf: Fix OOB write in test_verifier The commit referenced below added fixup_map_timer support (to create a BPF map containing timers), but failed to increase the size of the map_fds array, leading to out of bounds write. Fix this by changing MAX_NR_MAPS to 22. Fixes: e60e6962c503 ("selftests/bpf: Add tests for restricted helpers") Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211214014800.78762-1-memxor@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 465ef3f112c0..d3bf83d5c6cf 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -54,7 +54,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 21 +#define MAX_NR_MAPS 22 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 From f35838a6930296fc1988764cfa54cb3f705c0665 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Thu, 9 Dec 2021 14:56:31 +0800 Subject: [PATCH 498/868] btrfs: fix memory leak in __add_inode_ref() Line 1169 (#3) allocates a memory chunk for victim_name by kmalloc(), but when the function returns in line 1184 (#4) victim_name allocated by line 1169 (#3) is not freed, which will lead to a memory leak. There is a similar snippet of code in this function as allocating a memory chunk for victim_name in line 1104 (#1) as well as releasing the memory in line 1116 (#2). We should kfree() victim_name when the return value of backref_in_log() is less than zero and before the function returns in line 1184 (#4). 1057 static inline int __add_inode_ref(struct btrfs_trans_handle *trans, 1058 struct btrfs_root *root, 1059 struct btrfs_path *path, 1060 struct btrfs_root *log_root, 1061 struct btrfs_inode *dir, 1062 struct btrfs_inode *inode, 1063 u64 inode_objectid, u64 parent_objectid, 1064 u64 ref_index, char *name, int namelen, 1065 int *search_done) 1066 { 1104 victim_name = kmalloc(victim_name_len, GFP_NOFS); // #1: kmalloc (victim_name-1) 1105 if (!victim_name) 1106 return -ENOMEM; 1112 ret = backref_in_log(log_root, &search_key, 1113 parent_objectid, victim_name, 1114 victim_name_len); 1115 if (ret < 0) { 1116 kfree(victim_name); // #2: kfree (victim_name-1) 1117 return ret; 1118 } else if (!ret) { 1169 victim_name = kmalloc(victim_name_len, GFP_NOFS); // #3: kmalloc (victim_name-2) 1170 if (!victim_name) 1171 return -ENOMEM; 1180 ret = backref_in_log(log_root, &search_key, 1181 parent_objectid, victim_name, 1182 victim_name_len); 1183 if (ret < 0) { 1184 return ret; // #4: missing kfree (victim_name-2) 1185 } else if (!ret) { 1241 return 0; 1242 } Fixes: d3316c8233bb ("btrfs: Properly handle backref_in_log retval") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Reviewed-by: Filipe Manana Signed-off-by: Jianglei Nie Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3e6f14e13918..8778401665c3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1181,6 +1181,7 @@ again: parent_objectid, victim_name, victim_name_len); if (ret < 0) { + kfree(victim_name); return ret; } else if (!ret) { ret = -ENOENT; From 33fab972497ae66822c0b6846d4f9382938575b6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 10 Dec 2021 19:02:18 +0000 Subject: [PATCH 499/868] btrfs: fix double free of anon_dev after failure to create subvolume When creating a subvolume, at create_subvol(), we allocate an anonymous device and later call btrfs_get_new_fs_root(), which in turn just calls btrfs_get_root_ref(). There we call btrfs_init_fs_root() which assigns the anonymous device to the root, but if after that call there's an error, when we jump to 'fail' label, we call btrfs_put_root(), which frees the anonymous device and then returns an error that is propagated back to create_subvol(). Than create_subvol() frees the anonymous device again. When this happens, if the anonymous device was not reallocated after the first time it was freed with btrfs_put_root(), we get a kernel message like the following: (...) [13950.282466] BTRFS: error (device dm-0) in create_subvol:663: errno=-5 IO failure [13950.283027] ida_free called for id=65 which is not allocated. [13950.285974] BTRFS info (device dm-0): forced readonly (...) If the anonymous device gets reallocated by another btrfs filesystem or any other kernel subsystem, then bad things can happen. So fix this by setting the root's anonymous device to 0 at btrfs_get_root_ref(), before we call btrfs_put_root(), if an error happened. Fixes: 2dfb1e43f57dd3 ("btrfs: preallocate anon block device at first phase of snapshot creation") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 847aabb30676..28449ca66dbd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1732,6 +1732,14 @@ again: } return root; fail: + /* + * If our caller provided us an anonymous device, then it's his + * responsability to free it in case we fail. So we have to set our + * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() + * and once again by our caller. + */ + if (anon_dev) + root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); } From 1b2e5e5c7feabb4f3041f637b96494944da6aeff Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 14 Dec 2021 11:29:01 +0000 Subject: [PATCH 500/868] btrfs: fix missing last dir item offset update when logging directory When logging a directory, once we finish processing a leaf that is full of dir items, if we find the next leaf was not modified in the current transaction, we grab the first key of that next leaf and log it as to mark the end of a key range boundary. However we did not update the value of ctx->last_dir_item_offset, which tracks the offset of the last logged key. This can result in subsequent logging of the same directory in the current transaction to not realize that key was already logged, and then add it to the middle of a batch that starts with a lower key, resulting later in a leaf with one key that is duplicated and at non-consecutive slots. When that happens we get an error later when writing out the leaf, reporting that there is a pair of keys in wrong order. The report is something like the following: Dec 13 21:44:50 kernel: BTRFS critical (device dm-0): corrupt leaf: root=18446744073709551610 block=118444032 slot=21, bad key order, prev (704687 84 4146773349) current (704687 84 1063561078) Dec 13 21:44:50 kernel: BTRFS info (device dm-0): leaf 118444032 gen 91449 total ptrs 39 free space 546 owner 18446744073709551610 Dec 13 21:44:50 kernel: item 0 key (704687 1 0) itemoff 3835 itemsize 160 Dec 13 21:44:50 kernel: inode generation 35532 size 1026 mode 40755 Dec 13 21:44:50 kernel: item 1 key (704687 12 704685) itemoff 3822 itemsize 13 Dec 13 21:44:50 kernel: item 2 key (704687 24 3817753667) itemoff 3736 itemsize 86 Dec 13 21:44:50 kernel: item 3 key (704687 60 0) itemoff 3728 itemsize 8 Dec 13 21:44:50 kernel: item 4 key (704687 72 0) itemoff 3720 itemsize 8 Dec 13 21:44:50 kernel: item 5 key (704687 84 140445108) itemoff 3666 itemsize 54 Dec 13 21:44:50 kernel: dir oid 704793 type 1 Dec 13 21:44:50 kernel: item 6 key (704687 84 298800632) itemoff 3599 itemsize 67 Dec 13 21:44:50 kernel: dir oid 707849 type 2 Dec 13 21:44:50 kernel: item 7 key (704687 84 476147658) itemoff 3532 itemsize 67 Dec 13 21:44:50 kernel: dir oid 707901 type 2 Dec 13 21:44:50 kernel: item 8 key (704687 84 633818382) itemoff 3471 itemsize 61 Dec 13 21:44:50 kernel: dir oid 704694 type 2 Dec 13 21:44:50 kernel: item 9 key (704687 84 654256665) itemoff 3403 itemsize 68 Dec 13 21:44:50 kernel: dir oid 707841 type 1 Dec 13 21:44:50 kernel: item 10 key (704687 84 995843418) itemoff 3331 itemsize 72 Dec 13 21:44:50 kernel: dir oid 2167736 type 1 Dec 13 21:44:50 kernel: item 11 key (704687 84 1063561078) itemoff 3278 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704799 type 2 Dec 13 21:44:50 kernel: item 12 key (704687 84 1101156010) itemoff 3225 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704696 type 1 Dec 13 21:44:50 kernel: item 13 key (704687 84 2521936574) itemoff 3173 itemsize 52 Dec 13 21:44:50 kernel: dir oid 704704 type 2 Dec 13 21:44:50 kernel: item 14 key (704687 84 2618368432) itemoff 3112 itemsize 61 Dec 13 21:44:50 kernel: dir oid 704738 type 1 Dec 13 21:44:50 kernel: item 15 key (704687 84 2676316190) itemoff 3046 itemsize 66 Dec 13 21:44:50 kernel: dir oid 2167729 type 1 Dec 13 21:44:50 kernel: item 16 key (704687 84 3319104192) itemoff 2986 itemsize 60 Dec 13 21:44:50 kernel: dir oid 704745 type 2 Dec 13 21:44:50 kernel: item 17 key (704687 84 3908046265) itemoff 2929 itemsize 57 Dec 13 21:44:50 kernel: dir oid 2167734 type 1 Dec 13 21:44:50 kernel: item 18 key (704687 84 3945713089) itemoff 2857 itemsize 72 Dec 13 21:44:50 kernel: dir oid 2167730 type 1 Dec 13 21:44:50 kernel: item 19 key (704687 84 4077169308) itemoff 2795 itemsize 62 Dec 13 21:44:50 kernel: dir oid 704688 type 1 Dec 13 21:44:50 kernel: item 20 key (704687 84 4146773349) itemoff 2727 itemsize 68 Dec 13 21:44:50 kernel: dir oid 707892 type 1 Dec 13 21:44:50 kernel: item 21 key (704687 84 1063561078) itemoff 2674 itemsize 53 Dec 13 21:44:50 kernel: dir oid 704799 type 2 Dec 13 21:44:50 kernel: item 22 key (704687 96 2) itemoff 2612 itemsize 62 Dec 13 21:44:50 kernel: item 23 key (704687 96 6) itemoff 2551 itemsize 61 Dec 13 21:44:50 kernel: item 24 key (704687 96 7) itemoff 2498 itemsize 53 Dec 13 21:44:50 kernel: item 25 key (704687 96 12) itemoff 2446 itemsize 52 Dec 13 21:44:50 kernel: item 26 key (704687 96 14) itemoff 2385 itemsize 61 Dec 13 21:44:50 kernel: item 27 key (704687 96 18) itemoff 2325 itemsize 60 Dec 13 21:44:50 kernel: item 28 key (704687 96 24) itemoff 2271 itemsize 54 Dec 13 21:44:50 kernel: item 29 key (704687 96 28) itemoff 2218 itemsize 53 Dec 13 21:44:50 kernel: item 30 key (704687 96 62) itemoff 2150 itemsize 68 Dec 13 21:44:50 kernel: item 31 key (704687 96 66) itemoff 2083 itemsize 67 Dec 13 21:44:50 kernel: item 32 key (704687 96 75) itemoff 2015 itemsize 68 Dec 13 21:44:50 kernel: item 33 key (704687 96 79) itemoff 1948 itemsize 67 Dec 13 21:44:50 kernel: item 34 key (704687 96 82) itemoff 1882 itemsize 66 Dec 13 21:44:50 kernel: item 35 key (704687 96 83) itemoff 1810 itemsize 72 Dec 13 21:44:50 kernel: item 36 key (704687 96 85) itemoff 1753 itemsize 57 Dec 13 21:44:50 kernel: item 37 key (704687 96 87) itemoff 1681 itemsize 72 Dec 13 21:44:50 kernel: item 38 key (704694 1 0) itemoff 1521 itemsize 160 Dec 13 21:44:50 kernel: inode generation 35534 size 30 mode 40755 Dec 13 21:44:50 kernel: BTRFS error (device dm-0): block=118444032 write time tree block corruption detected So fix that by adding the missing update of ctx->last_dir_item_offset with the offset of the boundary key. Reported-by: Chris Murphy Link: https://lore.kernel.org/linux-btrfs/CAJCQCtT+RSzpUjbMq+UfzNUMe1X5+1G+DnAGbHC=OZ=iRS24jg@mail.gmail.com/ Fixes: dc2872247ec0ca ("btrfs: keep track of the last logged keys when logging a directory") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8778401665c3..6993dcdba6f1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3978,6 +3978,7 @@ search: goto done; } if (btrfs_header_generation(path->nodes[0]) != trans->transid) { + ctx->last_dir_item_offset = min_key.offset; ret = overwrite_item(trans, log, dst_path, path->nodes[0], path->slots[0], &min_key); From 80d5be1a057e05f01d66e986cfd34d71845e5190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Mon, 6 Dec 2021 22:45:43 +0000 Subject: [PATCH 501/868] ASoC: tas2770: Fix setting of high sample rates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although the codec advertises support for 176.4 and 192 ksps, without this fix setting those sample rates fails with EINVAL at hw_params time. Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20211206224529.74656-1-povik@protonmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 172e79cbe0da..6549e7fef3e3 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -291,11 +291,11 @@ static int tas2770_set_samplerate(struct tas2770_priv *tas2770, int samplerate) ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_88_2_96KHZ; break; - case 19200: + case 192000: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_48KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; - case 17640: + case 176400: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; From 1bcd326631dc4faa3322d60b4fc45e8b3747993e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 6 Dec 2021 22:08:03 +0100 Subject: [PATCH 502/868] ASoC: meson: aiu: fifo: Add missing dma_coerce_mask_and_coherent() The FIFO registers which take an DMA-able address are only 32-bit wide on AIU. Add dma_coerce_mask_and_coherent() to make the DMA core aware of this limitation. Fixes: 6ae9ca9ce986bf ("ASoC: meson: aiu: add i2s and spdif support") Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211206210804.2512999-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- sound/soc/meson/aiu-fifo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/meson/aiu-fifo.c b/sound/soc/meson/aiu-fifo.c index 4ad23267cace..d67ff4cdabd5 100644 --- a/sound/soc/meson/aiu-fifo.c +++ b/sound/soc/meson/aiu-fifo.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -179,6 +180,11 @@ int aiu_fifo_pcm_new(struct snd_soc_pcm_runtime *rtd, struct snd_card *card = rtd->card->snd_card; struct aiu_fifo *fifo = dai->playback_dma_data; size_t size = fifo->pcm->buffer_bytes_max; + int ret; + + ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; snd_pcm_set_managed_buffer_all(rtd->pcm, SNDRV_DMA_TYPE_DEV, card->dev, size, size); From ee907afb0c39a41ee74b862882cfe12820c74b98 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 6 Dec 2021 22:08:04 +0100 Subject: [PATCH 503/868] ASoC: meson: aiu: Move AIU_I2S_MISC hold setting to aiu-fifo-i2s The out-of-tree vendor driver uses the following approach to set the AIU_I2S_MISC register: 1) write AIU_MEM_I2S_START_PTR and AIU_MEM_I2S_RD_PTR 2) configure AIU_I2S_MUTE_SWAP[15:0] 3) write AIU_MEM_I2S_END_PTR 4) set AIU_I2S_MISC[2] to 1 (documented as: "put I2S interface in hold mode") 5) set AIU_I2S_MISC[4] to 1 (depending on the driver revision it always stays at 1 while for older drivers this bit is unset in step 4) 6) set AIU_I2S_MISC[2] to 0 7) write AIU_MEM_I2S_MASKS 8) toggle AIU_MEM_I2S_CONTROL[0] 9) toggle AIU_MEM_I2S_BUF_CNTL[0] Move setting the AIU_I2S_MISC[2] bit to aiu_fifo_i2s_hw_params() so it resembles the flow in the vendor kernel more closely. While here also configure AIU_I2S_MISC[4] (documented as: "force each audio data to left or right according to the bit attached with the audio data") similar to how the vendor driver does this. This fixes the infamous and long-standing "machine gun noise" issue (a buffer underrun issue). Fixes: 6ae9ca9ce986bf ("ASoC: meson: aiu: add i2s and spdif support") Reported-by: Christian Hewitt Reported-by: Geraldo Nascimento Tested-by: Christian Hewitt Tested-by: Geraldo Nascimento Acked-by: Jerome Brunet Cc: stable@vger.kernel.org Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20211206210804.2512999-3-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- sound/soc/meson/aiu-encoder-i2s.c | 33 ------------------------------- sound/soc/meson/aiu-fifo-i2s.c | 19 ++++++++++++++++++ 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/sound/soc/meson/aiu-encoder-i2s.c b/sound/soc/meson/aiu-encoder-i2s.c index 932224552146..67729de41a73 100644 --- a/sound/soc/meson/aiu-encoder-i2s.c +++ b/sound/soc/meson/aiu-encoder-i2s.c @@ -18,7 +18,6 @@ #define AIU_RST_SOFT_I2S_FAST BIT(0) #define AIU_I2S_DAC_CFG_MSB_FIRST BIT(2) -#define AIU_I2S_MISC_HOLD_EN BIT(2) #define AIU_CLK_CTRL_I2S_DIV_EN BIT(0) #define AIU_CLK_CTRL_I2S_DIV GENMASK(3, 2) #define AIU_CLK_CTRL_AOCLK_INVERT BIT(6) @@ -36,37 +35,6 @@ static void aiu_encoder_i2s_divider_enable(struct snd_soc_component *component, enable ? AIU_CLK_CTRL_I2S_DIV_EN : 0); } -static void aiu_encoder_i2s_hold(struct snd_soc_component *component, - bool enable) -{ - snd_soc_component_update_bits(component, AIU_I2S_MISC, - AIU_I2S_MISC_HOLD_EN, - enable ? AIU_I2S_MISC_HOLD_EN : 0); -} - -static int aiu_encoder_i2s_trigger(struct snd_pcm_substream *substream, int cmd, - struct snd_soc_dai *dai) -{ - struct snd_soc_component *component = dai->component; - - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - aiu_encoder_i2s_hold(component, false); - return 0; - - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - aiu_encoder_i2s_hold(component, true); - return 0; - - default: - return -EINVAL; - } -} - static int aiu_encoder_i2s_setup_desc(struct snd_soc_component *component, struct snd_pcm_hw_params *params) { @@ -353,7 +321,6 @@ static void aiu_encoder_i2s_shutdown(struct snd_pcm_substream *substream, } const struct snd_soc_dai_ops aiu_encoder_i2s_dai_ops = { - .trigger = aiu_encoder_i2s_trigger, .hw_params = aiu_encoder_i2s_hw_params, .hw_free = aiu_encoder_i2s_hw_free, .set_fmt = aiu_encoder_i2s_set_fmt, diff --git a/sound/soc/meson/aiu-fifo-i2s.c b/sound/soc/meson/aiu-fifo-i2s.c index 2388a2d0b3a6..57e6e7160d2f 100644 --- a/sound/soc/meson/aiu-fifo-i2s.c +++ b/sound/soc/meson/aiu-fifo-i2s.c @@ -20,6 +20,8 @@ #define AIU_MEM_I2S_CONTROL_MODE_16BIT BIT(6) #define AIU_MEM_I2S_BUF_CNTL_INIT BIT(0) #define AIU_RST_SOFT_I2S_FAST BIT(0) +#define AIU_I2S_MISC_HOLD_EN BIT(2) +#define AIU_I2S_MISC_FORCE_LEFT_RIGHT BIT(4) #define AIU_FIFO_I2S_BLOCK 256 @@ -90,6 +92,10 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, unsigned int val; int ret; + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, + AIU_I2S_MISC_HOLD_EN); + ret = aiu_fifo_hw_params(substream, params, dai); if (ret) return ret; @@ -117,6 +123,19 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, snd_soc_component_update_bits(component, AIU_MEM_I2S_MASKS, AIU_MEM_I2S_MASKS_IRQ_BLOCK, val); + /* + * Most (all?) supported SoCs have this bit set by default. The vendor + * driver however sets it manually (depending on the version either + * while un-setting AIU_I2S_MISC_HOLD_EN or right before that). Follow + * the same approach for consistency with the vendor driver. + */ + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_FORCE_LEFT_RIGHT, + AIU_I2S_MISC_FORCE_LEFT_RIGHT); + + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, 0); + return 0; } From 0013881c1145d36bf26165bb70fdd7560a5507a3 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Thu, 4 Nov 2021 14:52:11 +0100 Subject: [PATCH 504/868] ice: Use div64_u64 instead of div_u64 in adjfine Change the division in ice_ptp_adjfine from div_u64 to div64_u64. div_u64 is used when the divisor is 32 bit but in this case incval is 64 bit and it caused incorrect calculations and incval adjustments. Fixes: 06c16d89d2cb ("ice: register 1588 PTP clock device object for E810 devices") Signed-off-by: Karol Kolacinski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index bf7247c6f58e..ad7cabe7932f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -705,7 +705,7 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) scaled_ppm = -scaled_ppm; } - while ((u64)scaled_ppm > div_u64(U64_MAX, incval)) { + while ((u64)scaled_ppm > div64_u64(U64_MAX, incval)) { /* handle overflow by scaling down the scaled_ppm and * the divisor, losing some precision */ From 37e738b6fdb14529534dca441e0222313688fde3 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Tue, 16 Nov 2021 13:07:14 +0100 Subject: [PATCH 505/868] ice: Don't put stale timestamps in the skb The driver has to check if it does not accidentally put the timestamp in the SKB before previous timestamp gets overwritten. Timestamp values in the PHY are read only and do not get cleared except at hardware reset or when a new timestamp value is captured. The cached_tstamp field is used to detect the case where a new timestamp has not yet been captured, ensuring that we avoid sending stale timestamp data to the stack. Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") Signed-off-by: Karol Kolacinski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 11 ++++------- drivers/net/ethernet/intel/ice/ice_ptp.h | 6 ++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ad7cabe7932f..442b031b0edc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1540,19 +1540,16 @@ static void ice_ptp_tx_tstamp_work(struct kthread_work *work) if (err) continue; - /* Check if the timestamp is valid */ - if (!(raw_tstamp & ICE_PTP_TS_VALID)) + /* Check if the timestamp is invalid or stale */ + if (!(raw_tstamp & ICE_PTP_TS_VALID) || + raw_tstamp == tx->tstamps[idx].cached_tstamp) continue; - /* clear the timestamp register, so that it won't show valid - * again when re-used. - */ - ice_clear_phy_tstamp(hw, tx->quad, phy_idx); - /* The timestamp is valid, so we'll go ahead and clear this * index and then send the timestamp up to the stack. */ spin_lock(&tx->lock); + tx->tstamps[idx].cached_tstamp = raw_tstamp; clear_bit(idx, tx->in_use); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index f71ad317d6c8..53c15fc9d996 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -55,15 +55,21 @@ struct ice_perout_channel { * struct ice_tx_tstamp - Tracking for a single Tx timestamp * @skb: pointer to the SKB for this timestamp request * @start: jiffies when the timestamp was first requested + * @cached_tstamp: last read timestamp * * This structure tracks a single timestamp request. The SKB pointer is * provided when initiating a request. The start time is used to ensure that * we discard old requests that were not fulfilled within a 2 second time * window. + * Timestamp values in the PHY are read only and do not get cleared except at + * hardware reset or when a new timestamp value is captured. The cached_tstamp + * field is used to detect the case where a new timestamp has not yet been + * captured, ensuring that we avoid sending stale timestamp data to the stack. */ struct ice_tx_tstamp { struct sk_buff *skb; unsigned long start; + u64 cached_tstamp; }; /** From 6c3118c32129b4197999a8928ba776bcabd0f5c4 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Fri, 10 Dec 2021 14:55:03 -0800 Subject: [PATCH 506/868] signal: Skip the altstack update when not needed == Background == Support for large, "dynamic" fpstates was recently merged. This included code to ensure that sigaltstacks are sufficiently sized for these large states. A new lock was added to remove races between enabling large features and setting up sigaltstacks. == Problem == The new lock (sigaltstack_lock()) is acquired in the sigreturn path before restoring the old sigaltstack. Unfortunately, contention on the new lock causes a measurable signal handling performance regression [1]. However, the common case is that no *changes* are made to the sigaltstack state at sigreturn. == Solution == do_sigaltstack() acquires sigaltstack_lock() and is used for both sys_sigaltstack() and restoring the sigaltstack in sys_sigreturn(). Check for changes to the sigaltstack before taking the lock. If no changes were made, return before acquiring the lock. This removes lock contention from the common-case sigreturn path. [1] https://lore.kernel.org/lkml/20211207012128.GA16074@xsang-OptiPlex-9020/ Fixes: 3aac3ebea08f ("x86/signal: Implement sigaltstack size validation") Reported-by: kernel test robot Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20211210225503.12734-1-chang.seok.bae@intel.com --- kernel/signal.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index a629b11bf3e0..dfcee3888b00 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4185,6 +4185,15 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp, ss_mode != 0)) return -EINVAL; + /* + * Return before taking any locks if no actual + * sigaltstack changes were requested. + */ + if (t->sas_ss_sp == (unsigned long)ss_sp && + t->sas_ss_size == ss_size && + t->sas_ss_flags == ss_flags) + return 0; + sigaltstack_lock(); if (ss_mode == SS_DISABLE) { ss_size = 0; From f3a8076eb28cae1553958c629aecec479394bbe2 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Sat, 4 Dec 2021 18:59:08 +0800 Subject: [PATCH 507/868] drm/amdgpu: correct register access for RLC_JUMP_TABLE_RESTORE should count on GC IP base address Signed-off-by: Le Ma Signed-off-by: Hawking Zhang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b305fd39874f..edb3e3b08eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3070,8 +3070,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } From 841933d5b8aa853abe68e63827f68f50fab37226 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sat, 4 Dec 2021 19:22:12 +0800 Subject: [PATCH 508/868] drm/amdgpu: don't override default ECO_BITs setting Leave this bit as hardware default setting Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 1 - drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 2 -- 8 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 480e41847d7c..ec4d5e15b766 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -162,7 +162,6 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 14c1c1a297dd..6e0ace2fbfab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -196,7 +196,6 @@ static void gfxhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index e80d1dc43079..b4eddf6e98a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -197,7 +197,6 @@ static void gfxhub_v2_1_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index a99953833820..b3bede1dc41d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -145,7 +145,6 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index f80a14a1b82d..f5f7181f9af5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -165,7 +165,6 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 25f8e93e5ec3..3718ff610ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -267,7 +267,6 @@ static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index a11d60ec6321..9e16da28505a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -194,7 +194,6 @@ static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC); /* UC, uncached */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index c4ef822bbe8c..ff49eeaf7882 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -189,8 +189,6 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) ENABLE_ADVANCED_DRIVER_MODEL, 1); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); - tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, 0); tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, MTYPE, MTYPE_UC);/* XXX for emulation. */ tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, From dcd10d879a9d1d4e929d374c2f24aba8fac3252b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 9 Dec 2021 12:13:53 -0600 Subject: [PATCH 509/868] drm/amd/pm: fix reading SMU FW version from amdgpu_firmware_info on YC This value does not get cached into adev->pm.fw_version during startup for smu13 like it does for other SMU like smu12. Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 35145db6eedf..19a5d2c39c8d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -198,6 +198,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) int smu_v13_0_check_fw_version(struct smu_context *smu) { + struct amdgpu_device *adev = smu->adev; uint32_t if_version = 0xff, smu_version = 0xff; uint16_t smu_major; uint8_t smu_minor, smu_debug; @@ -210,6 +211,8 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu_major = (smu_version >> 16) & 0xffff; smu_minor = (smu_version >> 8) & 0xff; smu_debug = (smu_version >> 0) & 0xff; + if (smu->is_apu) + adev->pm.fw_version = smu_version; switch (smu->adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): From 7e4d2f30df3fb48f75ce9e96867d42bdddab83ac Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Dec 2021 15:03:59 -0800 Subject: [PATCH 510/868] drm/amd/display: Set exit_optimized_pwr_state for DCN31 [Why] SMU now respects the PHY refclk disable request from driver. This causes a hang during hotplug when PHY refclk was disabled because it's not being re-enabled and the transmitter control starts on dc_link_detect. [How] We normally would re-enable the clk with exit_optimized_pwr_state but this is only set on DCN21 and DCN301. Set it for dcn31 as well. This fixes DMCUB timeouts in the PHY. Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ") Reviewed-by: Eric Yang Acked-by: Pavle Kotarac Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 05335a8c3c2d..4f6e639e9353 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -101,6 +101,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; From 791255ca9fbe38042cfd55df5deb116dc11fef18 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 10 Dec 2021 15:04:05 -0800 Subject: [PATCH 511/868] drm/amd/display: Reset DMCUB before HW init [Why] If the firmware wasn't reset by PSP or HW and is currently running then the firmware will hang or perform underfined behavior when we modify its firmware state underneath it. [How] Reset DMCUB before setting up cache windows and performing HW init. Reviewed-by: Aurabindo Jayamohanan Pillai Acked-by: Pavle Kotarac Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 122dae1a1813..e727f1dd2a9a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1051,6 +1051,11 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev) return 0; } + /* Reset DMCUB if it was previously running - before we overwrite its memory. */ + status = dmub_srv_hw_reset(dmub_srv); + if (status != DMUB_STATUS_OK) + DRM_WARN("Error resetting DMUB HW: %d\n", status); + hdr = (const struct dmcub_firmware_header_v1_0 *)dmub_fw->data; fw_inst_const = dmub_fw->data + From 17c65d6fca844ee72a651944d8ce721e9040bf70 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 13 Dec 2021 14:38:38 +0800 Subject: [PATCH 512/868] drm/amdgpu: correct the wrong cached state for GMC on PICASSO Pair the operations did in GMC ->hw_init and ->hw_fini. That can help to maintain correct cached state for GMC and avoid unintention gate operation dropping due to wrong cached state. BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1828 Signed-off-by: Evan Quan Acked-by: Guchun Chen Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 8 ++++---- drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c | 7 ++++++- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cb82404df534..d84523cf5f75 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1808,6 +1808,14 @@ static int gmc_v9_0_hw_fini(void *handle) return 0; } + /* + * Pair the operations did in gmc_v9_0_hw_init and thus maintain + * a correct cached state for GMC. Otherwise, the "gate" again + * operation on S3 resuming will fail due to wrong cached state. + */ + if (adev->mmhub.funcs->update_power_gating) + adev->mmhub.funcs->update_power_gating(adev, false); + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index b3bede1dc41d..1da2ec692057 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -301,10 +301,10 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return; - if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); - - } + if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) + amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GMC, + enable); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 8d796ed3b7d1..619f8d305292 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -1328,7 +1328,12 @@ static int pp_set_powergating_by_smu(void *handle, pp_dpm_powergate_vce(handle, gate); break; case AMD_IP_BLOCK_TYPE_GMC: - pp_dpm_powergate_mmhub(handle); + /* + * For now, this is only used on PICASSO. + * And only "gate" operation is supported. + */ + if (gate) + pp_dpm_powergate_mmhub(handle); break; case AMD_IP_BLOCK_TYPE_GFX: ret = pp_dpm_powergate_gfx(handle, gate); From aa464957f7e660abd554f2546a588f6533720e21 Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Tue, 14 Dec 2021 15:25:54 +0800 Subject: [PATCH 513/868] drm/amd/pm: fix a potential gpu_metrics_table memory leak Memory is allocated for gpu_metrics_table in renoir_init_smc_tables(), but not freed in int smu_v12_0_fini_smc_tables(). Free it! Fixes: 95868b85764a ("drm/amd/powerplay: add Renoir support for gpu metrics export") Signed-off-by: Lang Yu Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index d60b8c5e8715..43028f2cd28b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -191,6 +191,9 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } From 4ad8181426df92976feee5fbc55236293d069b37 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 9 Dec 2021 22:06:55 +0800 Subject: [PATCH 514/868] RDMA/hns: Fix RNR retransmission issue for HIP08 Due to the discrete nature of the HIP08 timer unit, a requester might finish the timeout period sooner, in elapsed real time, than its responder does, even when both sides share the identical RNR timeout length included in the RNR Nak packet and the responder indeed starts the timing prior to the requester. Furthermore, if a 'providential' resend packet arrived before the responder's timeout period expired, the responder is certainly entitled to drop the packet silently in the light of IB protocol. To address this problem, our team made good use of certain hardware facts: 1) The timing resolution regards the transmission arrangements is 1 microsecond, e.g. if cq_period field is set to 3, it would be interpreted as 3 microsecond by hardware 2) A QPC field shall inform the hardware how many timing unit (ticks) constitutes a full microsecond, which, by default, is 1000 3) It takes 14ns for the processor to handle a packet in the buffer, so the RNR timeout length of 10ns would ensure our processing mechanism is disabled during the entire timeout period and the packet won't be dropped silently To achieve (3), we permanently set the QPC field mentioned in (2) to zero which nominally indicates every time tick is equivalent to a microsecond in wall-clock time; now, a RNR timeout period at face value of 10 would only last 10 ticks, which is 10ns in wall-clock time. It's worth noting that we adapt the driver by magnifying certain configuration parameters(cq_period, eq_period and ack_timeout)by 1000 given the user assumes the configuring timing unit to be microseconds. Also, this particular improvisation is only deployed on HIP08 since other hardware has already solved this issue. Fixes: cfc85f3e4b7f ("RDMA/hns: Add profile support for hip08 driver") Link: https://lore.kernel.org/r/20211209140655.49493-1-liangwenpeng@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 64 +++++++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 8 +++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index bbfa1332dedc..eb0defa80d0d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1594,11 +1594,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; + u32 clock_cycles_of_1us; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM, false); - hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + clock_cycles_of_1us = HNS_ROCE_1NS_CFG; + else + clock_cycles_of_1us = HNS_ROCE_1US_CFG; + + hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us); hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); @@ -4802,6 +4808,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, return ret; } +static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout) +{ +#define QP_ACK_TIMEOUT_MAX_HIP08 20 +#define QP_ACK_TIMEOUT_OFFSET 10 +#define QP_ACK_TIMEOUT_MAX 31 + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) { + ibdev_warn(&hr_dev->ib_dev, + "Local ACK timeout shall be 0 to 20.\n"); + return false; + } + *timeout += QP_ACK_TIMEOUT_OFFSET; + } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) { + if (*timeout > QP_ACK_TIMEOUT_MAX) { + ibdev_warn(&hr_dev->ib_dev, + "Local ACK timeout shall be 0 to 31.\n"); + return false; + } + } + + return true; +} + static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4811,6 +4841,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret = 0; + u8 timeout; if (attr_mask & IB_QP_AV) { ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context, @@ -4820,12 +4851,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - if (attr->timeout < 31) { - hr_reg_write(context, QPC_AT, attr->timeout); + timeout = attr->timeout; + if (check_qp_timeout_cfg_range(hr_dev, &timeout)) { + hr_reg_write(context, QPC_AT, timeout); hr_reg_clear(qpc_mask, QPC_AT); - } else { - ibdev_warn(&hr_dev->ib_dev, - "Local ACK timeout shall be 0 to 30.\n"); } } @@ -4882,7 +4911,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); if (attr_mask & IB_QP_MIN_RNR_TIMER) { - hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer); + hr_reg_write(context, QPC_MIN_RNR_TIME, + hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ? + HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer); hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME); } @@ -5499,6 +5530,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count); hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT); + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, + "cq_period(%u) reached the upper limit, adjusted to 65.\n", + cq_period); + cq_period = HNS_ROCE_MAX_CQ_PERIOD; + } + cq_period *= HNS_ROCE_CLOCK_ADJUST; + } hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period); hr_reg_clear(cqc_mask, CQC_CQ_PERIOD); @@ -5894,6 +5935,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX); hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) { + dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n", + eq->eq_period); + eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD; + } + eq->eq_period *= HNS_ROCE_CLOCK_ADJUST; + } + hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period); hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER); hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 4d904d5e82be..35c61da7ba15 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1444,6 +1444,14 @@ struct hns_roce_dip { struct list_head node; /* all dips are on a list */ }; +/* only for RNR timeout issue of HIP08 */ +#define HNS_ROCE_CLOCK_ADJUST 1000 +#define HNS_ROCE_MAX_CQ_PERIOD 65 +#define HNS_ROCE_MAX_EQ_PERIOD 65 +#define HNS_ROCE_RNR_TIMER_10NS 1 +#define HNS_ROCE_1US_CFG 999 +#define HNS_ROCE_1NS_CFG 0 + #define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0 #define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0 #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0 From bee90911e0138c76ee67458ac0d58b38a3190f65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Wed, 8 Dec 2021 18:52:38 +0100 Subject: [PATCH 515/868] IB/qib: Fix memory leak in qib_user_sdma_queue_pkts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wrong goto label was used for the error case and missed cleanup of the pkt allocation. Fixes: d39bf40e55e6 ("IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields") Link: https://lore.kernel.org/r/20211208175238.29983-1-jose.exposito89@gmail.com Addresses-Coverity-ID: 1493352 ("Resource leak") Signed-off-by: José Expósito Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index ac11943a5ddb..bf2f30d67949 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -941,7 +941,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, &addrlimit) || addrlimit > type_max(typeof(pkt->addrlimit))) { ret = -EINVAL; - goto free_pbc; + goto free_pkt; } pkt->addrlimit = addrlimit; From 12d3bbdd6bd2780b71cc466f3fbc6eb7d43bbc2a Mon Sep 17 00:00:00 2001 From: Jiacheng Shi Date: Fri, 10 Dec 2021 01:42:34 -0800 Subject: [PATCH 516/868] RDMA/hns: Replace kfree() with kvfree() Variables allocated by kvmalloc_array() should not be freed by kfree. Because they may be allocated by vmalloc. So we replace kfree() with kvfree() here. Fixes: 6fd610c5733d ("RDMA/hns: Support 0 hop addressing for SRQ buffer") Link: https://lore.kernel.org/r/20211210094234.5829-1-billsjc@sjtu.edu.cn Signed-off-by: Jiacheng Shi Acked-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 6eee9deadd12..e64ef6903fb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -259,7 +259,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) static void free_srq_wrid(struct hns_roce_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); srq->wrid = NULL; } From 404cd9a22150f24acf23a8df2ad0c094ba379f57 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Dec 2021 15:16:01 -0800 Subject: [PATCH 517/868] mptcp: remove tcp ulp setsockopt support TCP_ULP setsockopt cannot be used for mptcp because its already used internally to plumb subflow (tcp) sockets to the mptcp layer. syzbot managed to trigger a crash for mptcp connections that are in fallback mode: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] CPU: 1 PID: 1083 Comm: syz-executor.3 Not tainted 5.16.0-rc2-syzkaller #0 RIP: 0010:tls_build_proto net/tls/tls_main.c:776 [inline] [..] __tcp_set_ulp net/ipv4/tcp_ulp.c:139 [inline] tcp_set_ulp+0x428/0x4c0 net/ipv4/tcp_ulp.c:160 do_tcp_setsockopt+0x455/0x37c0 net/ipv4/tcp.c:3391 mptcp_setsockopt+0x1b47/0x2400 net/mptcp/sockopt.c:638 Remove support for TCP_ULP setsockopt. Fixes: d9e4c1291810 ("mptcp: only admit explicitly supported sockopt") Reported-by: syzbot+1fd9b69cde42967d1add@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/sockopt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0f1e661c2032..f8efd478ac97 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -525,7 +525,6 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NODELAY: case TCP_THIN_LINEAR_TIMEOUTS: case TCP_CONGESTION: - case TCP_ULP: case TCP_CORK: case TCP_KEEPIDLE: case TCP_KEEPINTVL: From d6692b3b97bdc165d150f4c1505751a323a80717 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Dec 2021 15:16:02 -0800 Subject: [PATCH 518/868] mptcp: clear 'kern' flag from fallback sockets The mptcp ULP extension relies on sk->sk_sock_kern being set correctly: It prevents setsockopt(fd, IPPROTO_TCP, TCP_ULP, "mptcp", 6); from working for plain tcp sockets (any userspace-exposed socket). But in case of fallback, accept() can return a plain tcp sk. In such case, sk is still tagged as 'kernel' and setsockopt will work. This will crash the kernel, The subflow extension has a NULL ctx->conn mptcp socket: BUG: KASAN: null-ptr-deref in subflow_data_ready+0x181/0x2b0 Call Trace: tcp_data_ready+0xf8/0x370 [..] Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections") Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c82a76d2d0bf..6dc1ff07994c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2879,7 +2879,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, */ if (WARN_ON_ONCE(!new_mptcp_sock)) { tcp_sk(newsk)->is_mptcp = 0; - return newsk; + goto out; } /* acquire the 2nd reference for the owning socket */ @@ -2891,6 +2891,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } +out: + newsk->sk_kern_sock = kern; return newsk; } From 3d79e3756ca90f7a6087b77b62c1d9c0801e0820 Mon Sep 17 00:00:00 2001 From: Maxim Galaganov Date: Tue, 14 Dec 2021 15:16:03 -0800 Subject: [PATCH 519/868] mptcp: fix deadlock in __mptcp_push_pending() __mptcp_push_pending() may call mptcp_flush_join_list() with subflow socket lock held. If such call hits mptcp_sockopt_sync_all() then subsequently __mptcp_sockopt_sync() could try to lock the subflow socket for itself, causing a deadlock. sysrq: Show Blocked State task:ss-server state:D stack: 0 pid: 938 ppid: 1 flags:0x00000000 Call Trace: __schedule+0x2d6/0x10c0 ? __mod_memcg_state+0x4d/0x70 ? csum_partial+0xd/0x20 ? _raw_spin_lock_irqsave+0x26/0x50 schedule+0x4e/0xc0 __lock_sock+0x69/0x90 ? do_wait_intr_irq+0xa0/0xa0 __lock_sock_fast+0x35/0x50 mptcp_sockopt_sync_all+0x38/0xc0 __mptcp_push_pending+0x105/0x200 mptcp_sendmsg+0x466/0x490 sock_sendmsg+0x57/0x60 __sys_sendto+0xf0/0x160 ? do_wait_intr_irq+0xa0/0xa0 ? fpregs_restore_userregs+0x12/0xd0 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f9ba546c2d0 RSP: 002b:00007ffdc3b762d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f9ba56c8060 RCX: 00007f9ba546c2d0 RDX: 000000000000077a RSI: 0000000000e5e180 RDI: 0000000000000234 RBP: 0000000000cc57f0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f9ba56c8060 R13: 0000000000b6ba60 R14: 0000000000cc7840 R15: 41d8685b1d7901b8 Fix the issue by using __mptcp_flush_join_list() instead of plain mptcp_flush_join_list() inside __mptcp_push_pending(), as suggested by Florian. The sockopt sync will be deferred to the workqueue. Fixes: 1b3e7ede1365 ("mptcp: setsockopt: handle SO_KEEPALIVE and SO_PRIORITY") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/244 Suggested-by: Florian Westphal Reviewed-by: Florian Westphal Signed-off-by: Maxim Galaganov Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6dc1ff07994c..54613f5b7521 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1524,7 +1524,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) int ret = 0; prev_ssk = ssk; - mptcp_flush_join_list(msk); + __mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); /* First check. If the ssk has changed since From 6813b1928758ce64fabbb8ef157f994b7c2235fa Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 14 Dec 2021 15:16:04 -0800 Subject: [PATCH 520/868] mptcp: add missing documented NL params 'loc_id' and 'rem_id' are set in all events linked to subflows but those were missing in the events description in the comments. Fixes: b911c97c7dc7 ("mptcp: add netlink event support") Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index c8cc46f80a16..f106a3941cdf 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -136,19 +136,21 @@ struct mptcp_info { * MPTCP_EVENT_REMOVED: token, rem_id * An address has been lost by the peer. * - * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, - * daddr4 | daddr6, sport, dport, backup, - * if_idx [, error] + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, loc_id, rem_id, + * saddr4 | saddr6, daddr4 | daddr6, sport, + * dport, backup, if_idx [, error] * A new subflow has been established. 'error' should not be set. * - * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] + * MPTCP_EVENT_SUB_CLOSED: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] * A subflow has been closed. An error (copy of sk_err) could be set if an * error has been detected for this subflow. * - * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, - * sport, dport, backup, if_idx [, error] - * The priority of a subflow has changed. 'error' should not be set. + * MPTCP_EVENT_SUB_PRIORITY: token, family, loc_id, rem_id, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, if_idx + * [, error] + * The priority of a subflow has changed. 'error' should not be set. */ enum mptcp_event_type { MPTCP_EVENT_UNSPEC = 0, From cb2ac2912a9ca7d3d26291c511939a41361d2d83 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Dec 2021 07:03:24 -0700 Subject: [PATCH 521/868] block: reduce kblockd_mod_delayed_work_on() CPU consumption Dexuan reports that he's seeing spikes of very heavy CPU utilization when running 24 disks and using the 'none' scheduler. This happens off the sched restart path, because SCSI requires the queue to be restarted async, and hence we're hammering on mod_delayed_work_on() to ensure that the work item gets run appropriately. Avoid hammering on the timer and just use queue_work_on() if no delay has been specified. Reported-and-tested-by: Dexuan Cui Link: https://lore.kernel.org/linux-block/BYAPR21MB1270C598ED214C0490F47400BF719@BYAPR21MB1270.namprd21.prod.outlook.com/ Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 1378d084c770..c1833f95cb97 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1484,6 +1484,8 @@ EXPORT_SYMBOL(kblockd_schedule_work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { + if (!delay) + return queue_work_on(cpu, kblockd_workqueue, &dwork->work); return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); From aa97f6cdb7e92909e17c8ca63e622fcb81d57a57 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Fri, 12 Nov 2021 13:36:29 +0800 Subject: [PATCH 522/868] bcache: fix NULL pointer reference in cached_dev_detach_finish Commit 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper place on backing device detach") tries to fix calc_cached_dev_sectors when bcache device detaches, but now we have: cached_dev_detach_finish ... bcache_device_detach(&dc->disk); ... closure_put(&d->c->caching); d->c = NULL; [*explicitly set dc->disk.c to NULL*] list_move(&dc->list, &uncached_devices); calc_cached_dev_sectors(dc->disk.c); [*passing a NULL pointer*] ... Upper codeflows shows how bug happens, this patch fix the problem by caching dc->disk.c beforehand, and cache_set won't be freed under us because c->caching closure at least holds a reference count and closure callback __cache_set_unregister only being called by bch_cache_set_stop which using closure_queue(&c->caching), that means c->caching closure callback for destroying cache_set won't be trigger by previous closure_put(&d->c->caching). So at this stage(while cached_dev_detach_finish is calling) it's safe to access cache_set dc->disk.c. Fixes: 0259d4498ba4 ("bcache: move calc_cached_dev_sectors to proper place on backing device detach") Signed-off-by: Lin Feng Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20211112053629.3437-2-colyli@suse.de Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 86b9e355c583..140f35dc0c45 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1139,6 +1139,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); + struct cache_set *c = dc->disk.c; BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); BUG_ON(refcount_read(&dc->count)); @@ -1156,7 +1157,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); - calc_cached_dev_sectors(dc->disk.c); + calc_cached_dev_sectors(c); clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); From 7d3baf0afa3aa9102d6a521a8e4c41888bb79882 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 12:51:56 +0000 Subject: [PATCH 523/868] bpf: Fix kernel address leakage in atomic fetch The change in commit 37086bfdc737 ("bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH") around check_mem_access() handling is buggy since this would allow for unprivileged users to leak kernel pointers. For example, an atomic fetch/and with -1 on a stack destination which holds a spilled pointer will migrate the spilled register type into a scalar, which can then be exported out of the program (since scalar != pointer) by dumping it into a map value. The original implementation of XADD was preventing this situation by using a double call to check_mem_access() one with BPF_READ and a subsequent one with BPF_WRITE, in both cases passing -1 as a placeholder value instead of register as per XADD semantics since it didn't contain a value fetch. The BPF_READ also included a check in check_stack_read_fixed_off() which rejects the program if the stack slot is of __is_pointer_value() if dst_regno < 0. The latter is to distinguish whether we're dealing with a regular stack spill/ fill or some arithmetical operation which is disallowed on non-scalars, see also 6e7e63cbb023 ("bpf: Forbid XADD on spilled pointers for unprivileged users") for more context on check_mem_access() and its handling of placeholder value -1. One minimally intrusive option to fix the leak is for the BPF_FETCH case to initially check the BPF_READ case via check_mem_access() with -1 as register, followed by the actual load case with non-negative load_reg to propagate stack bounds to registers. Fixes: 37086bfdc737 ("bpf: Propagate stack bounds to registers in atomics w/ BPF_FETCH") Reported-by: Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f2f1ed34cfe9..53d39db3b0fa 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4584,13 +4584,19 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i load_reg = -1; } - /* check whether we can read the memory */ + /* Check whether we can read the memory, with second call for fetch + * case to simulate the register fill. + */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, load_reg, true); + BPF_SIZE(insn->code), BPF_READ, -1, true); + if (!err && load_reg >= 0) + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_READ, load_reg, + true); if (err) return err; - /* check whether we can write into the same memory */ + /* Check whether we can write into the same memory. */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true); if (err) From 180486b430f4e22cc00a478163d942804baae4b5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 10:07:04 +0000 Subject: [PATCH 524/868] bpf, selftests: Add test case for atomic fetch on spilled pointer Test whether unprivileged would be able to leak the spilled pointer either by exporting the returned value from the atomic{32,64} operation or by reading and exporting the value from the stack after the atomic operation took place. Note that for unprivileged, the below atomic cmpxchg test case named "Dest pointer in r0 - succeed" is failing. The reason is that in the dst memory location (r10 -8) there is the spilled register r10: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (bf) r0 = r10 1: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (7b) *(u64 *)(r10 -8) = r0 2: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 fp-8_w=fp 2: (b7) r1 = 0 3: R0_w=fp0 R1_w=invP0 R10=fp0 fp-8_w=fp 3: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r1) 4: R0_w=fp0 R1_w=invP0 R10=fp0 fp-8_w=mmmmmmmm 4: (79) r1 = *(u64 *)(r0 -8) 5: R0_w=fp0 R1_w=invP(id=0) R10=fp0 fp-8_w=mmmmmmmm 5: (b7) r0 = 0 6: R0_w=invP0 R1_w=invP(id=0) R10=fp0 fp-8_w=mmmmmmmm 6: (95) exit However, allowing this case for unprivileged is a bit useless given an update with a new pointer will fail anyway: 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 0: (bf) r0 = r10 1: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 1: (7b) *(u64 *)(r10 -8) = r0 2: R0_w=fp0 R1=ctx(id=0,off=0,imm=0) R10=fp0 fp-8_w=fp 2: (db) r0 = atomic64_cmpxchg((u64 *)(r10 -8), r0, r10) R10 leaks addr into mem Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/atomic_cmpxchg.c | 23 +++++ .../selftests/bpf/verifier/atomic_fetch.c | 94 +++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index c22dc83a41fd..0ffc69f602af 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -156,4 +156,27 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "Dest pointer in r0 - succeed, check 2", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R5 leaks addr into mem", }, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c index 3bc9ff7a860b..5bf03fb4fa2b 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_fetch.c +++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c @@ -1,3 +1,97 @@ +{ + "atomic dw/fetch and address leakage of (map ptr & -1) via stack slot", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "atomic dw/fetch and address leakage of (map ptr & -1) via returned value", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "leaking pointer from stack off -8", +}, +{ + "atomic w/fetch and address leakage of (map ptr & -1) via stack slot", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = REJECT, + .errstr = "invalid size of register fill", +}, +{ + "atomic w/fetch and address leakage of (map ptr & -1) via returned value", + .insns = { + BPF_LD_IMM64(BPF_REG_1, -1), + BPF_LD_MAP_FD(BPF_REG_8, 0), + BPF_LD_MAP_FD(BPF_REG_9, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 2, 4 }, + .result = REJECT, + .errstr = "invalid size of register fill", +}, #define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \ { \ "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \ From a82fe085f344ef20b452cd5f481010ff96b5c4cd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 7 Dec 2021 11:02:02 +0000 Subject: [PATCH 525/868] bpf: Fix kernel address leakage in atomic cmpxchg's r0 aux reg The implementation of BPF_CMPXCHG on a high level has the following parameters: .-[old-val] .-[new-val] BPF_R0 = cmpxchg{32,64}(DST_REG + insn->off, BPF_R0, SRC_REG) `-[mem-loc] `-[old-val] Given a BPF insn can only have two registers (dst, src), the R0 is fixed and used as an auxilliary register for input (old value) as well as output (returning old value from memory location). While the verifier performs a number of safety checks, it misses to reject unprivileged programs where R0 contains a pointer as old value. Through brute-forcing it takes about ~16sec on my machine to leak a kernel pointer with BPF_CMPXCHG. The PoC is basically probing for kernel addresses by storing the guessed address into the map slot as a scalar, and using the map value pointer as R0 while SRC_REG has a canary value to detect a matching address. Fix it by checking R0 for pointers, and reject if that's the case for unprivileged programs. Fixes: 5ffa25502b5a ("bpf: Add instructions for atomic_[cmp]xchg") Reported-by: Ryota Shiga (Flatt Security) Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 53d39db3b0fa..2d48159b58bd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4547,9 +4547,16 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (insn->imm == BPF_CMPXCHG) { /* Check comparison of R0 with memory location */ - err = check_reg_arg(env, BPF_REG_0, SRC_OP); + const u32 aux_reg = BPF_REG_0; + + err = check_reg_arg(env, aux_reg, SRC_OP); if (err) return err; + + if (is_pointer_value(env, aux_reg)) { + verbose(env, "R%d leaks addr into mem\n", aux_reg); + return -EACCES; + } } if (is_pointer_value(env, insn->src_reg)) { From e523102cb719cbad1673b6aa2a4d5c1fa6f13799 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 13 Dec 2021 22:25:23 +0000 Subject: [PATCH 526/868] bpf, selftests: Update test case for atomic cmpxchg on r0 with pointer Fix up unprivileged test case results for 'Dest pointer in r0' verifier tests given they now need to reject R0 containing a pointer value, and add a couple of new related ones with 32bit cmpxchg as well. root@foo:~/bpf/tools/testing/selftests/bpf# ./test_verifier #0/u invalid and of negative number OK #0/p invalid and of negative number OK [...] #1268/p XDP pkt read, pkt_meta' <= pkt_data, bad access 1 OK #1269/p XDP pkt read, pkt_meta' <= pkt_data, bad access 2 OK #1270/p XDP pkt read, pkt_data <= pkt_meta', good access OK #1271/p XDP pkt read, pkt_data <= pkt_meta', bad access 1 OK #1272/p XDP pkt read, pkt_data <= pkt_meta', bad access 2 OK Summary: 1900 PASSED, 0 SKIPPED, 0 FAILED Acked-by: Brendan Jackman Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/atomic_cmpxchg.c | 67 ++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 0ffc69f602af..b39665f33524 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -138,6 +138,8 @@ BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 leaks addr into mem", }, { "Dest pointer in r0 - succeed", @@ -157,7 +159,7 @@ }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "leaking pointer from stack off -8", + .errstr_unpriv = "R0 leaks addr into mem", }, { "Dest pointer in r0 - succeed, check 2", @@ -178,5 +180,66 @@ }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R5 leaks addr into mem", + .errstr_unpriv = "R0 leaks addr into mem", +}, +{ + "Dest pointer in r0 - succeed, check 3", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid size of register fill", + .errstr_unpriv = "R0 leaks addr into mem", +}, +{ + "Dest pointer in r0 - succeed, check 4", + .insns = { + /* r0 = &val */ + BPF_MOV32_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV32_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r10 */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R10 partial copy of pointer", +}, +{ + "Dest pointer in r0 - succeed, check 5", + .insns = { + /* r0 = &val */ + BPF_MOV32_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), + /* r5 = &val */ + BPF_MOV32_REG(BPF_REG_5, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, r5); */ + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R0 invalid mem access", + .errstr_unpriv = "R10 partial copy of pointer", }, From f7ac570d0f026cf5475d4cc4d8040bd947980b3a Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Wed, 15 Dec 2021 00:41:54 +0800 Subject: [PATCH 527/868] ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook There is a HP ProBook which using ALC236 codec and need the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mute LED and micmute LED work. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20211214164156.49711-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc41f3e8ddc3..e59ff75eea75 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8706,6 +8706,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 58e138d62476fc5f889252dcf73848beeaa54789 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 13 Dec 2021 12:27:55 +0100 Subject: [PATCH 528/868] Revert "x86/boot: Mark prepare_command_line() __init" This reverts commit c0f2077baa4113f38f008b8e912b9fb3ff8d43df. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-2-bp@alien8.de --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6a190c7f4d71..c410be738ae7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,7 +742,7 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static char * __init prepare_command_line(void) +static char *prepare_command_line(void) { #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE From 92c959bae2e54ba1e2540ba5f813f7752bd76be1 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 15 Dec 2021 11:14:23 +0100 Subject: [PATCH 529/868] reset: renesas: Fix Runtime PM usage If pm_runtime_resume_and_get() fails then it returns w/o the RPM usage counter being incremented. In this case call pm_runtime_put() in remove() will result in a usage counter imbalance. Therefore check the return code of pm_runtime_resume_and_get() and bail out in case of error. Fixes: bee08559701f ("reset: renesas: Add RZ/G2L usbphy control driver") Signed-off-by: Heiner Kallweit Reviewed-by: Biju Das Link: https://lore.kernel.org/r/ec24e13f-0530-b091-7a08-864577b9b3be@gmail.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-rzg2l-usbphy-ctrl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/reset/reset-rzg2l-usbphy-ctrl.c b/drivers/reset/reset-rzg2l-usbphy-ctrl.c index e0704fd2b533..1e8315038850 100644 --- a/drivers/reset/reset-rzg2l-usbphy-ctrl.c +++ b/drivers/reset/reset-rzg2l-usbphy-ctrl.c @@ -137,7 +137,12 @@ static int rzg2l_usbphy_ctrl_probe(struct platform_device *pdev) dev_set_drvdata(dev, priv); pm_runtime_enable(&pdev->dev); - pm_runtime_resume_and_get(&pdev->dev); + error = pm_runtime_resume_and_get(&pdev->dev); + if (error < 0) { + pm_runtime_disable(&pdev->dev); + reset_control_assert(priv->rstc); + return dev_err_probe(&pdev->dev, error, "pm_runtime_resume_and_get failed"); + } /* put pll and phy into reset state */ spin_lock_irqsave(&priv->lock, flags); From fbe6183998546f8896ee0b620ece86deff5a2fd1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 13 Dec 2021 12:27:56 +0100 Subject: [PATCH 530/868] Revert "x86/boot: Pull up cmdline preparation and early param parsing" This reverts commit 8d48bf8206f77aa8687f0e241e901e5197e52423. It turned out to be a bad idea as it broke supplying mem= cmdline parameters due to parse_memopt() requiring preparatory work like setting up the e820 table in e820__memory_setup() in order to be able to exclude the range specified by mem=. Pulling that up would've broken Xen PV again, see threads at https://lkml.kernel.org/r/20210920120421.29276-1-jgross@suse.com due to xen_memory_setup() needing the first reservations in early_reserve_memory() - kernel and initrd - to have happened already. This could be fixed again by having Xen do those reservations itself... Long story short, revert this and do a simpler fix in a later patch. Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-3-bp@alien8.de --- arch/x86/kernel/setup.c | 66 +++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c410be738ae7..49b596db5631 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -742,28 +742,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static char *prepare_command_line(void) -{ -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0]) { - /* append boot loader cmdline to builtin */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif - - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - - parse_early_param(); - - return command_line; -} - /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -852,23 +830,6 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.arch_setup(); - /* - * x86_configure_nx() is called before parse_early_param() (called by - * prepare_command_line()) to detect whether hardware doesn't support - * NX (so that the early EHCI debug console setup can safely call - * set_fixmap()). It may then be called again from within noexec_setup() - * during parsing early parameters to honor the respective command line - * option. - */ - x86_configure_nx(); - - /* - * This parses early params and it needs to run before - * early_reserve_memory() because latter relies on such settings - * supplied as early params. - */ - *cmdline_p = prepare_command_line(); - /* * Do some memory reservations *before* memory is added to memblock, so * memblock allocations won't overwrite it. @@ -902,6 +863,33 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = __pa_symbol(__bss_start); bss_resource.end = __pa_symbol(__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + + /* + * x86_configure_nx() is called before parse_early_param() to detect + * whether hardware doesn't support NX (so that the early EHCI debug + * console setup can safely call set_fixmap()). It may then be called + * again from within noexec_setup() during parsing early parameters + * to honor the respective command line option. + */ + x86_configure_nx(); + + parse_early_param(); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux From 04e57a2d952bbd34bc45744e72be3eecdc344294 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 14 Dec 2021 10:45:26 +0100 Subject: [PATCH 531/868] tomoyo: Check exceeded quota early in tomoyo_domain_quota_is_ok(). If tomoyo is used in a testing/fuzzing environment in learning mode, for lots of domains the quota will be exceeded and stay exceeded for prolonged periods of time. In such cases it's pointless (and slow) to walk the whole acl list again and again just to rediscover that the quota is exceeded. We already have the TOMOYO_DIF_QUOTA_WARNED flag that notes the overflow condition. Check it early to avoid the slowdown. [penguin-kernel] This patch causes a user visible change that the learning mode will not be automatically resumed after the quota is increased. To resume the learning mode, administrator will need to explicitly clear TOMOYO_DIF_QUOTA_WARNED flag after increasing the quota. But I think that this change is generally preferable, for administrator likely wants to optimize the acl list for that domain before increasing the quota, or that domain likely hits the quota again. Therefore, don't try to care to clear TOMOYO_DIF_QUOTA_WARNED flag automatically when the quota for that domain changed. Signed-off-by: Dmitry Vyukov Signed-off-by: Tetsuo Handa --- security/tomoyo/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 1da2e3722b12..af8cd2af3466 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1051,6 +1051,8 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) return false; if (!domain) return true; + if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED])) + return false; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; @@ -1096,14 +1098,12 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; - if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) { - domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true; - /* r->granted = false; */ - tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); + WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true); + /* r->granted = false; */ + tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING - pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", - domain->domainname->name); + pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", + domain->domainname->name); #endif - } return false; } From f702e1107601230eec707739038a89018ea3468d Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Dec 2021 20:13:55 +0900 Subject: [PATCH 532/868] tomoyo: use hwight16() in tomoyo_domain_quota_is_ok() hwight16() is much faster. While we are at it, no need to include "perm =" part into data_race() macro, for perm is a local variable that cannot be accessed by other threads. Signed-off-by: Tetsuo Handa --- security/tomoyo/util.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index af8cd2af3466..6799b1122c9d 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1056,7 +1056,6 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; - u8 i; if (ptr->is_deleted) continue; @@ -1067,23 +1066,23 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) */ switch (ptr->type) { case TOMOYO_TYPE_PATH_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm); break; case TOMOYO_TYPE_PATH2_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm); break; case TOMOYO_TYPE_PATH_NUMBER_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head) + perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head) ->perm); break; case TOMOYO_TYPE_MKDEV_ACL: - data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); break; case TOMOYO_TYPE_INET_ACL: - data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm); break; case TOMOYO_TYPE_UNIX_ACL: - data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm); break; case TOMOYO_TYPE_MANUAL_TASK_ACL: perm = 0; @@ -1091,9 +1090,7 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) default: perm = 1; } - for (i = 0; i < 16; i++) - if (perm & (1 << i)) - count++; + count += hweight16(perm); } if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) From 2f5b3514c33fecad4003ce0f22ca9691492d310b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 13 Dec 2021 12:27:57 +0100 Subject: [PATCH 533/868] x86/boot: Move EFI range reservation after cmdline parsing The memory reservation in arch/x86/platform/efi/efi.c depends on at least two command line parameters. Put it back later in the boot process and move efi_memblock_x86_reserve_range() out of early_memory_reserve(). An attempt to fix this was done in 8d48bf8206f7 ("x86/boot: Pull up cmdline preparation and early param parsing") but that caused other troubles so it got reverted. The bug this is addressing is: Dan reports that Anjaneya Chagam can no longer use the efi=nosoftreserve kernel command line parameter to suppress "soft reservation" behavior. This is due to the fact that the following call-chain happens at boot: early_reserve_memory |-> efi_memblock_x86_reserve_range |-> efi_fake_memmap_early which does if (!efi_soft_reserve_enabled()) return; and that would have set EFI_MEM_NO_SOFT_RESERVE after having parsed "nosoftreserve". However, parse_early_param() gets called *after* it, leading to the boot cmdline not being taken into account. See also https://lore.kernel.org/r/e8dd8993c38702ee6dd73b3c11f158617e665607.camel@intel.com [ bp: Turn into a proper patch. ] Signed-off-by: Mike Rapoport Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211213112757.2612-4-bp@alien8.de --- arch/x86/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 49b596db5631..e04f5e6eb33f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -713,9 +713,6 @@ static void __init early_reserve_memory(void) early_reserve_initrd(); - if (efi_enabled(EFI_BOOT)) - efi_memblock_x86_reserve_range(); - memblock_x86_reserve_range_setup_data(); reserve_ibft_region(); @@ -890,6 +887,9 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux From 651740a502411793327e2f0741104749c4eedcd1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 13 Dec 2021 14:22:33 -0500 Subject: [PATCH 534/868] btrfs: check WRITE_ERR when trying to read an extent buffer Filipe reported a hang when we have errors on btrfs. This turned out to be a side-effect of my fix c2e39305299f01 ("btrfs: clear extent buffer uptodate when we fail to write it") which made it so we clear EXTENT_BUFFER_UPTODATE on an eb when we fail to write it out. Below is a paste of Filipe's analysis he got from using drgn to debug the hang """ btree readahead code calls read_extent_buffer_pages(), sets ->io_pages to a value while writeback of all pages has not yet completed: --> writeback for the first 3 pages finishes, we clear EXTENT_BUFFER_UPTODATE from eb on the first page when we get an error. --> at this point eb->io_pages is 1 and we cleared Uptodate bit from the first 3 pages --> read_extent_buffer_pages() does not see EXTENT_BUFFER_UPTODATE() so it continues, it's able to lock the pages since we obviously don't hold the pages locked during writeback --> read_extent_buffer_pages() then computes 'num_reads' as 3, and sets eb->io_pages to 3, since only the first page does not have Uptodate bit set at this point --> writeback for the remaining page completes, we ended decrementing eb->io_pages by 1, resulting in eb->io_pages == 2, and therefore never calling end_extent_buffer_writeback(), so EXTENT_BUFFER_WRITEBACK remains in the eb's flags --> of course, when the read bio completes, it doesn't and shouldn't call end_extent_buffer_writeback() --> we should clear EXTENT_BUFFER_UPTODATE only after all pages of the eb finished writeback? or maybe make the read pages code wait for writeback of all pages of the eb to complete before checking which pages need to be read, touch ->io_pages, submit read bio, etc writeback bit never cleared means we can hang when aborting a transaction, at: btrfs_cleanup_one_transaction() btrfs_destroy_marked_extents() wait_on_extent_buffer_writeback() """ This is a problem because our writes are not synchronized with reads in any way. We clear the UPTODATE flag and then we can easily come in and try to read the EB while we're still waiting on other bio's to complete. We have two options here, we could lock all the pages, and then check to see if eb->io_pages != 0 to know if we've already got an outstanding write on the eb. Or we can simply check to see if we have WRITE_ERR set on this extent buffer. We set this bit _before_ we clear UPTODATE, so if the read gets triggered because we aren't UPTODATE because of a write error we're guaranteed to have WRITE_ERR set, and in this case we can simply return -EIO. This will fix the reported hang. Reported-by: Filipe Manana Fixes: c2e39305299f01 ("btrfs: clear extent buffer uptodate when we fail to write it") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3258b6f01e85..9234d96a7fd5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -6611,6 +6611,14 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; + /* + * We could have had EXTENT_BUFFER_UPTODATE cleared by the write + * operation, which could potentially still be in flight. In this case + * we simply want to return an error. + */ + if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))) + return -EIO; + if (eb->fs_info->sectorsize < PAGE_SIZE) return read_extent_buffer_subpage(eb, wait, mirror_num); From 7a1636089acfee7562fe79aff7d1b4c57869896d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:12 +0000 Subject: [PATCH 535/868] btrfs: fix invalid delayed ref after subvolume creation failure When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the new root's root item into the root tree, we are freeing the metadata extent we reserved for the new root to prevent a metadata extent leak, as we don't abort the transaction at that point (since there is nothing at that point that is irreversible). However we allocated the metadata extent for the new root which we are creating for the new subvolume, so its delayed reference refers to the ID of this new root. But when we free the metadata extent we pass the root of the subvolume where the new subvolume is located to btrfs_free_tree_block() - this is incorrect because this will generate a delayed reference that refers to the ID of the parent subvolume's root, and not to ID of the new root. This results in a failure when running delayed references that leads to a transaction abort and a trace like the following: [3868.738042] RIP: 0010:__btrfs_free_extent+0x709/0x950 [btrfs] [3868.739857] Code: 68 0f 85 e6 fb ff (...) [3868.742963] RSP: 0018:ffffb0e9045cf910 EFLAGS: 00010246 [3868.743908] RAX: 00000000fffffffe RBX: 00000000fffffffe RCX: 0000000000000002 [3868.745312] RDX: 00000000fffffffe RSI: 0000000000000002 RDI: ffff90b0cd793b88 [3868.746643] RBP: 000000000e5d8000 R08: 0000000000000000 R09: ffff90b0cd793b88 [3868.747979] R10: 0000000000000002 R11: 00014ded97944d68 R12: 0000000000000000 [3868.749373] R13: ffff90b09afe4a28 R14: 0000000000000000 R15: ffff90b0cd793b88 [3868.750725] FS: 00007f281c4a8b80(0000) GS:ffff90b3ada00000(0000) knlGS:0000000000000000 [3868.752275] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [3868.753515] CR2: 00007f281c6a5000 CR3: 0000000108a42006 CR4: 0000000000370ee0 [3868.754869] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [3868.756228] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [3868.757803] Call Trace: [3868.758281] [3868.758655] ? btrfs_merge_delayed_refs+0x178/0x1c0 [btrfs] [3868.759827] __btrfs_run_delayed_refs+0x2b1/0x1250 [btrfs] [3868.761047] btrfs_run_delayed_refs+0x86/0x210 [btrfs] [3868.762069] ? lock_acquired+0x19f/0x420 [3868.762829] btrfs_commit_transaction+0x69/0xb20 [btrfs] [3868.763860] ? _raw_spin_unlock+0x29/0x40 [3868.764614] ? btrfs_block_rsv_release+0x1c2/0x1e0 [btrfs] [3868.765870] create_subvol+0x1d8/0x9a0 [btrfs] [3868.766766] btrfs_mksubvol+0x447/0x4c0 [btrfs] [3868.767669] ? preempt_count_add+0x49/0xa0 [3868.768444] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [3868.769639] ? _copy_from_user+0x66/0xa0 [3868.770391] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [3868.771495] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [3868.772364] ? __slab_free+0x10a/0x360 [3868.773198] ? rcu_read_lock_sched_held+0x12/0x60 [3868.774121] ? lock_release+0x223/0x4a0 [3868.774863] ? lock_acquired+0x19f/0x420 [3868.775634] ? rcu_read_lock_sched_held+0x12/0x60 [3868.776530] ? trace_hardirqs_on+0x1b/0xe0 [3868.777373] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [3868.778280] ? kmem_cache_free+0x321/0x3c0 [3868.779011] ? __x64_sys_ioctl+0x83/0xb0 [3868.779718] __x64_sys_ioctl+0x83/0xb0 [3868.780387] do_syscall_64+0x3b/0xc0 [3868.781059] entry_SYSCALL_64_after_hwframe+0x44/0xae [3868.781953] RIP: 0033:0x7f281c59e957 [3868.782585] Code: 3c 1c 48 f7 d8 4c (...) [3868.785867] RSP: 002b:00007ffe1f83e2b8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [3868.787198] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f281c59e957 [3868.788450] RDX: 00007ffe1f83e2c0 RSI: 0000000050009418 RDI: 0000000000000003 [3868.789748] RBP: 00007ffe1f83f300 R08: 0000000000000000 R09: 00007ffe1f83fe36 [3868.791214] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000003 [3868.792468] R13: 0000000000000003 R14: 00007ffe1f83e2c0 R15: 00000000000003cc [3868.793765] [3868.794037] irq event stamp: 0 [3868.794548] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [3868.795670] hardirqs last disabled at (0): [] copy_process+0x934/0x2040 [3868.797086] softirqs last enabled at (0): [] copy_process+0x934/0x2040 [3868.798309] softirqs last disabled at (0): [<0000000000000000>] 0x0 [3868.799284] ---[ end trace be24c7002fe27747 ]--- [3868.799928] BTRFS info (device dm-0): leaf 241188864 gen 1268 total ptrs 214 free space 469 owner 2 [3868.801133] BTRFS info (device dm-0): refs 2 lock_owner 225627 current 225627 [3868.802056] item 0 key (237436928 169 0) itemoff 16250 itemsize 33 [3868.802863] extent refs 1 gen 1265 flags 2 [3868.803447] ref#0: tree block backref root 1610 (...) [3869.064354] item 114 key (241008640 169 0) itemoff 12488 itemsize 33 [3869.065421] extent refs 1 gen 1268 flags 2 [3869.066115] ref#0: tree block backref root 1689 (...) [3869.403834] BTRFS error (device dm-0): unable to find ref byte nr 241008640 parent 0 root 1622 owner 0 offset 0 [3869.405641] BTRFS: error (device dm-0) in __btrfs_free_extent:3076: errno=-2 No such entry [3869.407138] BTRFS: error (device dm-0) in btrfs_run_delayed_refs:2159: errno=-2 No such entry Fix this by passing the new subvolume's root ID to btrfs_free_tree_block(). This requires changing the root argument of btrfs_free_tree_block() from struct btrfs_root * to a u64, since at this point during the subvolume creation we have not yet created the struct btrfs_root for the new subvolume, and btrfs_free_tree_block() only needs a root ID and nothing else from a struct btrfs_root. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 17 +++++++++-------- fs/btrfs/ctree.h | 7 ++++++- fs/btrfs/extent-tree.c | 13 +++++++------ fs/btrfs/free-space-tree.c | 4 ++-- fs/btrfs/ioctl.c | 9 +++++---- fs/btrfs/qgroup.c | 3 ++- 6 files changed, 31 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 74c8e18f3720..64599625c7d7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -462,8 +462,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, BUG_ON(ret < 0); rcu_assign_pointer(root->node, cow); - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { @@ -484,8 +484,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } } - btrfs_free_tree_block(trans, root, buf, parent_start, - last_ref); + btrfs_free_tree_block(trans, btrfs_root_id(root), buf, + parent_start, last_ref); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -926,7 +926,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, free_extent_buffer(mid); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); /* once for the root ptr */ free_extent_buffer_stale(mid); return 0; @@ -985,7 +985,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(right); del_ptr(root, path, level + 1, pslot + 1); root_sub_used(root, right->len); - btrfs_free_tree_block(trans, root, right, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), right, + 0, 1); free_extent_buffer_stale(right); right = NULL; } else { @@ -1030,7 +1031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_unlock(mid); del_ptr(root, path, level + 1, pslot); root_sub_used(root, mid->len); - btrfs_free_tree_block(trans, root, mid, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); free_extent_buffer_stale(mid); mid = NULL; } else { @@ -4031,7 +4032,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); atomic_inc(&leaf->refs); - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); free_extent_buffer_stale(leaf); } /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7553e9dc5f93..5fe5eccb3c87 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2257,6 +2257,11 @@ static inline bool btrfs_root_dead(const struct btrfs_root *root) return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; } +static inline u64 btrfs_root_id(const struct btrfs_root *root) +{ + return root->root_key.objectid; +} + /* struct btrfs_root_backup */ BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, tree_root, 64); @@ -2719,7 +2724,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, u64 empty_size, enum btrfs_lock_nesting nest); void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref); int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fc4895e6a62c..25ef6e3fd306 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3275,20 +3275,20 @@ out_delayed_unlock: } void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, + u64 root_id, struct extent_buffer *buf, u64 parent, int last_ref) { - struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_ref generic_ref = { 0 }; int ret; btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, buf->start, buf->len, parent); btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), - root->root_key.objectid, 0, false); + root_id, 0, false); - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { btrfs_ref_tree_mod(fs_info, &generic_ref); ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); BUG_ON(ret); /* -ENOMEM */ @@ -3298,7 +3298,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; bool must_pin = false; - if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { + if (root_id != BTRFS_TREE_LOG_OBJECTID) { ret = check_ref_cleanup(trans, buf->start); if (!ret) { btrfs_redirty_list_add(trans->transaction, buf); @@ -5472,7 +5472,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, goto owner_mismatch; } - btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); + btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent, + wc->refs[level] == 1); out: wc->refs[level] = 0; wc->flags[level] = 0; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index a33bca94d133..3abec44c6255 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1256,8 +1256,8 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_tree_lock(free_space_root->node); btrfs_clean_tree_block(free_space_root->node); btrfs_tree_unlock(free_space_root->node); - btrfs_free_tree_block(trans, free_space_root, free_space_root->node, - 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), + free_space_root->node, 0, 1); btrfs_put_root(free_space_root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1b85d98df66b..a7533416370a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -617,11 +617,12 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * Since we don't abort the transaction in this case, free the * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the - * extent tree without backreferences). Also no need to have - * the tree block locked since it is not in any tree at this - * point, so no other task can find it and use it. + * extent tree with a backreference for a root that does not + * exists). Also no need to have the tree block locked since it + * is not in any tree at this point, so no other task can find + * it and use it. */ - btrfs_free_tree_block(trans, root, leaf, 0, 1); + btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index db680f5be745..6c037f1252b7 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1219,7 +1219,8 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_tree_lock(quota_root->node); btrfs_clean_tree_block(quota_root->node); btrfs_tree_unlock(quota_root->node); - btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); + btrfs_free_tree_block(trans, btrfs_root_id(quota_root), + quota_root->node, 0, 1); btrfs_put_root(quota_root); From 212a58fda9b9077e0efc20200a4feb76afacfd95 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 13 Dec 2021 08:45:13 +0000 Subject: [PATCH 536/868] btrfs: fix warning when freeing leaf after subvolume creation failure When creating a subvolume, at ioctl.c:create_subvol(), if we fail to insert the root item for the new subvolume into the root tree, we can trigger the following warning: [78961.741046] WARNING: CPU: 0 PID: 4079814 at fs/btrfs/extent-tree.c:3357 btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.743344] Modules linked in: [78961.749440] dm_snapshot dm_thin_pool (...) [78961.773648] CPU: 0 PID: 4079814 Comm: fsstress Not tainted 5.16.0-rc4-btrfs-next-108 #1 [78961.775198] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [78961.777266] RIP: 0010:btrfs_free_tree_block+0x2af/0x310 [btrfs] [78961.778398] Code: 17 00 48 85 (...) [78961.781067] RSP: 0018:ffffaa4001657b28 EFLAGS: 00010202 [78961.781877] RAX: 0000000000000213 RBX: ffff897f8a796910 RCX: 0000000000000000 [78961.782780] RDX: 0000000000000000 RSI: 0000000011004000 RDI: 00000000ffffffff [78961.783764] RBP: ffff8981f490e800 R08: 0000000000000001 R09: 0000000000000000 [78961.784740] R10: 0000000000000000 R11: 0000000000000001 R12: ffff897fc963fcc8 [78961.785665] R13: 0000000000000001 R14: ffff898063548000 R15: ffff898063548000 [78961.786620] FS: 00007f31283c6b80(0000) GS:ffff8982ace00000(0000) knlGS:0000000000000000 [78961.787717] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [78961.788598] CR2: 00007f31285c3000 CR3: 000000023fcc8003 CR4: 0000000000370ef0 [78961.789568] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [78961.790585] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [78961.791684] Call Trace: [78961.792082] [78961.792359] create_subvol+0x5d1/0x9a0 [btrfs] [78961.793054] btrfs_mksubvol+0x447/0x4c0 [btrfs] [78961.794009] ? preempt_count_add+0x49/0xa0 [78961.794705] __btrfs_ioctl_snap_create+0x123/0x190 [btrfs] [78961.795712] ? _copy_from_user+0x66/0xa0 [78961.796382] btrfs_ioctl_snap_create_v2+0xbb/0x140 [btrfs] [78961.797392] btrfs_ioctl+0xd1e/0x35c0 [btrfs] [78961.798172] ? __slab_free+0x10a/0x360 [78961.798820] ? rcu_read_lock_sched_held+0x12/0x60 [78961.799664] ? lock_release+0x223/0x4a0 [78961.800321] ? lock_acquired+0x19f/0x420 [78961.800992] ? rcu_read_lock_sched_held+0x12/0x60 [78961.801796] ? trace_hardirqs_on+0x1b/0xe0 [78961.802495] ? _raw_spin_unlock_irqrestore+0x3e/0x60 [78961.803358] ? kmem_cache_free+0x321/0x3c0 [78961.804071] ? __x64_sys_ioctl+0x83/0xb0 [78961.804711] __x64_sys_ioctl+0x83/0xb0 [78961.805348] do_syscall_64+0x3b/0xc0 [78961.805969] entry_SYSCALL_64_after_hwframe+0x44/0xae [78961.806830] RIP: 0033:0x7f31284bc957 [78961.807517] Code: 3c 1c 48 f7 d8 (...) This is because we are calling btrfs_free_tree_block() on an extent buffer that is dirty. Fix that by cleaning the extent buffer, with btrfs_clean_tree_block(), before freeing it. This was triggered by test case generic/475 from fstests. Fixes: 67addf29004c5b ("btrfs: fix metadata extent leak after failure to create subvolume") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a7533416370a..8a442b59eee0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -618,10 +618,11 @@ static noinline int create_subvol(struct user_namespace *mnt_userns, * tree block so that we don't leak space and leave the * filesystem in an inconsistent state (an extent item in the * extent tree with a backreference for a root that does not - * exists). Also no need to have the tree block locked since it - * is not in any tree at this point, so no other task can find - * it and use it. + * exists). */ + btrfs_tree_lock(leaf); + btrfs_clean_tree_block(leaf); + btrfs_tree_unlock(leaf); btrfs_free_tree_block(trans, objectid, leaf, 0, 1); free_extent_buffer(leaf); goto fail; From 4989d4a0aed3fb30f5b48787a689d7090de6f86d Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 15 Dec 2021 19:38:43 +0900 Subject: [PATCH 537/868] btrfs: fix missing blkdev_put() call in btrfs_scan_one_device() The function btrfs_scan_one_device() calls blkdev_get_by_path() and blkdev_put() to get and release its target block device. However, when btrfs_sb_log_location_bdev() fails, blkdev_put() is not called and the block device is left without clean up. This triggered failure of fstests generic/085. Fix the failure path of btrfs_sb_log_location_bdev() to call blkdev_put(). Fixes: 12659251ca5df ("btrfs: implement log-structured superblock for ZONED mode") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Nikolay Borisov Signed-off-by: Shin'ichiro Kawasaki Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc80f2a97a0b..b4da58fd0e1a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1370,8 +1370,10 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, bytenr_orig = btrfs_sb_offset(0); ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr); - if (ret) - return ERR_PTR(ret); + if (ret) { + device = ERR_PTR(ret); + goto error_bdev_put; + } disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig); if (IS_ERR(disk_super)) { From a7083763619f7485ccdade160deb81737cf2732f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 10 Dec 2021 09:55:29 -0700 Subject: [PATCH 538/868] soc/tegra: fuse: Fix bitwise vs. logical OR warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A new warning in clang points out two instances where boolean expressions are being used with a bitwise OR instead of logical OR: drivers/soc/tegra/fuse/speedo-tegra20.c:72:9: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical] reg = tegra_fuse_read_spare(i) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ || drivers/soc/tegra/fuse/speedo-tegra20.c:72:9: note: cast one or both operands to int to silence this warning drivers/soc/tegra/fuse/speedo-tegra20.c:87:9: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical] reg = tegra_fuse_read_spare(i) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ || drivers/soc/tegra/fuse/speedo-tegra20.c:87:9: note: cast one or both operands to int to silence this warning 2 warnings generated. The motivation for the warning is that logical operations short circuit while bitwise operations do not. In this instance, tegra_fuse_read_spare() is not semantically returning a boolean, it is returning a bit value. Use u32 for its return type so that it can be used with either bitwise or boolean operators without any warnings. Fixes: 25cd5a391478 ("ARM: tegra: Add speedo-based process identification") Link: https://github.com/ClangBuiltLinux/linux/issues/1488 Suggested-by: Michał Mirosław Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Thierry Reding --- drivers/soc/tegra/fuse/fuse-tegra.c | 2 +- drivers/soc/tegra/fuse/fuse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index f2151815db58..e714ed3b61bc 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -320,7 +320,7 @@ static struct platform_driver tegra_fuse_driver = { }; builtin_platform_driver(tegra_fuse_driver); -bool __init tegra_fuse_read_spare(unsigned int spare) +u32 __init tegra_fuse_read_spare(unsigned int spare) { unsigned int offset = fuse->soc->info->spare + spare * 4; diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h index de58feba0435..ecff0c08e959 100644 --- a/drivers/soc/tegra/fuse/fuse.h +++ b/drivers/soc/tegra/fuse/fuse.h @@ -65,7 +65,7 @@ struct tegra_fuse { void tegra_init_revision(void); void tegra_init_apbmisc(void); -bool __init tegra_fuse_read_spare(unsigned int spare); +u32 __init tegra_fuse_read_spare(unsigned int spare); u32 __init tegra_fuse_read_early(unsigned int offset); u8 tegra_get_major_rev(void); From f08adf5add9a071160c68bb2a61d697f39ab0758 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Dec 2021 19:46:21 +0100 Subject: [PATCH 539/868] USB: gadget: bRequestType is a bitfield, not a enum Szymon rightly pointed out that the previous check for the endpoint direction in bRequestType was not looking at only the bit involved, but rather the whole value. Normally this is ok, but for some request types, bits other than bit 8 could be set and the check for the endpoint length could not stall correctly. Fix that up by only checking the single bit. Fixes: 153a2d7e3350 ("USB: gadget: detect too-big endpoint 0 requests") Cc: Felipe Balbi Reported-by: Szymon Heidrich Link: https://lore.kernel.org/r/20211214184621.385828-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 6 +++--- drivers/usb/gadget/legacy/dbgp.c | 6 +++--- drivers/usb/gadget/legacy/inode.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 284eea9f6e4d..3789c329183c 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1680,14 +1680,14 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u8 endp; if (w_length > USB_COMP_EP0_BUFSIZ) { - if (ctrl->bRequestType == USB_DIR_OUT) { - goto done; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); w_length = USB_COMP_EP0_BUFSIZ; + } else { + goto done; } } diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index 355bc7dab9d5..6bcbad382580 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -346,14 +346,14 @@ static int dbgp_setup(struct usb_gadget *gadget, u16 len = 0; if (length > DBGP_REQ_LEN) { - if (ctrl->bRequestType == USB_DIR_OUT) { - return err; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(DBGP_REQ_LEN); length = DBGP_REQ_LEN; + } else { + return err; } } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 63150e3889ef..3b58f4fc0a80 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1334,14 +1334,14 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_length = le16_to_cpu(ctrl->wLength); if (w_length > RBUF_SIZE) { - if (ctrl->bRequestType == USB_DIR_OUT) { - return value; - } else { + if (ctrl->bRequestType & USB_DIR_IN) { /* Cast away the const, we are going to overwrite on purpose. */ __le16 *temp = (__le16 *)&ctrl->wLength; *temp = cpu_to_le16(RBUF_SIZE); w_length = RBUF_SIZE; + } else { + return value; } } From fac6bf87c55f7f0733efb0375565fb6a50cf2caf Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Tue, 7 Dec 2021 13:45:10 +0100 Subject: [PATCH 540/868] usb: dwc2: fix STM ID/VBUS detection startup delay in dwc2_driver_probe When activate_stm_id_vb_detection is enabled, ID and Vbus detection relies on sensing comparators. This detection needs time to stabilize. A delay was already applied in dwc2_resume() when reactivating the detection, but it wasn't done in dwc2_probe(). This patch adds delay after enabling STM ID/VBUS detection. Then, ID state is good when initializing gadget and host, and avoid to get a wrong Connector ID Status Change interrupt. Fixes: a415083a11cc ("usb: dwc2: add support for STM32MP15 SoCs USB OTG HS and FS") Cc: stable Acked-by: Minas Harutyunyan Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211207124510.268841-1-amelie.delaunay@foss.st.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index c8f18f3ba9e3..c331a5128c2c 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -575,6 +575,9 @@ static int dwc2_driver_probe(struct platform_device *dev) ggpio |= GGPIO_STM32_OTG_GCCFG_IDEN; ggpio |= GGPIO_STM32_OTG_GCCFG_VBDEN; dwc2_writel(hsotg, ggpio, GGPIO); + + /* ID/VBUS detection startup time */ + usleep_range(5000, 7000); } retval = dwc2_drd_init(hsotg); From f4b3ee3c85551d2d343a3ba159304066523f730f Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 9 Dec 2021 11:46:07 -0500 Subject: [PATCH 541/868] audit: improve robustness of the audit queue handling If the audit daemon were ever to get stuck in a stopped state the kernel's kauditd_thread() could get blocked attempting to send audit records to the userspace audit daemon. With the kernel thread blocked it is possible that the audit queue could grow unbounded as certain audit record generating events must be exempt from the queue limits else the system enter a deadlock state. This patch resolves this problem by lowering the kernel thread's socket sending timeout from MAX_SCHEDULE_TIMEOUT to HZ/10 and tweaks the kauditd_send_queue() function to better manage the various audit queues when connection problems occur between the kernel and the audit daemon. With this patch, the backlog may temporarily grow beyond the defined limits when the audit daemon is stopped and the system is under heavy audit pressure, but kauditd_thread() will continue to make progress and drain the queues as it would for other connection problems. For example, with the audit daemon put into a stopped state and the system configured to audit every syscall it was still possible to shutdown the system without a kernel panic, deadlock, etc.; granted, the system was slow to shutdown but that is to be expected given the extreme pressure of recording every syscall. The timeout value of HZ/10 was chosen primarily through experimentation and this developer's "gut feeling". There is likely no one perfect value, but as this scenario is limited in scope (root privileges would be needed to send SIGSTOP to the audit daemon), it is likely not worth exposing this as a tunable at present. This can always be done at a later date if it proves necessary. Cc: stable@vger.kernel.org Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking") Reported-by: Gaosheng Cui Tested-by: Gaosheng Cui Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 121d37e700a6..4cebadb5f30d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -718,7 +718,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, { int rc = 0; struct sk_buff *skb; - static unsigned int failed = 0; + unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ @@ -735,32 +735,30 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, continue; } +retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { - /* fatal failure for our queue flush attempt? */ + /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { - /* yes - error processing for the queue */ sk = NULL; if (err_hook) (*err_hook)(skb); - if (!skb_hook) - goto out; - /* keep processing with the skb_hook */ + if (rc == -EAGAIN) + rc = 0; + /* continue to drain the queue */ continue; } else - /* no - requeue to preserve ordering */ - skb_queue_head(queue, skb); + goto retry; } else { - /* it worked - drop the extra reference and continue */ + /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } } -out: return (rc >= 0 ? 0 : rc); } @@ -1609,7 +1607,8 @@ static int __net_init audit_net_init(struct net *net) audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + /* limit the timeout in case auditd is blocked/stopped */ + aunet->sk->sk_sndtimeo = HZ / 10; return 0; } From 584af82154f56e6b2740160fcc84a2966d969e15 Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Tue, 31 Aug 2021 13:16:35 +0200 Subject: [PATCH 542/868] igb: Fix removal of unicast MAC filters of VFs Move checking condition of VF MAC filter before clearing or adding MAC filter to VF to prevent potential blackout caused by removal of necessary and working VF's MAC filter. Fixes: 1b8b062a99dc ("igb: add VF trust infrastructure") Signed-off-by: Karen Sornek Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 28 +++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fd54d3ef890b..b597b8bfb910 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7648,6 +7648,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, struct vf_mac_filter *entry = NULL; int ret = 0; + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { + dev_warn(&pdev->dev, + "VF %d requested MAC filter but is administratively denied\n", + vf); + return -EINVAL; + } + if (!is_valid_ether_addr(addr)) { + dev_warn(&pdev->dev, + "VF %d attempted to set invalid MAC filter\n", + vf); + return -EINVAL; + } + switch (info) { case E1000_VF_MAC_FILTER_CLR: /* remove all unicast MAC filters related to the current VF */ @@ -7661,20 +7675,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, } break; case E1000_VF_MAC_FILTER_ADD: - if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && - !vf_data->trusted) { - dev_warn(&pdev->dev, - "VF %d requested MAC filter but is administratively denied\n", - vf); - return -EINVAL; - } - if (!is_valid_ether_addr(addr)) { - dev_warn(&pdev->dev, - "VF %d attempted to set invalid MAC filter\n", - vf); - return -EINVAL; - } - /* try to find empty slot in the list */ list_for_each(pos, &adapter->vf_macs.l) { entry = list_entry(pos, struct vf_mac_filter, l); From b6d335a60dc624c0d279333b22c737faa765b028 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Sat, 13 Nov 2021 11:42:34 +0800 Subject: [PATCH 543/868] igbvf: fix double free in `igbvf_probe` In `igbvf_probe`, if register_netdev() fails, the program will go to label err_hw_init, and then to label err_ioremap. In free_netdev() which is just below label err_ioremap, there is `list_for_each_entry_safe` and `netif_napi_del` which aims to delete all entries in `dev->napi_list`. The program has added an entry `adapter->rx_ring->napi` which is added by `netif_napi_add` in igbvf_alloc_queues(). However, adapter->rx_ring has been freed below label err_hw_init. So this a UAF. In terms of how to patch the problem, we can refer to igbvf_remove() and delete the entry before `adapter->rx_ring`. The KASAN logs are as follows: [ 35.126075] BUG: KASAN: use-after-free in free_netdev+0x1fd/0x450 [ 35.127170] Read of size 8 at addr ffff88810126d990 by task modprobe/366 [ 35.128360] [ 35.128643] CPU: 1 PID: 366 Comm: modprobe Not tainted 5.15.0-rc2+ #14 [ 35.129789] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 [ 35.131749] Call Trace: [ 35.132199] dump_stack_lvl+0x59/0x7b [ 35.132865] print_address_description+0x7c/0x3b0 [ 35.133707] ? free_netdev+0x1fd/0x450 [ 35.134378] __kasan_report+0x160/0x1c0 [ 35.135063] ? free_netdev+0x1fd/0x450 [ 35.135738] kasan_report+0x4b/0x70 [ 35.136367] free_netdev+0x1fd/0x450 [ 35.137006] igbvf_probe+0x121d/0x1a10 [igbvf] [ 35.137808] ? igbvf_vlan_rx_add_vid+0x100/0x100 [igbvf] [ 35.138751] local_pci_probe+0x13c/0x1f0 [ 35.139461] pci_device_probe+0x37e/0x6c0 [ 35.165526] [ 35.165806] Allocated by task 366: [ 35.166414] ____kasan_kmalloc+0xc4/0xf0 [ 35.167117] foo_kmem_cache_alloc_trace+0x3c/0x50 [igbvf] [ 35.168078] igbvf_probe+0x9c5/0x1a10 [igbvf] [ 35.168866] local_pci_probe+0x13c/0x1f0 [ 35.169565] pci_device_probe+0x37e/0x6c0 [ 35.179713] [ 35.179993] Freed by task 366: [ 35.180539] kasan_set_track+0x4c/0x80 [ 35.181211] kasan_set_free_info+0x1f/0x40 [ 35.181942] ____kasan_slab_free+0x103/0x140 [ 35.182703] kfree+0xe3/0x250 [ 35.183239] igbvf_probe+0x1173/0x1a10 [igbvf] [ 35.184040] local_pci_probe+0x13c/0x1f0 Fixes: d4e0fe01a38a0 (igbvf: add new driver to support 82576 virtual functions) Reported-by: Zheyu Ma Signed-off-by: Letu Ren Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igbvf/netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 74ccd622251a..4d988da68394 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2859,6 +2859,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_hw_init: + netif_napi_del(&adapter->rx_ring->napi); kfree(adapter->tx_ring); kfree(adapter->rx_ring); err_sw_init: From 0182d1f3fa640888a2ed7e3f6df2fdb10adee7c8 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Tue, 2 Nov 2021 09:20:06 +0200 Subject: [PATCH 544/868] igc: Fix typo in i225 LTR functions The LTR maximum value was incorrectly written using the scale from the LTR minimum value. This would cause incorrect values to be sent, in cases where the initial calculation lead to different min/max scales. Fixes: 707abf069548 ("igc: Add initial LTR support") Suggested-by: Dima Ruinskiy Signed-off-by: Sasha Neftin Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_i225.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index b2ef9fde97b3..b6807e16eea9 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -636,7 +636,7 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) ltrv = rd32(IGC_LTRMAXV); if (ltr_max != (ltrv & IGC_LTRMAXV_LTRV_MASK)) { ltrv = IGC_LTRMAXV_LSNP_REQ | ltr_max | - (scale_min << IGC_LTRMAXV_SCALE_SHIFT); + (scale_max << IGC_LTRMAXV_SCALE_SHIFT); wr32(IGC_LTRMAXV, ltrv); } } From 271225fd57c2f1e0b3f8826df51be6c634affefe Mon Sep 17 00:00:00 2001 From: Robert Schlabbach Date: Tue, 26 Oct 2021 02:24:48 +0200 Subject: [PATCH 545/868] ixgbe: Document how to enable NBASE-T support Commit a296d665eae1 ("ixgbe: Add ethtool support to enable 2.5 and 5.0 Gbps support") introduced suppression of the advertisement of NBASE-T speeds by default, according to Todd Fujinaka to accommodate customers with network switches which could not cope with advertised NBASE-T speeds, as posted in the E1000-devel mailing list: https://sourceforge.net/p/e1000/mailman/message/37106269/ However, the suppression was not documented at all, nor was how to enable NBASE-T support. Properly document the NBASE-T suppression and how to enable NBASE-T support. Fixes: a296d665eae1 ("ixgbe: Add ethtool support to enable 2.5 and 5.0 Gbps support") Reported-by: Robert Schlabbach Signed-off-by: Robert Schlabbach Signed-off-by: Tony Nguyen --- .../device_drivers/ethernet/intel/ixgbe.rst | 16 ++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst index f1d5233e5e51..0a233b17c664 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst @@ -440,6 +440,22 @@ NOTE: For 82599-based network connections, if you are enabling jumbo frames in a virtual function (VF), jumbo frames must first be enabled in the physical function (PF). The VF MTU setting cannot be larger than the PF MTU. +NBASE-T Support +--------------- +The ixgbe driver supports NBASE-T on some devices. However, the advertisement +of NBASE-T speeds is suppressed by default, to accommodate broken network +switches which cannot cope with advertised NBASE-T speeds. Use the ethtool +command to enable advertising NBASE-T speeds on devices which support it:: + + ethtool -s eth? advertise 0x1800000001028 + +On Linux systems with INTERFACES(5), this can be specified as a pre-up command +in /etc/network/interfaces so that the interface is always brought up with +NBASE-T support, e.g.:: + + iface eth? inet dhcp + pre-up ethtool -s eth? advertise 0x1800000001028 || true + Generic Receive Offload, aka GRO -------------------------------- The driver supports the in-kernel software implementation of GRO. GRO has diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0f9f022260d7..45e2ec4d264d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5531,6 +5531,10 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) if (!speed && hw->mac.ops.get_link_capabilities) { ret = hw->mac.ops.get_link_capabilities(hw, &speed, &autoneg); + /* remove NBASE-T speeds from default autonegotiation + * to accommodate broken network switches in the field + * which cannot cope with advertised NBASE-T speeds + */ speed &= ~(IXGBE_LINK_SPEED_5GB_FULL | IXGBE_LINK_SPEED_2_5GB_FULL); } From 1cef171abd39102dcc862c6bfbf7f954f4f1f66f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 15 Dec 2021 12:31:51 -0500 Subject: [PATCH 546/868] dm integrity: fix data corruption due to improper use of bvec_kmap_local Commit 25058d1c725c ("dm integrity: use bvec_kmap_local in __journal_read_write") didn't account for __journal_read_write() later adding the biovec's bv_offset. As such using bvec_kmap_local() caused the start of the biovec to be skipped. Trivial test that illustrates data corruption: # integritysetup format /dev/pmem0 # integritysetup open /dev/pmem0 integrityroot # mkfs.xfs /dev/mapper/integrityroot ... bad magic number bad magic number Metadata corruption detected at xfs_sb block 0x0/0x1000 libxfs_writebufr: write verifer failed on xfs_sb bno 0x0/0x1000 releasing dirty buffer (bulk) to free list! Fix this by using kmap_local_page() instead of bvec_kmap_local() in __journal_read_write(). Fixes: 25058d1c725c ("dm integrity: use bvec_kmap_local in __journal_read_write") Reported-by: Tony Asleson Reviewed-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 6319deccbe09..7af242de3202 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1963,7 +1963,7 @@ static bool __journal_read_write(struct dm_integrity_io *dio, struct bio *bio, n_sectors -= bv.bv_len >> SECTOR_SHIFT; bio_advance_iter(bio, &bio->bi_iter, bv.bv_len); retry_kmap: - mem = bvec_kmap_local(&bv); + mem = kmap_local_page(bv.bv_page); if (likely(dio->op == REQ_OP_WRITE)) flush_dcache_page(bv.bv_page); From bf0a375055bd1afbbf02a0ef45f7655da7b71317 Mon Sep 17 00:00:00 2001 From: Cyril Novikov Date: Mon, 1 Nov 2021 18:39:36 -0700 Subject: [PATCH 547/868] ixgbe: set X550 MDIO speed before talking to PHY The MDIO bus speed must be initialized before talking to the PHY the first time in order to avoid talking to it using a speed that the PHY doesn't support. This fixes HW initialization error -17 (IXGBE_ERR_PHY_ADDR_INVALID) on Denverton CPUs (a.k.a. the Atom C3000 family) on ports with a 10Gb network plugged in. On those devices, HLREG0[MDCSPD] resets to 1, which combined with the 10Gb network results in a 24MHz MDIO speed, which is apparently too fast for the connected PHY. PHY register reads over MDIO bus return garbage, leading to initialization failure. Reproduced with Linux kernel 4.19 and 5.15-rc7. Can be reproduced using the following setup: * Use an Atom C3000 family system with at least one X552 LAN on the SoC * Disable PXE or other BIOS network initialization if possible (the interface must not be initialized before Linux boots) * Connect a live 10Gb Ethernet cable to an X550 port * Power cycle (not reset, doesn't always work) the system and boot Linux * Observe: ixgbe interfaces w/ 10GbE cables plugged in fail with error -17 Fixes: e84db7272798 ("ixgbe: Introduce function to control MDIO speed") Signed-off-by: Cyril Novikov Reviewed-by: Andrew Lunn Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 9724ffb16518..e4b50c7781ff 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3405,6 +3405,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); + /* set MDIO speed before talking to the PHY in case it's the 1st time */ + ixgbe_set_mdio_speed(hw); + /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || From 1ee33b1ca2b8dabfcc17198ffd049a6b55674a86 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 15 Dec 2021 20:52:40 +0900 Subject: [PATCH 548/868] tty: n_hdlc: make n_hdlc_tty_wakeup() asynchronous syzbot is reporting that an unprivileged user who logged in from tty console can crash the system using a reproducer shown below [1], for n_hdlc_tty_wakeup() is synchronously calling n_hdlc_send_frames(). ---------- #include #include int main(int argc, char *argv[]) { const int disc = 0xd; ioctl(1, TIOCSETD, &disc); while (1) { ioctl(1, TCXONC, 0); write(1, "", 1); ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */ } } ---------- Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like net/nfc/nci/uart.c does. Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1] Reported-by: syzbot Cc: stable Analyzed-by: Fabio M. De Francesco Suggested-by: Linus Torvalds Confirmed-by: Jiri Slaby Reviewed-by: Fabio M. De Francesco Signed-off-by: Tetsuo Handa Link: https://lore.kernel.org/r/40de8b7e-a3be-4486-4e33-1b1d1da452f8@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_hdlc.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 7e0884ecc74f..23ba1fc99df8 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -140,6 +140,8 @@ struct n_hdlc { struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; struct n_hdlc_buf_list rx_free_buf_list; + struct work_struct write_work; + struct tty_struct *tty_for_write_work; }; /* @@ -154,6 +156,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); /* Local functions */ static struct n_hdlc *n_hdlc_alloc(void); +static void n_hdlc_tty_write_work(struct work_struct *work); /* max frame size for memory allocations */ static int maxframe = 4096; @@ -210,6 +213,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty) wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); + cancel_work_sync(&n_hdlc->write_work); + n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->rx_buf_list); @@ -241,6 +246,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty) return -ENFILE; } + INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work); + n_hdlc->tty_for_write_work = tty; tty->disc_data = n_hdlc; tty->receive_room = 65536; @@ -334,6 +341,20 @@ check_again: goto check_again; } /* end of n_hdlc_send_frames() */ +/** + * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup + * @work: pointer to work_struct + * + * Called when low level device driver can accept more send data. + */ +static void n_hdlc_tty_write_work(struct work_struct *work) +{ + struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work); + struct tty_struct *tty = n_hdlc->tty_for_write_work; + + n_hdlc_send_frames(n_hdlc, tty); +} /* end of n_hdlc_tty_write_work() */ + /** * n_hdlc_tty_wakeup - Callback for transmit wakeup * @tty: pointer to associated tty instance data @@ -344,7 +365,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; - n_hdlc_send_frames(n_hdlc, tty); + schedule_work(&n_hdlc->write_work); } /* end of n_hdlc_tty_wakeup() */ /** From 6c33ff728812aa18792afffaf2c9873b898e7512 Mon Sep 17 00:00:00 2001 From: "Ji-Ze Hong (Peter Hong)" Date: Wed, 15 Dec 2021 15:58:35 +0800 Subject: [PATCH 549/868] serial: 8250_fintek: Fix garbled text for console Commit fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") introduced support to use high baudrate with Fintek SuperIO UARTs. It'll change clocksources when the UART probed. But when user add kernel parameter "console=ttyS0,115200 console=tty0" to make the UART as console output, the console will output garbled text after the following kernel message. [ 3.681188] Serial: 8250/16550 driver, 32 ports, IRQ sharing enabled The issue is occurs in following step: probe_setup_port() -> fintek_8250_goto_highspeed() It change clocksource from 115200 to 921600 with wrong time, it should change clocksource in set_termios() not in probed. The following 3 patches are implemented change clocksource in fintek_8250_set_termios(). Commit 58178914ae5b ("serial: 8250_fintek: UART dynamic clocksource on Fintek F81216H") Commit 195638b6d44f ("serial: 8250_fintek: UART dynamic clocksource on Fintek F81866") Commit 423d9118c624 ("serial: 8250_fintek: Add F81966 Support") Due to the high baud rate had implemented above 3 patches and the patch Commit fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") is bugged, So this patch will remove it. Fixes: fab8a02b73eb ("serial: 8250_fintek: Enable high speed mode on Fintek F81866") Signed-off-by: Ji-Ze Hong (Peter Hong) Link: https://lore.kernel.org/r/20211215075835.2072-1-hpeter+linux_kernel@gmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_fintek.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 31c9e83ea3cb..251f0018ae8c 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -290,25 +290,6 @@ static void fintek_8250_set_max_fifo(struct fintek_8250 *pdata) } } -static void fintek_8250_goto_highspeed(struct uart_8250_port *uart, - struct fintek_8250 *pdata) -{ - sio_write_reg(pdata, LDN, pdata->index); - - switch (pdata->pid) { - case CHIP_ID_F81966: - case CHIP_ID_F81866: /* set uart clock for high speed serial mode */ - sio_write_mask_reg(pdata, F81866_UART_CLK, - F81866_UART_CLK_MASK, - F81866_UART_CLK_14_769MHZ); - - uart->port.uartclk = 921600 * 16; - break; - default: /* leave clock speed untouched */ - break; - } -} - static void fintek_8250_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) @@ -430,7 +411,6 @@ static int probe_setup_port(struct fintek_8250 *pdata, fintek_8250_set_irq_mode(pdata, level_mode); fintek_8250_set_max_fifo(pdata); - fintek_8250_goto_highspeed(uart, pdata); fintek_8250_exit_key(addr[i]); From f886d4fbb7c97b8f5f447c92d2dab99c841803c0 Mon Sep 17 00:00:00 2001 From: Nehal Bakulchandra Shah Date: Wed, 15 Dec 2021 15:02:16 +0530 Subject: [PATCH 550/868] usb: xhci: Extend support for runtime power management for AMD's Yellow carp. AMD's Yellow Carp platform has few more XHCI controllers, enable the runtime power management support for the same. Signed-off-by: Nehal Bakulchandra Shah Cc: stable Link: https://lore.kernel.org/r/20211215093216.1839065-1-Nehal-Bakulchandra.shah@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 92adf6107864..3af017883231 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -71,6 +71,8 @@ #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 0x161e #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 0x15d6 #define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 0x15d7 +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 0x161c +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8 0x161f #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 @@ -330,7 +332,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 || pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 || pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 || - pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6)) + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8)) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; if (xhci->quirks & XHCI_RESET_ON_RESUME) From 0ad3bd562bb91853b9f42bda145b5db6255aee90 Mon Sep 17 00:00:00 2001 From: Jimmy Wang Date: Tue, 14 Dec 2021 09:26:50 +0800 Subject: [PATCH 551/868] USB: NO_LPM quirk Lenovo USB-C to Ethernet Adapher(RTL8153-04) This device doesn't work well with LPM, losing connectivity intermittently. Disable LPM to resolve the issue. Reviewed-by: Signed-off-by: Jimmy Wang Cc: stable Link: https://lore.kernel.org/r/20211214012652.4898-1-wangjm221@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 019351c0b52c..d3c14b5ed4a1 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -434,6 +434,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ + { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, From 4c4e162d9cf38528c4f13df09d5755cbc06f6c77 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 14 Dec 2021 05:55:27 +0100 Subject: [PATCH 552/868] usb: cdnsp: Fix lack of spin_lock_irqsave/spin_lock_restore Patch puts content of cdnsp_gadget_pullup function inside spin_lock_irqsave and spin_lock_restore section. This construction is required here to keep the data consistency, otherwise some data can be changed e.g. from interrupt context. Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Reported-by: Ken (Jian) He cc: Signed-off-by: Pawel Laszczak -- Changelog: v2: - added disable_irq/enable_irq as sugester by Peter Chen drivers/usb/cdns3/cdnsp-gadget.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20211214045527.26823-1-pawell@gli-login.cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 27df0c697897..e85bf768c66d 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -1541,15 +1541,27 @@ static int cdnsp_gadget_pullup(struct usb_gadget *gadget, int is_on) { struct cdnsp_device *pdev = gadget_to_cdnsp(gadget); struct cdns *cdns = dev_get_drvdata(pdev->dev); + unsigned long flags; trace_cdnsp_pullup(is_on); + /* + * Disable events handling while controller is being + * enabled/disabled. + */ + disable_irq(cdns->dev_irq); + spin_lock_irqsave(&pdev->lock, flags); + if (!is_on) { cdnsp_reset_device(pdev); cdns_clear_vbus(cdns); } else { cdns_set_vbus(cdns); } + + spin_unlock_irqrestore(&pdev->lock, flags); + enable_irq(cdns->dev_irq); + return 0; } From 0f7d9b31ce7abdbb29bf018131ac920c9f698518 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Dec 2021 05:45:44 -0800 Subject: [PATCH 553/868] netfilter: nf_tables: fix use-after-free in nft_set_catchall_destroy() We need to use list_for_each_entry_safe() iterator because we can not access @catchall after kfree_rcu() call. syzbot reported: BUG: KASAN: use-after-free in nft_set_catchall_destroy net/netfilter/nf_tables_api.c:4486 [inline] BUG: KASAN: use-after-free in nft_set_destroy net/netfilter/nf_tables_api.c:4504 [inline] BUG: KASAN: use-after-free in nft_set_destroy+0x3fd/0x4f0 net/netfilter/nf_tables_api.c:4493 Read of size 8 at addr ffff8880716e5b80 by task syz-executor.3/8871 CPU: 1 PID: 8871 Comm: syz-executor.3 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x2ed mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:450 nft_set_catchall_destroy net/netfilter/nf_tables_api.c:4486 [inline] nft_set_destroy net/netfilter/nf_tables_api.c:4504 [inline] nft_set_destroy+0x3fd/0x4f0 net/netfilter/nf_tables_api.c:4493 __nft_release_table+0x79f/0xcd0 net/netfilter/nf_tables_api.c:9626 nft_rcv_nl_event+0x4f8/0x670 net/netfilter/nf_tables_api.c:9688 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83 blocking_notifier_call_chain kernel/notifier.c:318 [inline] blocking_notifier_call_chain+0x67/0x90 kernel/notifier.c:306 netlink_release+0xcb6/0x1dd0 net/netlink/af_netlink.c:788 __sock_release+0xcd/0x280 net/socket.c:649 sock_close+0x18/0x20 net/socket.c:1314 __fput+0x286/0x9f0 fs/file_table.c:280 task_work_run+0xdd/0x1a0 kernel/task_work.c:164 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:175 [inline] exit_to_user_mode_prepare+0x27e/0x290 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f75fbf28adb Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 83 ec 18 89 7c 24 0c e8 63 fc ff ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 fc ff ff 8b 44 RSP: 002b:00007ffd8da7ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f75fbf28adb RDX: 00007f75fc08e828 RSI: ffffffffffffffff RDI: 0000000000000003 RBP: 00007f75fc08a960 R08: 0000000000000000 R09: 00007f75fc08e830 R10: 00007ffd8da7ed10 R11: 0000000000000293 R12: 00000000002067c3 R13: 00007ffd8da7ed10 R14: 00007f75fc088f60 R15: 0000000000000032 Allocated by task 8886: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] ____kasan_kmalloc mm/kasan/common.c:513 [inline] ____kasan_kmalloc mm/kasan/common.c:472 [inline] __kasan_kmalloc+0xa6/0xd0 mm/kasan/common.c:522 kasan_kmalloc include/linux/kasan.h:269 [inline] kmem_cache_alloc_trace+0x1ea/0x4a0 mm/slab.c:3575 kmalloc include/linux/slab.h:590 [inline] nft_setelem_catchall_insert net/netfilter/nf_tables_api.c:5544 [inline] nft_setelem_insert net/netfilter/nf_tables_api.c:5562 [inline] nft_add_set_elem+0x232e/0x2f40 net/netfilter/nf_tables_api.c:5936 nf_tables_newsetelem+0x6ff/0xbb0 net/netfilter/nf_tables_api.c:6032 nfnetlink_rcv_batch+0x1710/0x25f0 net/netfilter/nfnetlink.c:513 nfnetlink_rcv_skb_batch net/netfilter/nfnetlink.c:634 [inline] nfnetlink_rcv+0x3af/0x420 net/netfilter/nfnetlink.c:652 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x904/0xdf0 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 15335: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xd1/0x110 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] __cache_free mm/slab.c:3445 [inline] kmem_cache_free_bulk+0x67/0x1e0 mm/slab.c:3766 kfree_bulk include/linux/slab.h:446 [inline] kfree_rcu_work+0x51c/0xa10 kernel/rcu/tree.c:3273 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 Last potentially related work creation: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 __kasan_record_aux_stack+0xb5/0xe0 mm/kasan/generic.c:348 kvfree_call_rcu+0x74/0x990 kernel/rcu/tree.c:3550 nft_set_catchall_destroy net/netfilter/nf_tables_api.c:4489 [inline] nft_set_destroy net/netfilter/nf_tables_api.c:4504 [inline] nft_set_destroy+0x34a/0x4f0 net/netfilter/nf_tables_api.c:4493 __nft_release_table+0x79f/0xcd0 net/netfilter/nf_tables_api.c:9626 nft_rcv_nl_event+0x4f8/0x670 net/netfilter/nf_tables_api.c:9688 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83 blocking_notifier_call_chain kernel/notifier.c:318 [inline] blocking_notifier_call_chain+0x67/0x90 kernel/notifier.c:306 netlink_release+0xcb6/0x1dd0 net/netlink/af_netlink.c:788 __sock_release+0xcd/0x280 net/socket.c:649 sock_close+0x18/0x20 net/socket.c:1314 __fput+0x286/0x9f0 fs/file_table.c:280 task_work_run+0xdd/0x1a0 kernel/task_work.c:164 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:175 [inline] exit_to_user_mode_prepare+0x27e/0x290 kernel/entry/common.c:207 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline] syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff8880716e5b80 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 0 bytes inside of 64-byte region [ffff8880716e5b80, ffff8880716e5bc0) The buggy address belongs to the page: page:ffffea0001c5b940 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff8880716e5c00 pfn:0x716e5 flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 ffffea0000911848 ffffea00007c4d48 ffff888010c40200 raw: ffff8880716e5c00 ffff8880716e5000 000000010000001e 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x242040(__GFP_IO|__GFP_NOWARN|__GFP_COMP|__GFP_THISNODE), pid 3638, ts 211086074437, free_ts 211031029429 prep_new_page mm/page_alloc.c:2418 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369 __alloc_pages_node include/linux/gfp.h:570 [inline] kmem_getpages mm/slab.c:1377 [inline] cache_grow_begin+0x75/0x470 mm/slab.c:2593 cache_alloc_refill+0x27f/0x380 mm/slab.c:2965 ____cache_alloc mm/slab.c:3048 [inline] ____cache_alloc mm/slab.c:3031 [inline] __do_cache_alloc mm/slab.c:3275 [inline] slab_alloc mm/slab.c:3316 [inline] __do_kmalloc mm/slab.c:3700 [inline] __kmalloc+0x3b3/0x4d0 mm/slab.c:3711 kmalloc include/linux/slab.h:595 [inline] kzalloc include/linux/slab.h:724 [inline] tomoyo_get_name+0x234/0x480 security/tomoyo/memory.c:173 tomoyo_parse_name_union+0xbc/0x160 security/tomoyo/util.c:260 tomoyo_update_path_number_acl security/tomoyo/file.c:687 [inline] tomoyo_write_file+0x629/0x7f0 security/tomoyo/file.c:1034 tomoyo_write_domain2+0x116/0x1d0 security/tomoyo/common.c:1152 tomoyo_add_entry security/tomoyo/common.c:2042 [inline] tomoyo_supervisor+0xbc7/0xf00 security/tomoyo/common.c:2103 tomoyo_audit_path_number_log security/tomoyo/file.c:235 [inline] tomoyo_path_number_perm+0x419/0x590 security/tomoyo/file.c:734 security_file_ioctl+0x50/0xb0 security/security.c:1541 __do_sys_ioctl fs/ioctl.c:868 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0xb3/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1338 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389 free_unref_page_prepare mm/page_alloc.c:3309 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3388 slab_destroy mm/slab.c:1627 [inline] slabs_destroy+0x89/0xc0 mm/slab.c:1647 cache_flusharray mm/slab.c:3418 [inline] ___cache_free+0x4cc/0x610 mm/slab.c:3480 qlink_free mm/kasan/quarantine.c:146 [inline] qlist_free_all+0x4e/0x110 mm/kasan/quarantine.c:165 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272 __kasan_slab_alloc+0x97/0xb0 mm/kasan/common.c:444 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slab.c:3261 [inline] kmem_cache_alloc_node+0x2ea/0x590 mm/slab.c:3599 __alloc_skb+0x215/0x340 net/core/skbuff.c:414 alloc_skb include/linux/skbuff.h:1126 [inline] nlmsg_new include/net/netlink.h:953 [inline] rtmsg_ifinfo_build_skb+0x72/0x1a0 net/core/rtnetlink.c:3808 rtmsg_ifinfo_event net/core/rtnetlink.c:3844 [inline] rtmsg_ifinfo_event net/core/rtnetlink.c:3835 [inline] rtmsg_ifinfo+0x83/0x120 net/core/rtnetlink.c:3853 netdev_state_change net/core/dev.c:1395 [inline] netdev_state_change+0x114/0x130 net/core/dev.c:1386 linkwatch_do_dev+0x10e/0x150 net/core/link_watch.c:167 __linkwatch_run_queue+0x233/0x6a0 net/core/link_watch.c:213 linkwatch_event+0x4a/0x60 net/core/link_watch.c:252 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 Memory state around the buggy address: ffff8880716e5a80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8880716e5b00: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc >ffff8880716e5b80: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ^ ffff8880716e5c00: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8880716e5c80: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc Fixes: aaa31047a6d2 ("netfilter: nftables: add catch-all set element support") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c0851fec11d4..c20772822637 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4481,9 +4481,9 @@ struct nft_set_elem_catchall { static void nft_set_catchall_destroy(const struct nft_ctx *ctx, struct nft_set *set) { - struct nft_set_elem_catchall *catchall; + struct nft_set_elem_catchall *next, *catchall; - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { + list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); nft_set_elem_destroy(set, catchall->elem, true); kfree_rcu(catchall); From ca4d8344a72b91fb9d4c8bfbc22204b4c09c5d8f Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Thu, 9 Dec 2021 18:15:07 +0800 Subject: [PATCH 554/868] usb: typec: tcpm: fix tcpm unregister port but leave a pending timer In current design, when the tcpm port is unregisterd, the kthread_worker will be destroyed in the last step. Inside the kthread_destroy_worker(), the worker will flush all the works and wait for them to end. However, if one of the works calls hrtimer_start(), this hrtimer will be pending until timeout even though tcpm port is removed. Once the hrtimer timeout, many strange kernel dumps appear. Thus, we can first complete kthread_destroy_worker(), then cancel all the hrtimers. This will guarantee that no hrtimer is pending at the end. Fixes: 3ed8e1c2ac99 ("usb: typec: tcpm: Migrate workqueue to RT priority for processing events") cc: Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Xu Yang Link: https://lore.kernel.org/r/20211209101507.499096-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 6010b9901126..59d4fa2443f2 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -324,6 +324,7 @@ struct tcpm_port { bool attached; bool connected; + bool registered; bool pd_supported; enum typec_port_type port_type; @@ -6291,7 +6292,8 @@ static enum hrtimer_restart state_machine_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, state_machine_timer); - kthread_queue_work(port->wq, &port->state_machine); + if (port->registered) + kthread_queue_work(port->wq, &port->state_machine); return HRTIMER_NORESTART; } @@ -6299,7 +6301,8 @@ static enum hrtimer_restart vdm_state_machine_timer_handler(struct hrtimer *time { struct tcpm_port *port = container_of(timer, struct tcpm_port, vdm_state_machine_timer); - kthread_queue_work(port->wq, &port->vdm_state_machine); + if (port->registered) + kthread_queue_work(port->wq, &port->vdm_state_machine); return HRTIMER_NORESTART; } @@ -6307,7 +6310,8 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, enable_frs_timer); - kthread_queue_work(port->wq, &port->enable_frs); + if (port->registered) + kthread_queue_work(port->wq, &port->enable_frs); return HRTIMER_NORESTART; } @@ -6315,7 +6319,8 @@ static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) { struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer); - kthread_queue_work(port->wq, &port->send_discover_work); + if (port->registered) + kthread_queue_work(port->wq, &port->send_discover_work); return HRTIMER_NORESTART; } @@ -6403,6 +6408,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) typec_port_register_altmodes(port->typec_port, &tcpm_altmode_ops, port, port->port_altmode, ALTMODE_DISCOVERY_MAX); + port->registered = true; mutex_lock(&port->lock); tcpm_init(port); @@ -6424,6 +6430,9 @@ void tcpm_unregister_port(struct tcpm_port *port) { int i; + port->registered = false; + kthread_destroy_worker(port->wq); + hrtimer_cancel(&port->send_discover_timer); hrtimer_cancel(&port->enable_frs_timer); hrtimer_cancel(&port->vdm_state_machine_timer); @@ -6435,7 +6444,6 @@ void tcpm_unregister_port(struct tcpm_port *port) typec_unregister_port(port->typec_port); usb_role_switch_put(port->role_sw); tcpm_debugfs_exit(port); - kthread_destroy_worker(port->wq); } EXPORT_SYMBOL_GPL(tcpm_unregister_port); From ebb966d3bdfed581ecccbb4a7432341baf7619b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?= Date: Fri, 10 Dec 2021 16:31:27 +0100 Subject: [PATCH 555/868] netfilter: fix regression in looped (broad|multi)cast's MAC handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 5648b5e1169f ("netfilter: nfnetlink_queue: fix OOB when mac header was cleared"), the test for non-empty MAC header introduced in commit 2c38de4c1f8da7 ("netfilter: fix looped (broad|multi)cast's MAC handling") has been replaced with a test for a set MAC header. This breaks the case when the MAC header has been reset (using skb_reset_mac_header), as is the case with looped-back multicast packets. As a result, the packets ending up in NFQUEUE get a bogus hwaddr interpreted from the first bytes of the IP header. This patch adds a test for a non-empty MAC header in addition to the test for a set MAC header. The same two tests are also implemented in nfnetlink_log.c, where the initial code of commit 2c38de4c1f8da7 ("netfilter: fix looped (broad|multi)cast's MAC handling") has not been touched, but where supposedly the same situation may happen. Fixes: 5648b5e1169f ("netfilter: nfnetlink_queue: fix OOB when mac header was cleared") Signed-off-by: Ignacy Gawędzki Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 3 ++- net/netfilter/nfnetlink_queue.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 691ef4cffdd9..7f83f9697fc1 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -556,7 +556,8 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; if (indev && skb->dev && - skb->mac_header != skb->network_header) { + skb_mac_header_was_set(skb) && + skb_mac_header_len(skb) != 0) { struct nfulnl_msg_packet_hw phw; int len; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5837e8efc9c2..f0b9e21a2452 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -560,7 +560,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; if (indev && entskb->dev && - skb_mac_header_was_set(entskb)) { + skb_mac_header_was_set(entskb) && + skb_mac_header_len(entskb) != 0) { struct nfqnl_msg_packet_hw phw; int len; From 972ce7e3801e790bb348bfe98be1ab65af15bacd Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Wed, 15 Dec 2021 12:58:31 +0200 Subject: [PATCH 556/868] dpaa2-eth: fix ethtool statistics Unfortunately, with the blamed commit I also added a side effect in the ethtool stats shown. Because I added two more fields in the per channel structure without verifying if its size is used in any way, part of the ethtool statistics were off by 2. Fix this by not looking up the size of the structure but instead on a fixed value kept in a macro. Fixes: fc398bec0387 ("net: dpaa2: add adaptive interrupt coalescing") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20211215105831.290070-1-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 2 ++ drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 2085844227fe..e54e70ebdd05 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -388,6 +388,8 @@ struct dpaa2_eth_ch_stats { __u64 bytes_per_cdan; }; +#define DPAA2_ETH_CH_STATS 7 + /* Maximum number of queues associated with a DPNI */ #define DPAA2_ETH_MAX_TCS 8 #define DPAA2_ETH_MAX_RX_QUEUES_PER_TC 16 diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index adb8ce5306ee..3fdbf87dccb1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -278,7 +278,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev, /* Per-channel stats */ for (k = 0; k < priv->num_channels; k++) { ch_stats = &priv->channel[k]->stats; - for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64) - 1; j++) + for (j = 0; j < DPAA2_ETH_CH_STATS; j++) *((__u64 *)data + i + j) += *((__u64 *)ch_stats + j); } i += j; From 481221775d53d6215a6e5e9ce1cce6d2b4ab9a46 Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Wed, 15 Dec 2021 19:15:30 +0800 Subject: [PATCH 557/868] netdevsim: Zero-initialize memory for new map's value in function nsim_bpf_map_alloc Zero-initialize memory for new map's value in function nsim_bpf_map_alloc since it may cause a potential kernel information leak issue, as follows: 1. nsim_bpf_map_alloc calls nsim_map_alloc_elem to allocate elements for a new map. 2. nsim_map_alloc_elem uses kmalloc to allocate map's value, but doesn't zero it. 3. A user application can use IOCTL BPF_MAP_LOOKUP_ELEM to get specific element's information in the map. 4. The kernel function map_lookup_elem will call bpf_map_copy_value to get the information allocated at step-2, then use copy_to_user to copy to the user buffer. This can only leak information for an array map. Fixes: 395cacb5f1a0 ("netdevsim: bpf: support fake map offload") Suggested-by: Jakub Kicinski Acked-by: Jakub Kicinski Signed-off-by: Haimin Zhang Link: https://lore.kernel.org/r/20211215111530.72103-1-tcs.kernel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 90aafb56f140..a43820212932 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -514,6 +514,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) goto err_free; key = nmap->entry[i].key; *key = i; + memset(nmap->entry[i].value, 0, offmap->map.value_size); } } From ec6af094ea28f0f2dda1a6a33b14cd57e36a9755 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 15 Dec 2021 09:39:37 -0500 Subject: [PATCH 558/868] net/packet: rx_owner_map depends on pg_vec Packet sockets may switch ring versions. Avoid misinterpreting state between versions, whose fields share a union. rx_owner_map is only allocated with a packet ring (pg_vec) and both are swapped together. If pg_vec is NULL, meaning no packet ring was allocated, then neither was rx_owner_map. And the field may be old state from a tpacket_v3. Fixes: 61fad6816fc1 ("net/packet: tpacket_rcv: avoid a producer race condition") Reported-by: Syzbot Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20211215143937.106178-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 46943a18a10d..76c2dca7f0a5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4492,9 +4492,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: - bitmap_free(rx_owner_map); - if (pg_vec) + if (pg_vec) { + bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); + } out: return err; } From ef8a0f6eab1ca5d1a75c242c5c7b9d386735fa0a Mon Sep 17 00:00:00 2001 From: Greg Jesionowski Date: Tue, 14 Dec 2021 15:10:27 -0700 Subject: [PATCH 559/868] net: usb: lan78xx: add Allied Telesis AT29M2-AF This adds the vendor and product IDs for the AT29M2-AF which is a lan7801-based device. Signed-off-by: Greg Jesionowski Link: https://lore.kernel.org/r/20211214221027.305784-1-jesionowskigreg@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 8cd265fc1fd9..075f8abde5cd 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -76,6 +76,8 @@ #define LAN7801_USB_PRODUCT_ID (0x7801) #define LAN78XX_EEPROM_MAGIC (0x78A5) #define LAN78XX_OTP_MAGIC (0x78F3) +#define AT29M2AF_USB_VENDOR_ID (0x07C9) +#define AT29M2AF_USB_PRODUCT_ID (0x0012) #define MII_READ 1 #define MII_WRITE 0 @@ -4734,6 +4736,10 @@ static const struct usb_device_id products[] = { /* LAN7801 USB Gigabit Ethernet Device */ USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7801_USB_PRODUCT_ID), }, + { + /* ATM2-AF USB Gigabit Ethernet Device */ + USB_DEVICE(AT29M2AF_USB_VENDOR_ID, AT29M2AF_USB_PRODUCT_ID), + }, {}, }; MODULE_DEVICE_TABLE(usb, products); From b67210cc217f9ca1c576909454d846970c13dfd4 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Wed, 15 Dec 2021 10:58:08 +0100 Subject: [PATCH 560/868] pinctrl: stm32: consider the GPIO offset to expose all the GPIO lines Consider the GPIO controller offset (from "gpio-ranges") to compute the maximum GPIO line number. This fixes an issue where gpio-ranges uses a non-null offset. e.g.: gpio-ranges = <&pinctrl 6 86 10> In that case the last valid GPIO line is not 9 but 15 (6 + 10 - 1) Cc: stable@vger.kernel.org Fixes: 67e2996f72c7 ("pinctrl: stm32: fix the reported number of GPIO lines per bank") Reported-by: Christoph Fritz Signed-off-by: Fabien Dessenne Link: https://lore.kernel.org/r/20211215095808.621716-1-fabien.dessenne@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 24764ebcc936..9ed764731570 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1251,10 +1251,10 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; - npins = args.args[2]; - while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, - ++i, &args)) - npins += args.args[2]; + /* get the last defined gpio line (offset + nb of pins) */ + npins = args.args[0] + args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++i, &args)) + npins = max(npins, (int)(args.args[0] + args.args[2])); } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; From 39e660687ac0c57499134765abbecf71cfd11eae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Haa=C3=9F?= Date: Sun, 12 Dec 2021 09:30:30 -0300 Subject: [PATCH 561/868] ARM: dts: imx6qdl-wandboard: Fix Ethernet support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the imx6q-wandboard Ethernet does not transmit any data. This issue has been exposed by commit f5d9aa79dfdf ("ARM: imx6q: remove clk-out fixup for the Atheros AR8031 and AR8035 PHYs"). Fix it by describing the qca,clk-out-frequency property as suggested by the commit above. Fixes: 77591e42458d ("ARM: dts: imx6qdl-wandboard: add ethernet PHY description") Signed-off-by: Martin Haaß Tested-by: Fabio Estevam Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-wandboard.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index b62a0dbb033f..ec6fba5ee8fd 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -309,6 +309,7 @@ ethphy: ethernet-phy@1 { reg = <1>; + qca,clk-out-frequency = <125000000>; }; }; }; From 0fd08a34e8e3b67ec9bd8287ac0facf8374b844a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 562/868] xen/blkfront: harden blkfront against event channel storms The Xen blkfront driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using lateeoi event channels. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/block/xen-blkfront.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8e3983e456f3..286cf1afad78 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1512,9 +1512,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); return IRQ_HANDLED; + } spin_lock_irqsave(&rinfo->ring_lock, flags); again: @@ -1530,6 +1533,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long id; unsigned int op; + eoiflag = 0; + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); id = bret.id; @@ -1646,6 +1651,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; err: @@ -1653,6 +1660,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + /* No EOI in order to avoid further interrupts. */ + pr_alert("%s disabled for further use\n", info->gd->disk_name); return IRQ_HANDLED; } @@ -1692,8 +1701,8 @@ static int setup_blkring(struct xenbus_device *dev, if (err) goto fail; - err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0, - "blkif", rinfo); + err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt, + 0, "blkif", rinfo); if (err <= 0) { xenbus_dev_fatal(dev, err, "bind_evtchn_to_irqhandler failed"); From b27d47950e481f292c0a5ad57357edb9d95d03ba Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 563/868] xen/netfront: harden netfront against event channel storms The Xen netfront driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using lateeoi event channels. For being able to detect the case of no rx responses being added while the carrier is down a new lock is needed in order to update and test rsp_cons and the number of seen unconsumed responses atomically. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - don't eoi irq in case of interface set broken (Jan Beulich) - handle carrier off + no new responses added (Jan Beulich) V3: - add rx_ prefix to rsp_unconsumed (Jan Beulich) - correct xennet_set_rx_rsp_cons() spelling (Jan Beulich) --- drivers/net/xen-netfront.c | 127 +++++++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 32 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 911f43986a8c..d514d96027a6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -148,6 +148,9 @@ struct netfront_queue { grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + unsigned int rx_rsp_unconsumed; + spinlock_t rx_cons_lock; + struct page_pool *page_pool; struct xdp_rxq_info xdp_rxq; }; @@ -376,12 +379,13 @@ static int xennet_open(struct net_device *dev) return 0; } -static void xennet_tx_buf_gc(struct netfront_queue *queue) +static bool xennet_tx_buf_gc(struct netfront_queue *queue) { RING_IDX cons, prod; unsigned short id; struct sk_buff *skb; bool more_to_do; + bool work_done = false; const struct device *dev = &queue->info->netdev->dev; BUG_ON(!netif_carrier_ok(queue->info->netdev)); @@ -398,6 +402,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) for (cons = queue->tx.rsp_cons; cons != prod; cons++) { struct xen_netif_tx_response txrsp; + work_done = true; + RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); if (txrsp.status == XEN_NETIF_RSP_NULL) continue; @@ -441,11 +447,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) xennet_maybe_wake_tx(queue); - return; + return work_done; err: queue->info->broken = true; dev_alert(dev, "Disabled for further use\n"); + + return work_done; } struct xennet_gnttab_make_txreq { @@ -834,6 +842,16 @@ static int xennet_close(struct net_device *dev) return 0; } +static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + queue->rx.rsp_cons = val; + queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); +} + static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb, grant_ref_t ref) { @@ -885,7 +903,7 @@ static int xennet_get_extras(struct netfront_queue *queue, xennet_move_rx_slot(queue, skb, ref); } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return err; } @@ -1039,7 +1057,7 @@ next: } if (unlikely(err)) - queue->rx.rsp_cons = cons + slots; + xennet_set_rx_rsp_cons(queue, cons + slots); return err; } @@ -1093,7 +1111,8 @@ static int xennet_fill_frags(struct netfront_queue *queue, __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { - queue->rx.rsp_cons = ++cons + skb_queue_len(list); + xennet_set_rx_rsp_cons(queue, + ++cons + skb_queue_len(list)); kfree_skb(nskb); return -ENOENT; } @@ -1106,7 +1125,7 @@ static int xennet_fill_frags(struct netfront_queue *queue, kfree_skb(nskb); } - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return 0; } @@ -1229,7 +1248,9 @@ err: if (unlikely(xennet_set_skb_gso(skb, gso))) { __skb_queue_head(&tmpq, skb); - queue->rx.rsp_cons += skb_queue_len(&tmpq); + xennet_set_rx_rsp_cons(queue, + queue->rx.rsp_cons + + skb_queue_len(&tmpq)); goto err; } } @@ -1253,7 +1274,8 @@ err: __skb_queue_tail(&rxq, skb); - i = ++queue->rx.rsp_cons; + i = queue->rx.rsp_cons + 1; + xennet_set_rx_rsp_cons(queue, i); work_done++; } if (need_xdp_flush) @@ -1417,40 +1439,79 @@ static int xennet_set_features(struct net_device *dev, return 0; } -static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; unsigned long flags; - if (queue->info->broken) - return IRQ_HANDLED; + if (unlikely(queue->info->broken)) + return false; spin_lock_irqsave(&queue->tx_lock, flags); - xennet_tx_buf_gc(queue); + if (xennet_tx_buf_gc(queue)) + *eoi = 0; spin_unlock_irqrestore(&queue->tx_lock, flags); + return true; +} + +static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_tx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } +static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi) +{ + unsigned int work_queued; + unsigned long flags; + + if (unlikely(queue->info->broken)) + return false; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + if (work_queued > queue->rx_rsp_unconsumed) { + queue->rx_rsp_unconsumed = work_queued; + *eoi = 0; + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { + const struct device *dev = &queue->info->netdev->dev; + + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + dev_alert(dev, "RX producer index going backwards\n"); + dev_alert(dev, "Disabled for further use\n"); + queue->info->broken = true; + return false; + } + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) + napi_schedule(&queue->napi); + + return true; +} + static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) { - struct netfront_queue *queue = dev_id; - struct net_device *dev = queue->info->netdev; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (queue->info->broken) - return IRQ_HANDLED; - - if (likely(netif_carrier_ok(dev) && - RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) - napi_schedule(&queue->napi); + if (likely(xennet_handle_rx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); return IRQ_HANDLED; } static irqreturn_t xennet_interrupt(int irq, void *dev_id) { - xennet_tx_interrupt(irq, dev_id); - xennet_rx_interrupt(irq, dev_id); + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (xennet_handle_tx(dev_id, &eoiflag) && + xennet_handle_rx(dev_id, &eoiflag)) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } @@ -1768,9 +1829,10 @@ static int setup_netfront_single(struct netfront_queue *queue) if (err < 0) goto fail; - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_interrupt, - 0, queue->info->netdev->name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_interrupt, 0, + queue->info->netdev->name, + queue); if (err < 0) goto bind_fail; queue->rx_evtchn = queue->tx_evtchn; @@ -1798,18 +1860,18 @@ static int setup_netfront_split(struct netfront_queue *queue) snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_tx_interrupt, - 0, queue->tx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_tx_interrupt, 0, + queue->tx_irq_name, queue); if (err < 0) goto bind_tx_fail; queue->tx_irq = err; snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_evtchn_to_irqhandler(queue->rx_evtchn, - xennet_rx_interrupt, - 0, queue->rx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn, + xennet_rx_interrupt, 0, + queue->rx_irq_name, queue); if (err < 0) goto bind_rx_fail; queue->rx_irq = err; @@ -1911,6 +1973,7 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); + spin_lock_init(&queue->rx_cons_lock); timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0); From fe415186b43df0db1f17fa3a46275fd92107fe71 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 564/868] xen/console: harden hvc_xen against event channel storms The Xen console driver is still vulnerable for an attack via excessive number of events sent by the backend. Fix that by using a lateeoi event channel. For the normal domU initial console this requires the introduction of bind_evtchn_to_irq_lateeoi() as there is no xenbus device available at the time the event channel is bound to the irq. As the decision whether an interrupt was spurious or not requires to test for bytes having been read from the backend, move sending the event into the if statement, as sending an event without having found any bytes to be read is making no sense at all. This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- V2: - slightly adapt spurious irq detection (Jan Beulich) V3: - fix spurious irq detection (Jan Beulich) --- drivers/tty/hvc/hvc_xen.c | 30 +++++++++++++++++++++++++++--- drivers/xen/events/events_base.c | 6 ++++++ include/xen/events.h | 1 + 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 71e0dd2c0ce5..ebaf7500f48f 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -37,6 +37,8 @@ struct xencons_info { struct xenbus_device *xbdev; struct xencons_interface *intf; unsigned int evtchn; + XENCONS_RING_IDX out_cons; + unsigned int out_cons_same; struct hvc_struct *hvc; int irq; int vtermno; @@ -138,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) XENCONS_RING_IDX cons, prod; int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); + unsigned int eoiflag = 0; + if (xencons == NULL) return -EINVAL; intf = xencons->intf; @@ -157,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; - notify_daemon(xencons); + /* + * When to mark interrupt having been spurious: + * - there was no new data to be read, and + * - the backend did not consume some output bytes, and + * - the previous round with no read data didn't see consumed bytes + * (we might have a race with an interrupt being in flight while + * updating xencons->out_cons, so account for that by allowing one + * round without any visible reason) + */ + if (intf->out_cons != xencons->out_cons) { + xencons->out_cons = intf->out_cons; + xencons->out_cons_same = 0; + } + if (recv) { + notify_daemon(xencons); + } else if (xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + + xen_irq_lateeoi(xencons->irq, eoiflag); + return recv; } @@ -386,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn); if (irq < 0) return irq; info->irq = irq; @@ -551,7 +575,7 @@ static int __init xen_hvc_init(void) return r; info = vtermno_to_xencons(HVC_COOKIE); - info->irq = bind_evtchn_to_irq(info->evtchn); + info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn); } if (info->irq < 0) info->irq = 0; /* NO_IRQ */ diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index a78704ae3618..46d9295d9a6e 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1251,6 +1251,12 @@ int bind_evtchn_to_irq(evtchn_port_t evtchn) } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; diff --git a/include/xen/events.h b/include/xen/events.h index c204262d9fc2..344081e71584 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -17,6 +17,7 @@ struct xenbus_device; unsigned xen_evtchn_nr_channels(void); int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, From 6032046ec4b70176d247a71836186d47b25d1684 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:25:12 +0100 Subject: [PATCH 565/868] xen/netback: fix rx queue stall detection Commit 1d5d48523900a4b ("xen-netback: require fewer guest Rx slots when not using GSO") introduced a security problem in netback, as an interface would only be regarded to be stalled if no slot is available in the rx queue ring page. In case the SKB at the head of the queued requests will need more than one rx slot and only one slot is free the stall detection logic will never trigger, as the test for that is only looking for at least one slot to be free. Fix that by testing for the needed number of slots instead of only one slot being available. In order to not have to take the rx queue lock that often, store the number of needed slots in the queue data. As all SKB dequeue operations happen in the rx queue kernel thread this is safe, as long as the number of needed slots is accessed via READ/WRITE_ONCE() only and updates are always done with the rx queue lock held. Add a small helper for obtaining the number of free slots. This is part of XSA-392 Fixes: 1d5d48523900a4b ("xen-netback: require fewer guest Rx slots when not using GSO") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/net/xen-netback/common.h | 1 + drivers/net/xen-netback/rx.c | 65 ++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 4a16d6e33c09..d9dea4829c86 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; unsigned long last_rx_time; + unsigned int rx_slots_needed; bool stalled; struct xenvif_copy_state rx_copy; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index accc991d153f..a8511e27d6c1 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -33,28 +33,36 @@ #include #include +/* + * Update the needed ring page slots for the first SKB queued. + * Note that any call sequence outside the RX thread calling this function + * needs to wake up the RX thread via a call of xenvif_kick_thread() + * afterwards in order to avoid a race with putting the thread to sleep. + */ +static void xenvif_update_needed_slots(struct xenvif_queue *queue, + const struct sk_buff *skb) +{ + unsigned int needed = 0; + + if (skb) { + needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); + if (skb_is_gso(skb)) + needed++; + if (skb->sw_hash) + needed++; + } + + WRITE_ONCE(queue->rx_slots_needed, needed); +} + static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) { RING_IDX prod, cons; - struct sk_buff *skb; - int needed; - unsigned long flags; + unsigned int needed; - spin_lock_irqsave(&queue->rx_queue.lock, flags); - - skb = skb_peek(&queue->rx_queue); - if (!skb) { - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + needed = READ_ONCE(queue->rx_slots_needed); + if (!needed) return false; - } - - needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); - if (skb_is_gso(skb)) - needed++; - if (skb->sw_hash) - needed++; - - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); do { prod = queue->rx.sring->req_prod; @@ -80,6 +88,9 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + __skb_queue_tail(&queue->rx_queue, skb); queue->rx_queue_len += skb->len; @@ -100,6 +111,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) skb = __skb_dequeue(&queue->rx_queue); if (skb) { + xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue)); + queue->rx_queue_len -= skb->len; if (queue->rx_queue_len < queue->rx_queue_max) { struct netdev_queue *txq; @@ -487,27 +500,31 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_copy_flush(queue); } -static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue) { RING_IDX prod, cons; prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; + return prod - cons; +} + +static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue) +{ + unsigned int needed = READ_ONCE(queue->rx_slots_needed); + return !queue->stalled && - prod - cons < 1 && + xenvif_rx_queue_slots(queue) < needed && time_after(jiffies, queue->last_rx_time + queue->vif->stall_timeout); } static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) { - RING_IDX prod, cons; + unsigned int needed = READ_ONCE(queue->rx_slots_needed); - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; - - return queue->stalled && prod - cons >= 1; + return queue->stalled && xenvif_rx_queue_slots(queue) >= needed; } bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) From be81992f9086b230623ae3ebbc85ecee4d00a3d3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 30 Nov 2021 08:36:12 +0100 Subject: [PATCH 566/868] xen/netback: don't queue unlimited number of packages In case a guest isn't consuming incoming network traffic as fast as it is coming in, xen-netback is buffering network packages in unlimited numbers today. This can result in host OOM situations. Commit f48da8b14d04ca8 ("xen-netback: fix unlimited guest Rx internal queue and carrier flapping") meant to introduce a mechanism to limit the amount of buffered data by stopping the Tx queue when reaching the data limit, but this doesn't work for cases like UDP. When hitting the limit don't queue further SKBs, but drop them instead. In order to be able to tell Rx packages have been dropped increment the rx_dropped statistics counter in this case. It should be noted that the old solution to continue queueing SKBs had the additional problem of an overflow of the 32-bit rx_queue_len value would result in intermittent Tx queue enabling. This is part of XSA-392 Fixes: f48da8b14d04ca8 ("xen-netback: fix unlimited guest Rx internal queue and carrier flapping") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich --- drivers/net/xen-netback/rx.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index a8511e27d6c1..dbac4c03d21a 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -88,16 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); - if (skb_queue_empty(&queue->rx_queue)) - xenvif_update_needed_slots(queue, skb); - - __skb_queue_tail(&queue->rx_queue, skb); - - queue->rx_queue_len += skb->len; - if (queue->rx_queue_len > queue->rx_queue_max) { + if (queue->rx_queue_len >= queue->rx_queue_max) { struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); + kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); @@ -147,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) break; xenvif_rx_dequeue(queue); kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; } } From dfd0743f1d9ea76931510ed150334d571fbab49d Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Thu, 9 Dec 2021 15:59:37 +0100 Subject: [PATCH 567/868] tee: handle lookup of shm with reference count 0 Since the tee subsystem does not keep a strong reference to its idle shared memory buffers, it races with other threads that try to destroy a shared memory through a close of its dma-buf fd or by unmapping the memory. In tee_shm_get_from_id() when a lookup in teedev->idr has been successful, it is possible that the tee_shm is in the dma-buf teardown path, but that path is blocked by the teedev mutex. Since we don't have an API to tell if the tee_shm is in the dma-buf teardown path or not we must find another way of detecting this condition. Fix this by doing the reference counting directly on the tee_shm using a new refcount_t refcount field. dma-buf is replaced by using anon_inode_getfd() instead, this separates the life-cycle of the underlying file from the tee_shm. tee_shm_put() is updated to hold the mutex when decreasing the refcount to 0 and then remove the tee_shm from teedev->idr before releasing the mutex. This means that the tee_shm can never be found unless it has a refcount larger than 0. Fixes: 967c9cca2cc5 ("tee: generic TEE subsystem") Cc: stable@vger.kernel.org Reviewed-by: Greg Kroah-Hartman Reviewed-by: Lars Persson Reviewed-by: Sumit Garg Reported-by: Patrik Lantz Signed-off-by: Jens Wiklander --- drivers/tee/tee_shm.c | 174 +++++++++++++++------------------------- include/linux/tee_drv.h | 4 +- 2 files changed, 68 insertions(+), 110 deletions(-) diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 8a8deb95e918..499fccba3d74 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -1,20 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2015-2016, Linaro Limited + * Copyright (c) 2015-2017, 2019-2021 Linaro Limited */ +#include #include -#include -#include #include +#include #include #include #include #include -#include #include "tee_private.h" -MODULE_IMPORT_NS(DMA_BUF); - static void release_registered_pages(struct tee_shm *shm) { if (shm->pages) { @@ -31,16 +28,8 @@ static void release_registered_pages(struct tee_shm *shm) } } -static void tee_shm_release(struct tee_shm *shm) +static void tee_shm_release(struct tee_device *teedev, struct tee_shm *shm) { - struct tee_device *teedev = shm->ctx->teedev; - - if (shm->flags & TEE_SHM_DMA_BUF) { - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - mutex_unlock(&teedev->mutex); - } - if (shm->flags & TEE_SHM_POOL) { struct tee_shm_pool_mgr *poolm; @@ -67,45 +56,6 @@ static void tee_shm_release(struct tee_shm *shm) tee_device_put(teedev); } -static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment - *attach, enum dma_data_direction dir) -{ - return NULL; -} - -static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *table, - enum dma_data_direction dir) -{ -} - -static void tee_shm_op_release(struct dma_buf *dmabuf) -{ - struct tee_shm *shm = dmabuf->priv; - - tee_shm_release(shm); -} - -static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -{ - struct tee_shm *shm = dmabuf->priv; - size_t size = vma->vm_end - vma->vm_start; - - /* Refuse sharing shared memory provided by application */ - if (shm->flags & TEE_SHM_USER_MAPPED) - return -EINVAL; - - return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, - size, vma->vm_page_prot); -} - -static const struct dma_buf_ops tee_shm_dma_buf_ops = { - .map_dma_buf = tee_shm_op_map_dma_buf, - .unmap_dma_buf = tee_shm_op_unmap_dma_buf, - .release = tee_shm_op_release, - .mmap = tee_shm_op_mmap, -}; - struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) { struct tee_device *teedev = ctx->teedev; @@ -140,6 +90,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_dev_put; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_POOL; shm->ctx = ctx; if (flags & TEE_SHM_DMA_BUF) @@ -153,10 +104,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_kfree; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - mutex_lock(&teedev->mutex); shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); mutex_unlock(&teedev->mutex); @@ -164,28 +112,11 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) ret = ERR_PTR(shm->id); goto err_pool_free; } - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - goto err_rem; - } } teedev_ctx_get(ctx); return shm; -err_rem: - if (flags & TEE_SHM_DMA_BUF) { - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - mutex_unlock(&teedev->mutex); - } err_pool_free: poolm->ops->free(poolm, shm); err_kfree: @@ -246,6 +177,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_REGISTER; shm->ctx = ctx; shm->id = -1; @@ -306,22 +238,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - teedev->desc->ops->shm_unregister(ctx, shm); - goto err; - } - } - return shm; err: if (shm) { @@ -339,6 +255,35 @@ err: } EXPORT_SYMBOL_GPL(tee_shm_register); +static int tee_shm_fop_release(struct inode *inode, struct file *filp) +{ + tee_shm_put(filp->private_data); + return 0; +} + +static int tee_shm_fop_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct tee_shm *shm = filp->private_data; + size_t size = vma->vm_end - vma->vm_start; + + /* Refuse sharing shared memory provided by application */ + if (shm->flags & TEE_SHM_USER_MAPPED) + return -EINVAL; + + /* check for overflowing the buffer's size */ + if (vma->vm_pgoff + vma_pages(vma) > shm->size >> PAGE_SHIFT) + return -EINVAL; + + return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, + size, vma->vm_page_prot); +} + +static const struct file_operations tee_shm_fops = { + .owner = THIS_MODULE, + .release = tee_shm_fop_release, + .mmap = tee_shm_fop_mmap, +}; + /** * tee_shm_get_fd() - Increase reference count and return file descriptor * @shm: Shared memory handle @@ -351,10 +296,11 @@ int tee_shm_get_fd(struct tee_shm *shm) if (!(shm->flags & TEE_SHM_DMA_BUF)) return -EINVAL; - get_dma_buf(shm->dmabuf); - fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); + /* matched by tee_shm_put() in tee_shm_op_release() */ + refcount_inc(&shm->refcount); + fd = anon_inode_getfd("tee_shm", &tee_shm_fops, shm, O_RDWR); if (fd < 0) - dma_buf_put(shm->dmabuf); + tee_shm_put(shm); return fd; } @@ -364,17 +310,7 @@ int tee_shm_get_fd(struct tee_shm *shm) */ void tee_shm_free(struct tee_shm *shm) { - /* - * dma_buf_put() decreases the dmabuf reference counter and will - * call tee_shm_release() when the last reference is gone. - * - * In the case of driver private memory we call tee_shm_release - * directly instead as it doesn't have a reference counter. - */ - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); - else - tee_shm_release(shm); + tee_shm_put(shm); } EXPORT_SYMBOL_GPL(tee_shm_free); @@ -481,10 +417,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) teedev = ctx->teedev; mutex_lock(&teedev->mutex); shm = idr_find(&teedev->idr, id); + /* + * If the tee_shm was found in the IDR it must have a refcount + * larger than 0 due to the guarantee in tee_shm_put() below. So + * it's safe to use refcount_inc(). + */ if (!shm || shm->ctx != ctx) shm = ERR_PTR(-EINVAL); - else if (shm->flags & TEE_SHM_DMA_BUF) - get_dma_buf(shm->dmabuf); + else + refcount_inc(&shm->refcount); mutex_unlock(&teedev->mutex); return shm; } @@ -496,7 +437,24 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id); */ void tee_shm_put(struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); + struct tee_device *teedev = shm->ctx->teedev; + bool do_release = false; + + mutex_lock(&teedev->mutex); + if (refcount_dec_and_test(&shm->refcount)) { + /* + * refcount has reached 0, we must now remove it from the + * IDR before releasing the mutex. This will guarantee that + * the refcount_inc() in tee_shm_get_from_id() never starts + * from 0. + */ + if (shm->flags & TEE_SHM_DMA_BUF) + idr_remove(&teedev->idr, shm->id); + do_release = true; + } + mutex_unlock(&teedev->mutex); + + if (do_release) + tee_shm_release(teedev, shm); } EXPORT_SYMBOL_GPL(tee_shm_put); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a1f03461369b..cf5999626e28 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, * @offset: offset of buffer in user space * @pages: locked pages from userspace * @num_pages: number of locked pages - * @dmabuf: dmabuf used to for exporting to user space + * @refcount: reference counter * @flags: defined by TEE_SHM_* in tee_drv.h * @id: unique id of a shared memory object on this device, shared * with user space @@ -214,7 +214,7 @@ struct tee_shm { unsigned int offset; struct page **pages; size_t num_pages; - struct dma_buf *dmabuf; + refcount_t refcount; u32 flags; int id; u64 sec_world_id; From 849e087ba68ac6956c11016ce34f9f10a09a4186 Mon Sep 17 00:00:00 2001 From: Zhang Ying-22455 Date: Tue, 14 Dec 2021 01:23:33 -0600 Subject: [PATCH 568/868] arm64: dts: lx2160a: fix scl-gpios property name Fix the typo in the property name. Fixes: d548c217c6a3c ("arm64: dts: add QorIQ LX2160A SoC support") Signed-off-by: Zhang Ying Signed-off-by: Li Yang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index dc8661ebd1f6..2433e6f2eda8 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -719,7 +719,7 @@ clock-names = "i2c"; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>; - scl-gpio = <&gpio2 15 GPIO_ACTIVE_HIGH>; + scl-gpios = <&gpio2 15 GPIO_ACTIVE_HIGH>; status = "disabled"; }; @@ -768,7 +768,7 @@ clock-names = "i2c"; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(16)>; - scl-gpio = <&gpio2 16 GPIO_ACTIVE_HIGH>; + scl-gpios = <&gpio2 16 GPIO_ACTIVE_HIGH>; status = "disabled"; }; From 0546b224cc7717cc8a2db076b0bb069a9c430794 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 14 Dec 2021 19:10:09 +0000 Subject: [PATCH 569/868] net: stmmac: dwmac-rk: fix oob read in rk_gmac_setup KASAN reports an out-of-bounds read in rk_gmac_setup on the line: while (ops->regs[i]) { This happens for most platforms since the regs flexible array member is empty, so the memory after the ops structure is being read here. It seems that mostly this happens to contain zero anyway, so we get lucky and everything still works. To avoid adding redundant data to nearly all the ops structures, add a new flag to indicate whether the regs field is valid and avoid this loop when it is not. Fixes: 3bb3d6b1c195 ("net: stmmac: Add RK3566/RK3568 SoC support") Signed-off-by: John Keeping Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6924a6aacbd5..c469abc91fa1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -33,6 +33,7 @@ struct rk_gmac_ops { void (*set_rgmii_speed)(struct rk_priv_data *bsp_priv, int speed); void (*set_rmii_speed)(struct rk_priv_data *bsp_priv, int speed); void (*integrated_phy_powerup)(struct rk_priv_data *bsp_priv); + bool regs_valid; u32 regs[]; }; @@ -1092,6 +1093,7 @@ static const struct rk_gmac_ops rk3568_ops = { .set_to_rmii = rk3568_set_to_rmii, .set_rgmii_speed = rk3568_set_gmac_speed, .set_rmii_speed = rk3568_set_gmac_speed, + .regs_valid = true, .regs = { 0xfe2a0000, /* gmac0 */ 0xfe010000, /* gmac1 */ @@ -1383,7 +1385,7 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, * to be distinguished. */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res) { + if (res && ops->regs_valid) { int i = 0; while (ops->regs[i]) { From 407ecd1bd726f240123f704620d46e285ff30dd9 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 15 Dec 2021 22:37:31 +0800 Subject: [PATCH 570/868] sfc_ef100: potential dereference of null pointer The return value of kmalloc() needs to be checked. To avoid use in efx_nic_update_stats() in case of the failure of alloc. Fixes: b593b6f1b492 ("sfc_ef100: statistics gathering") Signed-off-by: Jiasheng Jiang Reported-by: kernel test robot Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef100_nic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 6aa81229b68a..e77a5cb4e40d 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -609,6 +609,9 @@ static size_t ef100_update_stats(struct efx_nic *efx, ef100_common_stat_mask(mask); ef100_ethtool_stat_mask(mask); + if (!mc_stats) + return 0; + efx_nic_copy_stats(efx, mc_stats); efx_nic_update_stats(ef100_stat_desc, EF100_STAT_COUNT, mask, stats, mc_stats, false); From e08cdf63049b711099efff0811273449083bb958 Mon Sep 17 00:00:00 2001 From: Andrey Eremeev Date: Wed, 15 Dec 2021 20:30:32 +0300 Subject: [PATCH 571/868] dsa: mv88e6xxx: fix debug print for SPEED_UNFORCED Debug print uses invalid check to detect if speed is unforced: (speed != SPEED_UNFORCED) should be used instead of (!speed). Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Andrey Eremeev Fixes: 96a2b40c7bd3 ("net: dsa: mv88e6xxx: add port's MAC speed setter") Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index d9817b20ea64..ab41619a809b 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -283,7 +283,7 @@ static int mv88e6xxx_port_set_speed_duplex(struct mv88e6xxx_chip *chip, if (err) return err; - if (speed) + if (speed != SPEED_UNFORCED) dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed); else dev_dbg(chip->dev, "p%d: Speed unforced\n", port); @@ -516,7 +516,7 @@ int mv88e6393x_port_set_speed_duplex(struct mv88e6xxx_chip *chip, int port, if (err) return err; - if (speed) + if (speed != SPEED_UNFORCED) dev_dbg(chip->dev, "p%d: Speed set to %d Mbps\n", port, speed); else dev_dbg(chip->dev, "p%d: Speed unforced\n", port); From 053c9e18c6f9cf82242ef35ac21cae1842725714 Mon Sep 17 00:00:00 2001 From: Wenliang Wang Date: Thu, 16 Dec 2021 11:11:35 +0800 Subject: [PATCH 572/868] virtio_net: fix rx_drops stat for small pkts We found the stat of rx drops for small pkts does not increment when build_skb fail, it's not coherent with other mode's rx drops stat. Signed-off-by: Wenliang Wang Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 55db6a336f7e..b107835242ad 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -733,7 +733,7 @@ static struct sk_buff *receive_small(struct net_device *dev, pr_debug("%s: rx error: len %u exceeds max size %d\n", dev->name, len, GOOD_PACKET_LEN); dev->stats.rx_length_errors++; - goto err_len; + goto err; } if (likely(!vi->xdp_enabled)) { @@ -825,10 +825,8 @@ static struct sk_buff *receive_small(struct net_device *dev, skip_xdp: skb = build_skb(buf, buflen); - if (!skb) { - put_page(page); + if (!skb) goto err; - } skb_reserve(skb, headroom - delta); skb_put(skb, len); if (!xdp_prog) { @@ -839,13 +837,12 @@ skip_xdp: if (metasize) skb_metadata_set(skb, metasize); -err: return skb; err_xdp: rcu_read_unlock(); stats->xdp_drops++; -err_len: +err: stats->drops++; put_page(page); xdp_xmit: From 8a03ef676ade55182f9b05115763aeda6dc08159 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 16 Dec 2021 11:28:25 +0200 Subject: [PATCH 573/868] net: Fix double 0x prefix print in SKB dump When printing netdev features %pNF already takes care of the 0x prefix, remove the explicit one. Fixes: 6413139dfc64 ("skbuff: increase verbosity when dumping skb data") Signed-off-by: Gal Pressman Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ba2f38246f07..909db87d7383 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -832,7 +832,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) - printk("%sdev name=%s feat=0x%pNF\n", + printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", From 76f12e632a15a20c8de3532d64a0708cf0e32f11 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Dec 2021 17:39:26 +0100 Subject: [PATCH 574/868] netfilter: ctnetlink: remove expired entries first When dumping conntrack table to userspace via ctnetlink, check if the ct has already expired before doing any of the 'skip' checks. This expires dead entries faster. /proc handler also removes outdated entries first. Reported-by: Vitaly Zuevsky Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 81d03acf68d4..ec4164c32d27 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1195,8 +1195,6 @@ restart: } hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; ct = nf_ct_tuplehash_to_ctrack(h); if (nf_ct_is_expired(ct)) { if (i < ARRAY_SIZE(nf_ct_evict) && @@ -1208,6 +1206,9 @@ restart: if (!net_eq(net, nf_ct_net(ct))) continue; + if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + if (cb->args[1]) { if (ct != last) continue; From 18549bf4b21c739a9def39f27dcac53e27286ab5 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 16 Dec 2021 11:17:25 +0530 Subject: [PATCH 575/868] tee: optee: Fix incorrect page free bug Pointer to the allocated pages (struct page *page) has already progressed towards the end of allocation. It is incorrect to perform __free_pages(page, order) using this pointer as we would free any arbitrary pages. Fix this by stop modifying the page pointer. Fixes: ec185dd3ab25 ("optee: Fix memory leak when failing to register shm pages") Cc: stable@vger.kernel.org Reported-by: Patrik Lantz Signed-off-by: Sumit Garg Reviewed-by: Tyler Hicks Signed-off-by: Jens Wiklander --- drivers/tee/optee/core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index ab2edfcc6c70..2a66a5203d2f 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -48,10 +48,8 @@ int optee_pool_op_alloc_helper(struct tee_shm_pool_mgr *poolm, goto err; } - for (i = 0; i < nr_pages; i++) { - pages[i] = page; - page++; - } + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; shm->flags |= TEE_SHM_REGISTER; rc = shm_register(shm->ctx, shm, pages, nr_pages, From 6add87fdae9bcb1d20b4503df5bd02ce5246cc8b Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 6 Dec 2021 20:05:33 +0800 Subject: [PATCH 576/868] optee: Suppress false positive kmemleak report in optee_handle_rpc() We observed the following kmemleak report: unreferenced object 0xffff000007904500 (size 128): comm "swapper/0", pid 1, jiffies 4294892671 (age 44.036s) hex dump (first 32 bytes): 00 47 90 07 00 00 ff ff 60 00 c0 ff 00 00 00 00 .G......`....... 60 00 80 13 00 80 ff ff a0 00 00 00 00 00 00 00 `............... backtrace: [<000000004c12b1c7>] kmem_cache_alloc+0x1ac/0x2f4 [<000000005d23eb4f>] tee_shm_alloc+0x78/0x230 [<00000000794dd22c>] optee_handle_rpc+0x60/0x6f0 [<00000000d9f7c52d>] optee_do_call_with_arg+0x17c/0x1dc [<00000000c35884da>] optee_open_session+0x128/0x1ec [<000000001748f2ff>] tee_client_open_session+0x28/0x40 [<00000000aecb5389>] optee_enumerate_devices+0x84/0x2a0 [<000000003df18bf1>] optee_probe+0x674/0x6cc [<000000003a4a534a>] platform_drv_probe+0x54/0xb0 [<000000000c51ce7d>] really_probe+0xe4/0x4d0 [<000000002f04c865>] driver_probe_device+0x58/0xc0 [<00000000b485397d>] device_driver_attach+0xc0/0xd0 [<00000000c835f0df>] __driver_attach+0x84/0x124 [<000000008e5a429c>] bus_for_each_dev+0x70/0xc0 [<000000001735e8a8>] driver_attach+0x24/0x30 [<000000006d94b04f>] bus_add_driver+0x104/0x1ec This is not a memory leak because we pass the share memory pointer to secure world and would get it from secure world before releasing it. Signed-off-by: Xiaolei Wang Signed-off-by: Jens Wiklander --- drivers/tee/optee/smc_abi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c index 6196d7c3888f..cf2e3293567d 100644 --- a/drivers/tee/optee/smc_abi.c +++ b/drivers/tee/optee/smc_abi.c @@ -23,6 +23,7 @@ #include "optee_private.h" #include "optee_smc.h" #include "optee_rpc_cmd.h" +#include #define CREATE_TRACE_POINTS #include "optee_trace.h" @@ -783,6 +784,7 @@ static void optee_handle_rpc(struct tee_context *ctx, param->a4 = 0; param->a5 = 0; } + kmemleak_not_leak(shm); break; case OPTEE_SMC_RPC_FUNC_FREE: shm = reg_pair_to_ptr(param->a1, param->a2); From 5c15b3123f65f8fbb1b445d9a7e8812e0e435df2 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 15 Dec 2021 20:29:21 +0800 Subject: [PATCH 577/868] net/smc: Prevent smc_release() from long blocking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In nginx/wrk benchmark, there's a hung problem with high probability on case likes that: (client will last several minutes to exit) server: smc_run nginx client: smc_run wrk -c 10000 -t 1 http://server Client hangs with the following backtrace: 0 [ffffa7ce8Of3bbf8] __schedule at ffffffff9f9eOd5f 1 [ffffa7ce8Of3bc88] schedule at ffffffff9f9eløe6 2 [ffffa7ce8Of3bcaO] schedule_timeout at ffffffff9f9e3f3c 3 [ffffa7ce8Of3bd2O] wait_for_common at ffffffff9f9el9de 4 [ffffa7ce8Of3bd8O] __flush_work at ffffffff9fOfeOl3 5 [ffffa7ce8øf3bdfO] smc_release at ffffffffcO697d24 [smc] 6 [ffffa7ce8Of3be2O] __sock_release at ffffffff9f8O2e2d 7 [ffffa7ce8Of3be4ø] sock_close at ffffffff9f8ø2ebl 8 [ffffa7ce8øf3be48] __fput at ffffffff9f334f93 9 [ffffa7ce8Of3be78] task_work_run at ffffffff9flOlff5 10 [ffffa7ce8Of3beaO] do_exit at ffffffff9fOe5Ol2 11 [ffffa7ce8Of3bflO] do_group_exit at ffffffff9fOe592a 12 [ffffa7ce8Of3bf38] __x64_sys_exit_group at ffffffff9fOe5994 13 [ffffa7ce8Of3bf4O] do_syscall_64 at ffffffff9f9d4373 14 [ffffa7ce8Of3bfsO] entry_SYSCALL_64_after_hwframe at ffffffff9fa0007c This issue dues to flush_work(), which is used to wait for smc_connect_work() to finish in smc_release(). Once lots of smc_connect_work() was pending or all executing work dangling, smc_release() has to block until one worker comes to free, which is equivalent to wait another smc_connnect_work() to finish. In order to fix this, There are two changes: 1. For those idle smc_connect_work(), cancel it from the workqueue; for executing smc_connect_work(), waiting for it to finish. For that purpose, replace flush_work() with cancel_work_sync(). 2. Since smc_connect() hold a reference for passive closing, if smc_connect_work() has been cancelled, release the reference. Fixes: 24ac3a08e658 ("net/smc: rebuild nonblocking connect") Reported-by: Tony Lu Tested-by: Dust Li Reviewed-by: Dust Li Reviewed-by: Tony Lu Signed-off-by: D. Wythe Acked-by: Karsten Graul Link: https://lore.kernel.org/r/1639571361-101128-1-git-send-email-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 230072f9ec48..1c9289f56dc4 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -194,7 +194,9 @@ static int smc_release(struct socket *sock) /* cleanup for a dangling non-blocking connect */ if (smc->connect_nonblock && sk->sk_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); - flush_work(&smc->connect_work); + + if (cancel_work_sync(&smc->connect_work)) + sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */ if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires From 8b8e6e782456f1ce02a7ae914bbd5b1053f0b034 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 Dec 2021 12:24:49 -0800 Subject: [PATCH 578/868] net: systemport: Add global locking for descriptor lifecycle The descriptor list is a shared resource across all of the transmit queues, and the locking mechanism used today only protects concurrency across a given transmit queue between the transmit and reclaiming. This creates an opportunity for the SYSTEMPORT hardware to work on corrupted descriptors if we have multiple producers at once which is the case when using multiple transmit queues. This was particularly noticeable when using multiple flows/transmit queues and it showed up in interesting ways in that UDP packets would get a correct UDP header checksum being calculated over an incorrect packet length. Similarly TCP packets would get an equally correct checksum computed by the hardware over an incorrect packet length. The SYSTEMPORT hardware maintains an internal descriptor list that it re-arranges when the driver produces a new descriptor anytime it writes to the WRITE_PORT_{HI,LO} registers, there is however some delay in the hardware to re-organize its descriptors and it is possible that concurrent TX queues eventually break this internal allocation scheme to the point where the length/status part of the descriptor gets used for an incorrect data buffer. The fix is to impose a global serialization for all TX queues in the short section where we are writing to the WRITE_PORT_{HI,LO} registers which solves the corruption even with multiple concurrent TX queues being used. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20211215202450.4086240-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 5 ++++- drivers/net/ethernet/broadcom/bcmsysport.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 40933bf5a710..60dde29974bf 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1309,11 +1309,11 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, struct bcm_sysport_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; struct bcm_sysport_tx_ring *ring; + unsigned long flags, desc_flags; struct bcm_sysport_cb *cb; struct netdev_queue *txq; u32 len_status, addr_lo; unsigned int skb_len; - unsigned long flags; dma_addr_t mapping; u16 queue; int ret; @@ -1373,8 +1373,10 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, ring->desc_count--; /* Ports are latched, so write upper address first */ + spin_lock_irqsave(&priv->desc_lock, desc_flags); tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index)); tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index)); + spin_unlock_irqrestore(&priv->desc_lock, desc_flags); /* Check ring space and update SW control flow */ if (ring->desc_count == 0) @@ -2013,6 +2015,7 @@ static int bcm_sysport_open(struct net_device *dev) } /* Initialize both hardware and software ring */ + spin_lock_init(&priv->desc_lock); for (i = 0; i < dev->num_tx_queues; i++) { ret = bcm_sysport_init_tx_ring(priv, i); if (ret) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 984f76e74b43..16b73bb9acc7 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -711,6 +711,7 @@ struct bcm_sysport_priv { int wol_irq; /* Transmit rings */ + spinlock_t desc_lock; struct bcm_sysport_tx_ring *tx_rings; /* Receive queue */ From e28587cc491ef0f3c51258fdc87fbc386b1d4c59 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Dec 2021 03:17:41 -0800 Subject: [PATCH 579/868] sit: do not call ipip6_dev_free() from sit_init_net() ipip6_dev_free is sit dev->priv_destructor, already called by register_netdevice() if something goes wrong. Alternative would be to make ipip6_dev_free() robust against multiple invocations, but other drivers do not implement this strategy. syzbot reported: dst_release underflow WARNING: CPU: 0 PID: 5059 at net/core/dst.c:173 dst_release+0xd8/0xe0 net/core/dst.c:173 Modules linked in: CPU: 1 PID: 5059 Comm: syz-executor.4 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:dst_release+0xd8/0xe0 net/core/dst.c:173 Code: 4c 89 f2 89 d9 31 c0 5b 41 5e 5d e9 da d5 44 f9 e8 1d 90 5f f9 c6 05 87 48 c6 05 01 48 c7 c7 80 44 99 8b 31 c0 e8 e8 67 29 f9 <0f> 0b eb 85 0f 1f 40 00 53 48 89 fb e8 f7 8f 5f f9 48 83 c3 a8 48 RSP: 0018:ffffc9000aa5faa0 EFLAGS: 00010246 RAX: d6894a925dd15a00 RBX: 00000000ffffffff RCX: 0000000000040000 RDX: ffffc90005e19000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: 0000000000000000 R08: ffffffff816a1f42 R09: ffffed1017344f2c R10: ffffed1017344f2c R11: 0000000000000000 R12: 0000607f462b1358 R13: 1ffffffff1bfd305 R14: ffffe8ffffcb1358 R15: dffffc0000000000 FS: 00007f66c71a2700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f88aaed5058 CR3: 0000000023e0f000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dst_cache_destroy+0x107/0x1e0 net/core/dst_cache.c:160 ipip6_dev_free net/ipv6/sit.c:1414 [inline] sit_init_net+0x229/0x550 net/ipv6/sit.c:1936 ops_init+0x313/0x430 net/core/net_namespace.c:140 setup_net+0x35b/0x9d0 net/core/net_namespace.c:326 copy_net_ns+0x359/0x5c0 net/core/net_namespace.c:470 create_new_namespaces+0x4ce/0xa00 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0x11e/0x180 kernel/nsproxy.c:226 ksys_unshare+0x57d/0xb50 kernel/fork.c:3075 __do_sys_unshare kernel/fork.c:3146 [inline] __se_sys_unshare kernel/fork.c:3144 [inline] __x64_sys_unshare+0x34/0x40 kernel/fork.c:3144 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f66c882ce99 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f66c71a2168 EFLAGS: 00000246 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 00007f66c893ff60 RCX: 00007f66c882ce99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000048040200 RBP: 00007f66c8886ff1 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff6634832f R14: 00007f66c71a2300 R15: 0000000000022000 Fixes: cf124db566e6 ("net: Fix inconsistent teardown and release of private netdev state.") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20211216111741.1387540-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b57ee36d668..8a3618a30632 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1933,7 +1933,6 @@ static int __net_init sit_init_net(struct net *net) return 0; err_reg_dev: - ipip6_dev_free(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; From 1744a22ae948799da7927b53ec97ccc877ff9d61 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Dec 2021 09:22:12 +0000 Subject: [PATCH 580/868] afs: Fix mmap Fix afs_add_open_map() to check that the vnode isn't already on the list when it adds it. It's possible that afs_drop_open_mmap() decremented the cb_nr_mmap counter, but hadn't yet got into the locked section to remove it. Also vnode->cb_mmap_link should be initialised, so fix that too. Fixes: 6e0e99d58a65 ("afs: Fix mmap coherency vs 3rd-party changes") Reported-by: kafs-testing+fedora34_64checkkafs-build-300@auristor.com Suggested-by: Marc Dionne Signed-off-by: David Howells Tested-by: kafs-testing+fedora34_64checkkafs-build-300@auristor.com cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/686465.1639435380@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds --- fs/afs/file.c | 5 +++-- fs/afs/super.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index cb6ad61eec3b..afe4b803f84b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -514,8 +514,9 @@ static void afs_add_open_mmap(struct afs_vnode *vnode) if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) { down_write(&vnode->volume->cell->fs_open_mmaps_lock); - list_add_tail(&vnode->cb_mmap_link, - &vnode->volume->cell->fs_open_mmaps); + if (list_empty(&vnode->cb_mmap_link)) + list_add_tail(&vnode->cb_mmap_link, + &vnode->volume->cell->fs_open_mmaps); up_write(&vnode->volume->cell->fs_open_mmaps_lock); } diff --git a/fs/afs/super.c b/fs/afs/super.c index d110def8aa8e..34c68724c98b 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -667,6 +667,7 @@ static void afs_i_init_once(void *_vnode) INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); + INIT_LIST_HEAD(&vnode->cb_mmap_link); seqlock_init(&vnode->cb_lock); } From 9c5d89bc10551f1aecd768b00fca3339a7b8c8ee Mon Sep 17 00:00:00 2001 From: Lakshmi Ramasubramanian Date: Thu, 9 Dec 2021 17:01:21 -0800 Subject: [PATCH 581/868] arm64: kexec: Fix missing error code 'ret' warning in load_other_segments() Since commit ac10be5cdbfa ("arm64: Use common of_kexec_alloc_and_setup_fdt()"), smatch reports the following warning: arch/arm64/kernel/machine_kexec_file.c:152 load_other_segments() warn: missing error code 'ret' Return code is not set to an error code in load_other_segments() when of_kexec_alloc_and_setup_fdt() call returns a NULL dtb. This results in status success (return code set to 0) being returned from load_other_segments(). Set return code to -EINVAL if of_kexec_alloc_and_setup_fdt() returns NULL dtb. Signed-off-by: Lakshmi Ramasubramanian Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: ac10be5cdbfa ("arm64: Use common of_kexec_alloc_and_setup_fdt()") Link: https://lore.kernel.org/r/20211210010121.101823-1-nramas@linux.microsoft.com Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/machine_kexec_file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 63634b4d72c1..59c648d51848 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -149,6 +149,7 @@ int load_other_segments(struct kimage *image, initrd_len, cmdline, 0); if (!dtb) { pr_err("Preparing for new dtb failed\n"); + ret = -EINVAL; goto out_err; } From ef399469d9ceb9f2171cdd79863f9434b9fa3edc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Nov 2021 15:50:47 +0300 Subject: [PATCH 582/868] ksmbd: fix error code in ndr_read_int32() This is a failure path and it should return -EINVAL instead of success. Otherwise it could result in the caller using uninitialized memory. Fixes: 303fff2b8c77 ("ksmbd: add validation for ndr read/write functions") Cc: stable@vger.kernel.org # v5.15 Acked-by: Namjae Jeon Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/ksmbd/ndr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ksmbd/ndr.c b/fs/ksmbd/ndr.c index 8317f7ca402b..5052be9261d9 100644 --- a/fs/ksmbd/ndr.c +++ b/fs/ksmbd/ndr.c @@ -148,7 +148,7 @@ static int ndr_read_int16(struct ndr *n, __u16 *value) static int ndr_read_int32(struct ndr *n, __u32 *value) { if (n->offset + sizeof(__u32) > n->length) - return 0; + return -EINVAL; if (value) *value = le32_to_cpu(*(__le32 *)ndr_get_field(n)); From f2e78affc48dee29b989c1d9b0d89b503dcd1204 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 1 Dec 2021 10:12:39 +0900 Subject: [PATCH 583/868] ksmbd: fix uninitialized symbol 'pntsd_size' No check for if "rc" is an error code for build_sec_desc(). This can cause problems with using uninitialized pntsd_size. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org # v5.15 Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 49c9da37315c..125590d5e940 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2962,6 +2962,10 @@ int smb2_open(struct ksmbd_work *work) &pntsd_size, &fattr); posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); + if (rc) { + kfree(pntsd); + goto err_out; + } rc = ksmbd_vfs_set_sd_xattr(conn, user_ns, From 3cf2b61eb06765e27fec6799292d9fb46d0b7e60 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 22:02:19 +0000 Subject: [PATCH 584/868] bpf: Fix signed bounds propagation after mov32 For the case where both s32_{min,max}_value bounds are positive, the __reg_assign_32_into_64() directly propagates them to their 64 bit counterparts, otherwise it pessimises them into [0,u32_max] universe and tries to refine them later on by learning through the tnum as per comment in mentioned function. However, that does not always happen, for example, in mov32 operation we call zext_32_to_64(dst_reg) which invokes the __reg_assign_32_into_64() as is without subsequent bounds update as elsewhere thus no refinement based on tnum takes place. Thus, not calling into the __update_reg_bounds() / __reg_deduce_bounds() / __reg_bound_offset() triplet as we do, for example, in case of ALU ops via adjust_scalar_min_max_vals(), will lead to more pessimistic bounds when dumping the full register state: Before fix: 0: (b4) w0 = -1 1: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) 1: (bc) w0 = w0 2: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=0,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) Technically, the smin_value=0 and smax_value=4294967295 bounds are not incorrect, but given the register is still a constant, they break assumptions about const scalars that smin_value == smax_value and umin_value == umax_value. After fix: 0: (b4) w0 = -1 1: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) 1: (bc) w0 = w0 2: R0_w=invP4294967295 (id=0,imm=ffffffff, smin_value=4294967295,smax_value=4294967295, umin_value=4294967295,umax_value=4294967295, var_off=(0xffffffff; 0x0), s32_min_value=-1,s32_max_value=-1, u32_min_value=-1,u32_max_value=-1) Without the smin_value == smax_value and umin_value == umax_value invariant being intact for const scalars, it is possible to leak out kernel pointers from unprivileged user space if the latter is enabled. For example, when such registers are involved in pointer arithmtics, then adjust_ptr_min_max_vals() will taint the destination register into an unknown scalar, and the latter can be exported and stored e.g. into a BPF map value. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: Kuee K1r0a Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2d48159b58bd..0872b6c9fb33 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8317,6 +8317,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); + + __update_reg_bounds(dst_reg); + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); } } else { /* case: R = imm From e572ff80f05c33cd0cb4860f864f5c9c044280b6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 22:28:48 +0000 Subject: [PATCH 585/868] bpf: Make 32->64 bounds propagation slightly more robust Make the bounds propagation in __reg_assign_32_into_64() slightly more robust and readable by aligning it similarly as we did back in the __reg_combine_64_into_32() counterpart. Meaning, only propagate or pessimize them as a smin/smax pair. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0872b6c9fb33..b532f1058d35 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1366,22 +1366,28 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static bool __reg32_bound_s64(s32 a) +{ + return a >= 0 && a <= S32_MAX; +} + static void __reg_assign_32_into_64(struct bpf_reg_state *reg) { reg->umin_value = reg->u32_min_value; reg->umax_value = reg->u32_max_value; - /* Attempt to pull 32-bit signed bounds into 64-bit bounds - * but must be positive otherwise set to worse case bounds - * and refine later from tnum. + + /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must + * be positive otherwise set to worse case bounds and refine later + * from tnum. */ - if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) - reg->smax_value = reg->s32_max_value; - else - reg->smax_value = U32_MAX; - if (reg->s32_min_value >= 0) + if (__reg32_bound_s64(reg->s32_min_value) && + __reg32_bound_s64(reg->s32_max_value)) { reg->smin_value = reg->s32_min_value; - else + reg->smax_value = reg->s32_max_value; + } else { reg->smin_value = 0; + reg->smax_value = U32_MAX; + } } static void __reg_combine_32_into_64(struct bpf_reg_state *reg) From b1a7288dedc6caf9023f2676b4f5ed34cf0d4029 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Dec 2021 23:48:54 +0000 Subject: [PATCH 586/868] bpf, selftests: Add test case trying to taint map value pointer Add a test case which tries to taint map value pointer arithmetic into a unknown scalar with subsequent export through the map. Before fix: # ./test_verifier 1186 #1186/u map access: trying to leak tained dst reg FAIL Unexpected success to load! verification time 24 usec stack depth 8 processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 #1186/p map access: trying to leak tained dst reg FAIL Unexpected success to load! verification time 8 usec stack depth 8 processed 15 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 Summary: 0 PASSED, 0 SKIPPED, 2 FAILED After fix: # ./test_verifier 1186 #1186/u map access: trying to leak tained dst reg OK #1186/p map access: trying to leak tained dst reg OK Summary: 2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- .../selftests/bpf/verifier/value_ptr_arith.c | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 2debba4e8a3a..4d347bc53aa2 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1077,6 +1077,29 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "map access: trying to leak tained dst reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_1, 0xFFFFFFFF), + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 4 }, + .result = REJECT, + .errstr = "math between map_value pointer and 4294967295 is not allowed", +}, { "32bit pkt_ptr -= scalar", .insns = { From 433956e91200734d09958673a56df02d00a917c2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Dec 2021 18:38:30 -0800 Subject: [PATCH 587/868] bpf: Fix extable fixup offset. The prog - start_of_ldx is the offset before the faulting ldx to the location after it, so this will be used to adjust pt_regs->ip for jumping over it and continuing, and with old temp it would have been fixed up to the wrong offset, causing crash. Fixes: 4c5de127598e ("bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.") Signed-off-by: Alexei Starovoitov Reviewed-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 726700fabca6..fa58681db45e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1305,7 +1305,7 @@ st: if (is_imm8(insn->off)) * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" * of 4 bytes will be ignored and rbx will be zero inited. */ - ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); + ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8); } break; From 588a25e92458c6efeb7a261d5ca5726f5de89184 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 14 Dec 2021 19:25:13 -0800 Subject: [PATCH 588/868] bpf: Fix extable address check. The verifier checks that PTR_TO_BTF_ID pointer is either valid or NULL, but it cannot distinguish IS_ERR pointer from valid one. When offset is added to IS_ERR pointer it may become small positive value which is a user address that is not handled by extable logic and has to be checked for at the runtime. Tighten BPF_PROBE_MEM pointer check code to prevent this case. Fixes: 4c5de127598e ("bpf: Emit explicit NULL pointer checks for PROBE_LDX instructions.") Reported-by: Lorenzo Fontana Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp.c | 49 +++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index fa58681db45e..bafe36e69227 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1252,19 +1252,54 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { - /* test src_reg, src_reg */ - maybe_emit_mod(&prog, src_reg, src_reg, true); /* always 1 byte */ - EMIT2(0x85, add_2reg(0xC0, src_reg, src_reg)); - /* jne start_of_ldx */ - EMIT2(X86_JNE, 0); + /* Though the verifier prevents negative insn->off in BPF_PROBE_MEM + * add abs(insn->off) to the limit to make sure that negative + * offset won't be an issue. + * insn->off is s16, so it won't affect valid pointers. + */ + u64 limit = TASK_SIZE_MAX + PAGE_SIZE + abs(insn->off); + u8 *end_of_jmp1, *end_of_jmp2; + + /* Conservatively check that src_reg + insn->off is a kernel address: + * 1. src_reg + insn->off >= limit + * 2. src_reg + insn->off doesn't become small positive. + * Cannot do src_reg + insn->off >= limit in one branch, + * since it needs two spare registers, but JIT has only one. + */ + + /* movabsq r11, limit */ + EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); + EMIT((u32)limit, 4); + EMIT(limit >> 32, 4); + /* cmp src_reg, r11 */ + maybe_emit_mod(&prog, src_reg, AUX_REG, true); + EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); + /* if unsigned '<' goto end_of_jmp2 */ + EMIT2(X86_JB, 0); + end_of_jmp1 = prog; + + /* mov r11, src_reg */ + emit_mov_reg(&prog, true, AUX_REG, src_reg); + /* add r11, insn->off */ + maybe_emit_1mod(&prog, AUX_REG, true); + EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off); + /* jmp if not carry to start_of_ldx + * Otherwise ERR_PTR(-EINVAL) + 128 will be the user addr + * that has to be rejected. + */ + EMIT2(0x73 /* JNC */, 0); + end_of_jmp2 = prog; + /* xor dst_reg, dst_reg */ emit_mov_imm32(&prog, false, dst_reg, 0); /* jmp byte_after_ldx */ EMIT2(0xEB, 0); - /* populate jmp_offset for JNE above */ - temp[4] = prog - temp - 5 /* sizeof(test + jne) */; + /* populate jmp_offset for JB above to jump to xor dst_reg */ + end_of_jmp1[-1] = end_of_jmp2 - end_of_jmp1; + /* populate jmp_offset for JNC above to jump to start_of_ldx */ start_of_ldx = prog; + end_of_jmp2[-1] = start_of_ldx - end_of_jmp2; } emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { From 7edc3fcbf9a2b2e3df53c9656a9f85bf807affac Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Dec 2021 12:35:34 -0800 Subject: [PATCH 589/868] selftest/bpf: Add a test that reads various addresses. Add a function to bpf_testmod that returns invalid kernel and user addresses. Then attach an fexit program to that function that tries to read memory through these addresses. This logic checks that bpf_probe_read_kernel and BPF_PROBE_MEM logic is sane. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 20 +++++++++++++++++++ .../selftests/bpf/progs/test_module_attach.c | 12 +++++++++++ 2 files changed, 32 insertions(+) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 5d52ea2768df..df3b292a8ffe 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -33,6 +33,22 @@ noinline int bpf_testmod_loop_test(int n) return sum; } +__weak noinline struct file *bpf_testmod_return_ptr(int arg) +{ + static struct file f = {}; + + switch (arg) { + case 1: return (void *)EINVAL; /* user addr */ + case 2: return (void *)0xcafe4a11; /* user addr */ + case 3: return (void *)-EINVAL; /* canonical, but invalid */ + case 4: return (void *)(1ull << 60); /* non-canonical and invalid */ + case 5: return (void *)~(1ull << 30); /* trigger extable */ + case 6: return &f; /* valid addr */ + case 7: return (void *)((long)&f | 1); /* kernel tricks */ + default: return NULL; + } +} + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -43,6 +59,10 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, .off = off, .len = len, }; + int i = 1; + + while (bpf_testmod_return_ptr(i)) + i++; /* This is always true. Use the check to make sure the compiler * doesn't remove bpf_testmod_loop_test. diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index b36857093f71..50ce16d02da7 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -87,6 +87,18 @@ int BPF_PROG(handle_fexit, return 0; } +SEC("fexit/bpf_testmod_return_ptr") +int BPF_PROG(handle_fexit_ret, int arg, struct file *ret) +{ + long buf = 0; + + bpf_probe_read_kernel(&buf, 8, ret); + bpf_probe_read_kernel(&buf, 8, (char *)ret + 256); + *(volatile long long *)ret; + *(volatile int *)&ret->f_mode; + return 0; +} + __u32 fmod_ret_read_sz = 0; SEC("fmod_ret/bpf_testmod_test_read") From c2fcbf81c332b42382a0c439bfe2414a241e4f5b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 16 Dec 2021 11:16:30 -0800 Subject: [PATCH 590/868] bpf, selftests: Fix racing issue in btf_skc_cls_ingress test The libbpf CI reported occasional failure in btf_skc_cls_ingress: test_syncookie:FAIL:Unexpected syncookie states gen_cookie:80326634 recv_cookie:0 bpf prog error at line 97 "error at line 97" means the bpf prog cannot find the listening socket when the final ack is received. It then skipped processing the syncookie in the final ack which then led to "recv_cookie:0". The problem is the userspace program did not do accept() and went ahead to close(listen_fd) before the kernel (and the bpf prog) had a chance to process the final ack. The fix is to add accept() call so that the userspace will wait for the kernel to finish processing the final ack first before close()-ing everything. Fixes: 9a856cae2217 ("bpf: selftest: Add test_btf_skc_cls_ingress") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211216191630.466151-1-kafai@fb.com --- .../bpf/prog_tests/btf_skc_cls_ingress.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 762f6a9da8b5..664ffc0364f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -90,7 +90,7 @@ static void print_err_line(void) static void test_conn(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -112,6 +112,10 @@ static void test_conn(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port || skel->bss->req_sk_sport != srv_port, "Unexpected sk src port", @@ -134,11 +138,13 @@ done: close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } static void test_syncookie(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -161,6 +167,10 @@ static void test_syncookie(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port, "Unexpected tp src port", "listen_tp_sport:%u expected:%u\n", @@ -188,6 +198,8 @@ done: close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } struct test { From cc274ae7763d9700a56659f3228641d7069e7a3f Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 15 Dec 2021 16:28:40 -0500 Subject: [PATCH 591/868] selinux: fix sleeping function called from invalid context selinux_sb_mnt_opts_compat() is called via sget_fc() under the sb_lock spinlock, so it can't use GFP_KERNEL allocations: [ 868.565200] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:230 [ 868.568246] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 4914, name: mount.nfs [ 868.569626] preempt_count: 1, expected: 0 [ 868.570215] RCU nest depth: 0, expected: 0 [ 868.570809] Preemption disabled at: [ 868.570810] [<0000000000000000>] 0x0 [ 868.571848] CPU: 1 PID: 4914 Comm: mount.nfs Kdump: loaded Tainted: G W 5.16.0-rc5.2585cf9dfa #1 [ 868.573273] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-4.fc34 04/01/2014 [ 868.574478] Call Trace: [ 868.574844] [ 868.575156] dump_stack_lvl+0x34/0x44 [ 868.575692] __might_resched.cold+0xd6/0x10f [ 868.576308] slab_pre_alloc_hook.constprop.0+0x89/0xf0 [ 868.577046] __kmalloc_track_caller+0x72/0x420 [ 868.577684] ? security_context_to_sid_core+0x48/0x2b0 [ 868.578569] kmemdup_nul+0x22/0x50 [ 868.579108] security_context_to_sid_core+0x48/0x2b0 [ 868.579854] ? _nfs4_proc_pathconf+0xff/0x110 [nfsv4] [ 868.580742] ? nfs_reconfigure+0x80/0x80 [nfs] [ 868.581355] security_context_str_to_sid+0x36/0x40 [ 868.581960] selinux_sb_mnt_opts_compat+0xb5/0x1e0 [ 868.582550] ? nfs_reconfigure+0x80/0x80 [nfs] [ 868.583098] security_sb_mnt_opts_compat+0x2a/0x40 [ 868.583676] nfs_compare_super+0x113/0x220 [nfs] [ 868.584249] ? nfs_try_mount_request+0x210/0x210 [nfs] [ 868.584879] sget_fc+0xb5/0x2f0 [ 868.585267] nfs_get_tree_common+0x91/0x4a0 [nfs] [ 868.585834] vfs_get_tree+0x25/0xb0 [ 868.586241] fc_mount+0xe/0x30 [ 868.586605] do_nfs4_mount+0x130/0x380 [nfsv4] [ 868.587160] nfs4_try_get_tree+0x47/0xb0 [nfsv4] [ 868.587724] vfs_get_tree+0x25/0xb0 [ 868.588193] do_new_mount+0x176/0x310 [ 868.588782] __x64_sys_mount+0x103/0x140 [ 868.589388] do_syscall_64+0x3b/0x90 [ 868.589935] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 868.590699] RIP: 0033:0x7f2b371c6c4e [ 868.591239] Code: 48 8b 0d dd 71 0e 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d aa 71 0e 00 f7 d8 64 89 01 48 [ 868.593810] RSP: 002b:00007ffc83775d88 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [ 868.594691] RAX: ffffffffffffffda RBX: 00007ffc83775f10 RCX: 00007f2b371c6c4e [ 868.595504] RDX: 0000555d517247a0 RSI: 0000555d51724700 RDI: 0000555d51724540 [ 868.596317] RBP: 00007ffc83775f10 R08: 0000555d51726890 R09: 0000555d51726890 [ 868.597162] R10: 0000000000000000 R11: 0000000000000246 R12: 0000555d51726890 [ 868.598005] R13: 0000000000000003 R14: 0000555d517246e0 R15: 0000555d511ac925 [ 868.598826] Cc: stable@vger.kernel.org Fixes: 69c4a42d72eb ("lsm,selinux: add new hook to compare new mount to an existing mount") Signed-off-by: Scott Mayhew [PM: cleanup/line-wrap the backtrace] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 62d30c0a30c2..1afc06ffd969 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -611,10 +611,11 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 0; } -static int parse_sid(struct super_block *sb, const char *s, u32 *sid) +static int parse_sid(struct super_block *sb, const char *s, u32 *sid, + gfp_t gfp) { int rc = security_context_str_to_sid(&selinux_state, s, - sid, GFP_KERNEL); + sid, gfp); if (rc) pr_warn("SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -685,7 +686,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, */ if (opts) { if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &fscontext_sid); + rc = parse_sid(sb, opts->fscontext, &fscontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, @@ -694,7 +696,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= FSCONTEXT_MNT; } if (opts->context) { - rc = parse_sid(sb, opts->context, &context_sid); + rc = parse_sid(sb, opts->context, &context_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, @@ -703,7 +706,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= CONTEXT_MNT; } if (opts->rootcontext) { - rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid); + rc = parse_sid(sb, opts->rootcontext, &rootcontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, @@ -712,7 +716,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, sbsec->flags |= ROOTCONTEXT_MNT; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &defcontext_sid); + rc = parse_sid(sb, opts->defcontext, &defcontext_sid, + GFP_KERNEL); if (rc) goto out; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, @@ -2702,14 +2707,14 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) return (sbsec->flags & SE_MNTMASK) ? 1 : 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid); + rc = parse_sid(sb, opts->fscontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) return 1; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid); + rc = parse_sid(sb, opts->context, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2719,14 +2724,14 @@ static int selinux_sb_mnt_opts_compat(struct super_block *sb, void *mnt_opts) struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid); + rc = parse_sid(sb, opts->rootcontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) return 1; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid); + rc = parse_sid(sb, opts->defcontext, &sid, GFP_NOWAIT); if (rc) return 1; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) @@ -2749,14 +2754,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) return 0; if (opts->fscontext) { - rc = parse_sid(sb, opts->fscontext, &sid); + rc = parse_sid(sb, opts->fscontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, FSCONTEXT_MNT, sbsec->sid, sid)) goto out_bad_option; } if (opts->context) { - rc = parse_sid(sb, opts->context, &sid); + rc = parse_sid(sb, opts->context, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, CONTEXT_MNT, sbsec->mntpoint_sid, sid)) @@ -2765,14 +2770,14 @@ static int selinux_sb_remount(struct super_block *sb, void *mnt_opts) if (opts->rootcontext) { struct inode_security_struct *root_isec; root_isec = backing_inode_security(sb->s_root); - rc = parse_sid(sb, opts->rootcontext, &sid); + rc = parse_sid(sb, opts->rootcontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid)) goto out_bad_option; } if (opts->defcontext) { - rc = parse_sid(sb, opts->defcontext, &sid); + rc = parse_sid(sb, opts->defcontext, &sid, GFP_KERNEL); if (rc) return rc; if (bad_option(sbsec, DEFCONTEXT_MNT, sbsec->def_sid, sid)) From 5da5231bb47864e5dd6c6731151e98b6ee498827 Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Tue, 14 Dec 2021 09:45:10 -0500 Subject: [PATCH 592/868] libata: if T_LENGTH is zero, dma direction should be DMA_NONE Avoid data corruption by rejecting pass-through commands where T_LENGTH is zero (No data is transferred) and the dma direction is not DMA_NONE. Cc: Reported-by: syzkaller Signed-off-by: George Kennedy Signed-off-by: Damien Le Moal --- drivers/ata/libata-scsi.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 1b84d5526d77..313e9475507b 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2859,8 +2859,19 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } - if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0) - tf->protocol = ATA_PROT_NCQ_NODATA; + if ((cdb[2 + cdb_offset] & 0x3) == 0) { + /* + * When T_LENGTH is zero (No data is transferred), dir should + * be DMA_NONE. + */ + if (scmd->sc_data_direction != DMA_NONE) { + fp = 2 + cdb_offset; + goto invalid_fld; + } + + if (ata_is_ncq(tf->protocol)) + tf->protocol = ATA_PROT_NCQ_NODATA; + } /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; From 27750a315aba7e6675bb1c3dfd4481c4f6888af1 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Wed, 17 Nov 2021 14:30:34 +0000 Subject: [PATCH 593/868] crypto: qat - do not handle PFVF sources for qat_4xxx The QAT driver does not have support for PFVF interrupts for GEN4 devices, therefore report the vf2pf sources as 0. This prevents a NULL pointer dereference in the function adf_msix_isr_ae() if the device triggers a spurious interrupt. Fixes: 993161d36ab5 ("crypto: qat - fix handling of VF to PF interrupts") Reported-by: Adam Guerin Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index fa768f10635f..fd29861526d6 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -211,6 +211,12 @@ static u32 uof_get_ae_mask(u32 obj_num) return adf_4xxx_fw_config[obj_num].ae_mask; } +static u32 get_vf2pf_sources(void __iomem *pmisc_addr) +{ + /* For the moment do not report vf2pf sources */ + return 0; +} + void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &adf_4xxx_class; @@ -254,6 +260,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->set_msix_rttable = set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; hw_data->enable_pfvf_comms = pfvf_comms_disabled; + hw_data->get_vf2pf_sources = get_vf2pf_sources; hw_data->disable_iov = adf_disable_sriov; hw_data->min_iov_compat_ver = ADF_PFVF_COMPAT_THIS_VERSION; From 9020be114a47bf7ff33e179b3bb0016b91a098e6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Dec 2021 10:05:27 +0300 Subject: [PATCH 594/868] scsi: lpfc: Terminate string in lpfc_debugfs_nvmeio_trc_write() The "mybuf" string comes from the user, so we need to ensure that it is NUL terminated. Link: https://lore.kernel.org/r/20211214070527.GA27934@kili Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") Reviewed-by: James Smart Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index bd6d459afce5..08b2e85dcd7d 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2954,8 +2954,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, char mybuf[64]; char *pbuf; - if (nbytes > 64) - nbytes = 64; + if (nbytes > 63) + nbytes = 63; memset(mybuf, 0, sizeof(mybuf)); From ea81b91e4e256b0bb75d47ad3a5c230b2171a005 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:37 +0000 Subject: [PATCH 595/868] riscv: dts: sifive unmatched: Name gpio lines Follow the pin descriptions given in the version 3 of the board schematics. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 3c796d64cf51..f8648ee1785a 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -247,4 +247,8 @@ &gpio { status = "okay"; + gpio-line-names = "J29.1", "PMICNTB", "PMICSHDN", "J8.1", "J8.3", + "PCIe_PWREN", "THERM", "UBRDG_RSTN", "PCIe_PERSTN", + "ULPI_RSTN", "J8.2", "UHUB_RSTN", "GEMGXL_RST", "J8.4", + "EN_VDD_SD", "SD_CD"; }; From 8120393b74b31bbaf293f59896de6b0d50febc48 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:38 +0000 Subject: [PATCH 596/868] riscv: dts: sifive unmatched: Expose the board ID eeprom Mark it as read-only as it is factory-programmed with identifying information, and no executable nor configuration: - eth MAC address - board model (PCB version, BoM version) - board serial number Accidental modification would cause misidentification which could brick the board, so marking read-only seem like both a safe and non-constraining choice. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index f8648ee1785a..d1f2289e529b 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -59,6 +59,16 @@ interrupts = <6 IRQ_TYPE_LEVEL_LOW>; }; + eeprom@54 { + compatible = "microchip,24c02", "atmel,24c02"; + reg = <0x54>; + vcc-supply = <&vdd_bpro>; + label = "board-id"; + pagesize = <16>; + read-only; + size = <256>; + }; + pmic@58 { compatible = "dlg,da9063"; reg = <0x58>; From cd29cc8ad2540a4f9a0a3e174394d39e648ef941 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:39 +0000 Subject: [PATCH 597/868] riscv: dts: sifive unmatched: Expose the PMIC sub-functions These sub-functions are available in the chip revision on this board, so expose them. Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index d1f2289e529b..91b3e76b2bb2 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -76,6 +76,18 @@ interrupts = <1 IRQ_TYPE_LEVEL_LOW>; interrupt-controller; + onkey { + compatible = "dlg,da9063-onkey"; + }; + + rtc { + compatible = "dlg,da9063-rtc"; + }; + + wdt { + compatible = "dlg,da9063-watchdog"; + }; + regulators { vdd_bcore1: bcore1 { regulator-min-microvolt = <900000>; From ad931d9b3b2e21586de8e6b34346d0a30c13721d Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:41 +0000 Subject: [PATCH 598/868] riscv: dts: sifive unmatched: Fix regulator for board rev3 The existing values are rejected by the da9063 regulator driver, as they are unachievable with the declared chip setup (non-merged vcore and bmem are unable to provide the declared curent). Fix voltages to match rev3 schematics, which also matches their boot-up configuration within the chip's available precision. Declare bcore1/bcore2 and bmem/bio as merged. Set ldo09 and ldo10 as always-on as their consumers are not declared but exist. Drop ldo current limits as there is no current limit feature for these regulators in the DA9063. Fixes warnings like: DA9063_LDO3: Operation of current configuration missing Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- .../boot/dts/sifive/hifive-unmatched-a00.dts | 84 ++++++------------- 1 file changed, 24 insertions(+), 60 deletions(-) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 91b3e76b2bb2..58de5a312fc9 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -89,47 +89,31 @@ }; regulators { - vdd_bcore1: bcore1 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; - regulator-always-on; - }; - - vdd_bcore2: bcore2 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <900000>; - regulator-min-microamp = <5000000>; - regulator-max-microamp = <5000000>; + vdd_bcore: bcores-merged { + regulator-min-microvolt = <1050000>; + regulator-max-microvolt = <1050000>; + regulator-min-microamp = <4800000>; + regulator-max-microamp = <4800000>; regulator-always-on; }; vdd_bpro: bpro { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <2500000>; - regulator-max-microamp = <2500000>; + regulator-min-microamp = <2400000>; + regulator-max-microamp = <2400000>; regulator-always-on; }; vdd_bperi: bperi { - regulator-min-microvolt = <1050000>; - regulator-max-microvolt = <1050000>; + regulator-min-microvolt = <1060000>; + regulator-max-microvolt = <1060000>; regulator-min-microamp = <1500000>; regulator-max-microamp = <1500000>; regulator-always-on; }; - vdd_bmem: bmem { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; - regulator-min-microamp = <3000000>; - regulator-max-microamp = <3000000>; - regulator-always-on; - }; - - vdd_bio: bio { + vdd_bmem_bio: bmem-bio-merged { regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; regulator-min-microamp = <3000000>; @@ -140,86 +124,66 @@ vdd_ldo1: ldo1 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; regulator-always-on; }; vdd_ldo2: ldo2 { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; regulator-always-on; }; vdd_ldo3: ldo3 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo4: ldo4 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; regulator-always-on; }; vdd_ldo5: ldo5 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <100000>; - regulator-max-microamp = <100000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo6: ldo6 { - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; regulator-always-on; }; vdd_ldo7: ldo7 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ldo8: ldo8 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vdd_ld09: ldo9 { regulator-min-microvolt = <1050000>; regulator-max-microvolt = <1050000>; - regulator-min-microamp = <200000>; - regulator-max-microamp = <200000>; + regulator-always-on; }; vdd_ldo10: ldo10 { regulator-min-microvolt = <1000000>; regulator-max-microvolt = <1000000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; + regulator-always-on; }; vdd_ldo11: ldo11 { regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; - regulator-min-microamp = <300000>; - regulator-max-microamp = <300000>; regulator-always-on; }; }; From f6f7fbb89bf8dc9132fde55cfe67483138eea880 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Tue, 16 Nov 2021 23:57:42 +0000 Subject: [PATCH 599/868] riscv: dts: sifive unmatched: Link the tmp451 with its power supply Fixes the following probe warning: lm90 0-004c: Looking up vcc-supply from device tree lm90 0-004c: Looking up vcc-supply property in node /soc/i2c@10030000/temperature-sensor@4c failed lm90 0-004c: supply vcc not found, using dummy regulator Signed-off-by: Vincent Pelletier Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 58de5a312fc9..6bfa1f24d3de 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -55,6 +55,7 @@ temperature-sensor@4c { compatible = "ti,tmp451"; reg = <0x4c>; + vcc-supply = <&vdd_bpro>; interrupt-parent = <&gpio>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; }; From 8ffea2599f63fdbee968b894eab78170abf3ec2c Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 17 Dec 2021 15:15:45 +0900 Subject: [PATCH 600/868] zonefs: add MODULE_ALIAS_FS Add MODULE_ALIAS_FS() to load the module automatically when you do "mount -t zonefs". Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable # 5.6+ Signed-off-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 259ee2bda492..b76dfb310ab6 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1787,5 +1787,6 @@ static void __exit zonefs_exit(void) MODULE_AUTHOR("Damien Le Moal"); MODULE_DESCRIPTION("Zone file system for zoned block devices"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_FS("zonefs"); module_init(zonefs_init); module_exit(zonefs_exit); From bce472f90952cc8be03dded25c4aa109d27e5924 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 17 Dec 2021 16:41:17 +0900 Subject: [PATCH 601/868] MAITAINERS: Change zonefs maintainer email address Update my email address from damien.lemoal@wdc.com to damien.lemoal@opensource.wdc.com. Signed-off-by: Damien Le Moal --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 13f9a84a617e..d01ae22c55f8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21059,7 +21059,7 @@ S: Maintained F: arch/x86/kernel/cpu/zhaoxin.c ZONEFS FILESYSTEM -M: Damien Le Moal +M: Damien Le Moal M: Naohiro Aota R: Johannes Thumshirn L: linux-fsdevel@vger.kernel.org From b62e3317b68d9c84301940ca8ca9c35a584111b2 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Thu, 16 Dec 2021 23:19:16 +0800 Subject: [PATCH 602/868] net: fix typo in a comment The double 'as' in a comment is repeated, thus it should be removed. Signed-off-by: Xiang wangx Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index be5cb3360b94..6aadcc0ecb5b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1937,7 +1937,7 @@ enum netdev_ml_priv_type { * @udp_tunnel_nic: UDP tunnel offload state * @xdp_state: stores info on attached XDP BPF programs * - * @nested_level: Used as as a parameter of spin_lock_nested() of + * @nested_level: Used as a parameter of spin_lock_nested() of * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. From fc74881c28d314b10efac016ef49df4ff40b8b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 10 Dec 2021 09:39:27 +0100 Subject: [PATCH 603/868] drm/amdgpu: fix dropped backing store handling in amdgpu_dma_buf_move_notify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo->tbo.resource can now be NULL. Signed-off-by: Christian König Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1811 Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20211210083927.1754-1-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ae6ab93c868b..7444484a12bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -384,7 +384,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; - if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM) + if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; r = ttm_bo_validate(&bo->tbo, &placement, &ctx); From 7202216a6f34d571a22274e729f841256bf8b1ef Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 25 Nov 2021 12:05:19 +0100 Subject: [PATCH 604/868] ARM: 9160/1: NOMMU: Reload __secondary_data after PROCINFO_INITFUNC __secondary_data used to reside in r7 around call to PROCINFO_INITFUNC. After commit 95731b8ee63e ("ARM: 9059/1: cache-v7: get rid of mini-stack") r7 is used as a scratch register, so we have to reload __secondary_data before we setup the stack pointer. Fixes: 95731b8ee63e ("ARM: 9059/1: cache-v7: get rid of mini-stack") Signed-off-by: Vladimir Murzin Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/head-nommu.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index fadfee9e2b45..950bef83339f 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -114,6 +114,7 @@ ENTRY(secondary_startup) add r12, r12, r10 ret r12 1: bl __after_proc_init + ldr r7, __secondary_data @ reload r7 ldr sp, [r7, #12] @ set up the stack pointer ldr r0, [r7, #16] @ set up task pointer mov fp, #0 From 8536a5ef886005bc443c2da9b842d69fd3d7647f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 15 Dec 2021 09:31:36 +0100 Subject: [PATCH 605/868] ARM: 9169/1: entry: fix Thumb2 bug in iWMMXt exception handling The Thumb2 version of the FP exception handling entry code treats the register holding the CP number (R8) differently, resulting in the iWMMXT CP number check to be incorrect. Fix this by unifying the ARM and Thumb2 code paths, and switch the order of the additions of the TI_USED_CP offset and the shifted CP index. Cc: Fixes: b86040a59feb ("Thumb-2: Implementation of the unified start-up and exceptions code") Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/entry-armv.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index deff286eb5ea..5cd057859fe9 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -596,11 +596,9 @@ call_fpe: tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 reteq lr and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] + add r6, r10, r8, lsr #8 @ add used_cp[] array offset first + strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] #ifdef CONFIG_IWMMXT @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] @@ -609,7 +607,7 @@ call_fpe: bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) + THUMB( lsr r8, r8, #6 ) THUMB( add pc, r8 ) nop From c4d936efa46d8ea183df16c0f3fa4423327da51d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Dec 2021 16:24:30 +0100 Subject: [PATCH 606/868] Revert "usb: early: convert to readl_poll_timeout_atomic()" This reverts commit 796eed4b2342c9d6b26c958e92af91253a2390e1. This change causes boot lockups when using "arlyprintk=xdbc" because ktime can not be used at this point in time in the boot process. Also, it is not needed for very small delays like this. Reported-by: Mathias Nyman Reported-by: Peter Zijlstra Cc: Jann Horn Cc: Chunfeng Yun Fixes: 796eed4b2342 ("usb: early: convert to readl_poll_timeout_atomic()") Link: https://lore.kernel.org/r/c2b5c9bb-1b75-bf56-3754-b5b18812d65e@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/early/xhci-dbc.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index 933d77ad0a64..4502108069cd 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -136,9 +135,17 @@ static int handshake(void __iomem *ptr, u32 mask, u32 done, int wait, int delay) { u32 result; - return readl_poll_timeout_atomic(ptr, result, - ((result & mask) == done), - delay, wait); + /* Can not use readl_poll_timeout_atomic() for early boot things */ + do { + result = readl(ptr); + result &= mask; + if (result == done) + return 0; + udelay(delay); + wait -= delay; + } while (wait > 0); + + return -ETIMEDOUT; } static void __init xdbc_bios_handoff(void) From 544e737dea5ad1a457f25dbddf68761ff25e028b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 16 Dec 2021 20:30:18 +0100 Subject: [PATCH 607/868] PM: sleep: Fix error handling in dpm_prepare() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") forgot to update the while () loop termination condition to also break the loop if error is nonzero, which causes the loop to become infinite if device_prepare() returns an error for one device. Add the missing !error check. Fixes: 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") Signed-off-by: Rafael J. Wysocki Reported-by: Thomas Hellström Reviewed-by: Thomas Hellström Reviewed-by: Ulf Hansson Cc: All applicable --- drivers/base/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f4d0c555de29..04ea92cbd9cf 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1902,7 +1902,7 @@ int dpm_prepare(pm_message_t state) device_block_probing(); mutex_lock(&dpm_list_mtx); - while (!list_empty(&dpm_list)) { + while (!list_empty(&dpm_list) && !error) { struct device *dev = to_device(dpm_list.next); get_device(dev); From bf67014d6bda16a72deea11dbbff2a97c705ca92 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 16 Dec 2021 13:35:27 -0500 Subject: [PATCH 608/868] drm/amdgpu: introduce new amdgpu_fence object to indicate the job embedded fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The job embedded fence donesn't initialize the flags at dma_fence_init(). Then we will go a wrong way in amdgpu_fence_get_timeline_name callback and trigger a null pointer panic once we enabled the trace event here. So introduce new amdgpu_fence object to indicate the job embedded fence. [ 156.131790] BUG: kernel NULL pointer dereference, address: 00000000000002a0 [ 156.131804] #PF: supervisor read access in kernel mode [ 156.131811] #PF: error_code(0x0000) - not-present page [ 156.131817] PGD 0 P4D 0 [ 156.131824] Oops: 0000 [#1] PREEMPT SMP PTI [ 156.131832] CPU: 6 PID: 1404 Comm: sdma0 Tainted: G OE 5.16.0-rc1-custom #1 [ 156.131842] Hardware name: Gigabyte Technology Co., Ltd. Z170XP-SLI/Z170XP-SLI-CF, BIOS F20 11/04/2016 [ 156.131848] RIP: 0010:strlen+0x0/0x20 [ 156.131859] Code: 89 c0 c3 0f 1f 80 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 [ 156.131872] RSP: 0018:ffff9bd0018dbcf8 EFLAGS: 00010206 [ 156.131880] RAX: 00000000000002a0 RBX: ffff8d0305ef01b0 RCX: 000000000000000b [ 156.131888] RDX: ffff8d03772ab924 RSI: ffff8d0305ef01b0 RDI: 00000000000002a0 [ 156.131895] RBP: ffff9bd0018dbd60 R08: ffff8d03002094d0 R09: 0000000000000000 [ 156.131901] R10: 000000000000005e R11: 0000000000000065 R12: ffff8d03002094d0 [ 156.131907] R13: 000000000000001f R14: 0000000000070018 R15: 0000000000000007 [ 156.131914] FS: 0000000000000000(0000) GS:ffff8d062ed80000(0000) knlGS:0000000000000000 [ 156.131923] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 156.131929] CR2: 00000000000002a0 CR3: 000000001120a005 CR4: 00000000003706e0 [ 156.131937] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 156.131942] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 156.131949] Call Trace: [ 156.131953] [ 156.131957] ? trace_event_raw_event_dma_fence+0xcc/0x200 [ 156.131973] ? ring_buffer_unlock_commit+0x23/0x130 [ 156.131982] dma_fence_init+0x92/0xb0 [ 156.131993] amdgpu_fence_emit+0x10d/0x2b0 [amdgpu] [ 156.132302] amdgpu_ib_schedule+0x2f9/0x580 [amdgpu] [ 156.132586] amdgpu_job_run+0xed/0x220 [amdgpu] v2: fix mismatch warning between the prototype and function name (Ray, kernel test robot) Signed-off-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 126 ++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 +- 3 files changed, 90 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e651b959141..9dc86c5a1cad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4461,7 +4461,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, j, r = 0; + int i, r = 0; struct amdgpu_job *job = NULL; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -4483,15 +4483,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, /*clear job fence from fence drv to avoid force_completion *leave NULL and vm flush fence in fence drv */ - for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) { - struct dma_fence *old, **ptr; + amdgpu_fence_driver_clear_job_fences(ring); - ptr = &ring->fence_drv.fences[j]; - old = rcu_dereference_protected(*ptr, 1); - if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) { - RCU_INIT_POINTER(*ptr, NULL); - } - } /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3b7e86ea7167..9afd11ca2709 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void) * Cast helper */ static const struct dma_fence_ops amdgpu_fence_ops; +static const struct dma_fence_ops amdgpu_job_fence_ops; static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) { struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); - if (__f->base.ops == &amdgpu_fence_ops) + if (__f->base.ops == &amdgpu_fence_ops || + __f->base.ops == &amdgpu_job_fence_ops) return __f; return NULL; @@ -158,19 +160,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd } seq = ++ring->fence_drv.sync_seq; - if (job != NULL && job->job_run_counter) { + if (job && job->job_run_counter) { /* reinit seq for resubmitted jobs */ fence->seqno = seq; } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, - seq); - } - - if (job != NULL) { - /* mark this fence has a parent job */ - set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags); + if (job) + dma_fence_init(fence, &amdgpu_job_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); + else + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, @@ -620,6 +621,25 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) } } +/** + * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring + * + * @ring: fence of the ring to be cleared + * + */ +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) +{ + int i; + struct dma_fence *old, **ptr; + + for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { + ptr = &ring->fence_drv.fences[i]; + old = rcu_dereference_protected(*ptr, 1); + if (old && old->ops == &amdgpu_job_fence_ops) + RCU_INIT_POINTER(*ptr, NULL); + } +} + /** * amdgpu_fence_driver_force_completion - force signal latest fence of ring * @@ -643,16 +663,14 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence) static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_ring *ring; + return (const char *)to_amdgpu_fence(f)->ring->name; +} - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); +static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } - return (const char *)ring->name; + return (const char *)to_amdgpu_ring(job->base.sched)->name; } /** @@ -665,18 +683,25 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) */ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_ring *ring; + if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + return true; +} - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } +/** + * amdgpu_job_fence_enable_signaling - enable signalling on job fence + * @f: fence + * + * This is the simliar function with amdgpu_fence_enable_signaling above, it + * only handles the job embedded fence. + */ +static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - if (!timer_pending(&ring->fence_drv.fallback_timer)) - amdgpu_fence_schedule_fallback(ring); + if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); return true; } @@ -692,19 +717,23 @@ static void amdgpu_fence_free(struct rcu_head *rcu) { struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - /* free job if fence has a parent job */ - struct amdgpu_job *job; - - job = container_of(f, struct amdgpu_job, hw_fence); - kfree(job); - } else { /* free fence_slab if it's separated fence*/ - struct amdgpu_fence *fence; + kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); +} - fence = to_amdgpu_fence(f); - kmem_cache_free(amdgpu_fence_slab, fence); - } +/** + * amdgpu_job_fence_free - free up the job with embedded fence + * + * @rcu: RCU callback head + * + * Free up the job with embedded fence after the RCU grace period. + */ +static void amdgpu_job_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); + + /* free job if fence has a parent job */ + kfree(container_of(f, struct amdgpu_job, hw_fence)); } /** @@ -720,6 +749,19 @@ static void amdgpu_fence_release(struct dma_fence *f) call_rcu(&f->rcu, amdgpu_fence_free); } +/** + * amdgpu_job_fence_release - callback that job embedded fence can be freed + * + * @f: fence + * + * This is the simliar function with amdgpu_fence_release above, it + * only handles the job embedded fence. + */ +static void amdgpu_job_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_job_fence_free); +} + static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, @@ -727,6 +769,12 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .release = amdgpu_fence_release, }; +static const struct dma_fence_ops amdgpu_job_fence_ops = { + .get_driver_name = amdgpu_fence_get_driver_name, + .get_timeline_name = amdgpu_job_fence_get_timeline_name, + .enable_signaling = amdgpu_job_fence_enable_signaling, + .release = amdgpu_job_fence_release, +}; /* * Fence debugfs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4d380e79752c..fae7d185ad0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -53,9 +53,6 @@ enum amdgpu_ring_priority_level { #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) -/* fence flag bit to indicate the face is embedded in job*/ -#define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1) - #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) #define AMDGPU_IB_POOL_SIZE (1024 * 1024) @@ -114,6 +111,7 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, From 19e66d512e4182a0461530fa3159638e0f55d97e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 15 Dec 2021 23:37:03 +0800 Subject: [PATCH 609/868] drm/amd/pm: Fix xgmi link control on aldebaran Fix the message argument. 0: Allow power down 1: Disallow power down Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 59a7d276541d..7d50827cf0a8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1621,7 +1621,7 @@ static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en) { return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GmiPwrDnControl, - en ? 1 : 0, + en ? 0 : 1, NULL); } From b7865173cf6ae59942e2c69326a06e1c1df5ecf6 Mon Sep 17 00:00:00 2001 From: chen gong Date: Thu, 9 Dec 2021 19:47:10 +0800 Subject: [PATCH 610/868] drm/amdgpu: When the VCN(1.0) block is suspended, powergating is explicitly enabled Play a video on the raven (or PCO, raven2) platform, and then do the S3 test. When resume, the following error will be reported: amdgpu 0000:02:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring vcn_dec test failed (-110) [drm:amdgpu_device_ip_resume_phase2 [amdgpu]] *ERROR* resume of IP block failed -110 amdgpu 0000:02:00.0: amdgpu: amdgpu_device_ip_resume failed (-110). PM: dpm_run_callback(): pci_pm_resume+0x0/0x90 returns -110 [why] When playing the video: The power state flag of the vcn block is set to POWER_STATE_ON. When doing suspend: There is no change to the power state flag of the vcn block, it is still POWER_STATE_ON. When doing resume: Need to open the power gate of the vcn block and set the power state flag of the VCN block to POWER_STATE_ON. But at this time, the power state flag of the vcn block is already POWER_STATE_ON. The power status flag check in the "8f2cdef drm/amd/pm: avoid duplicate powergate/ungate setting" patch will return the amdgpu_dpm_set_powergating_by_smu function directly. As a result, the gate of the power was not opened, causing the subsequent ring test to fail. [how] In the suspend function of the vcn block, explicitly change the power state flag of the vcn block to POWER_STATE_OFF. BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1828 Signed-off-by: chen gong Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index d54d720b3cf6..3799226defc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -246,6 +246,13 @@ static int vcn_v1_0_suspend(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool idle_work_unexecuted; + + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); + if (idle_work_unexecuted) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + } r = vcn_v1_0_hw_fini(adev); if (r) From 5e713c6afa34c0fd6f113bf7bb1c2847172d7b20 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Dec 2021 22:13:56 -0500 Subject: [PATCH 611/868] drm/amdgpu: add support for IP discovery gc_info table v2 Used on gfx9 based systems. Fixes incorrect CU counts reported in the kernel log. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1833 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 76 +++++++++++++------ drivers/gpu/drm/amd/include/discovery.h | 49 ++++++++++++ 2 files changed, 103 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ea00090b3fb3..bcc9343353b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -526,10 +526,15 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) } } +union gc_info { + struct gc_info_v1_0 v1; + struct gc_info_v2_0 v2; +}; + int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { struct binary_header *bhdr; - struct gc_info_v1_0 *gc_info; + union gc_info *gc_info; if (!adev->mman.discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); @@ -537,28 +542,55 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) } bhdr = (struct binary_header *)adev->mman.discovery_bin; - gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin + + gc_info = (union gc_info *)(adev->mman.discovery_bin + le16_to_cpu(bhdr->table_list[GC].offset)); - - adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); - adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + - le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); - adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); - adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); - adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); - adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); - adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); - adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); - adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); - adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); - adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); - adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); - adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); - adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / - le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); - + switch (gc_info->v1.header.version_major) { + case 1: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se); + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) + + le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa)); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); + break; + case 2: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); + break; + default: + dev_err(adev->dev, + "Unhandled GC info table %d.%d\n", + gc_info->v1.header.version_major, + gc_info->v1.header.version_minor); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 7ec4331e67f2..a486769b66c6 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -143,6 +143,55 @@ struct gc_info_v1_0 { uint32_t gc_num_gl2a; }; +struct gc_info_v1_1 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; +}; + +struct gc_info_v2_0 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_packer_per_sc; +}; + typedef struct harvest_info_header { uint32_t signature; /* Table Signature */ uint32_t version; /* Table Version */ From afe8a3ba85ec2a6b6849367e25c06a2f8e0ddd05 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:06 +0100 Subject: [PATCH 612/868] ice: xsk: return xsk buffers back to pool when cleaning the ring Currently we only NULL the xdp_buff pointer in the internal SW ring but we never give it back to the xsk buffer pool. This means that buffers can be leaked out of the buff pool and never be used again. Add missing xsk_buff_free() call to the routine that is supposed to clean the entries that are left in the ring so that these buffers in the umem can be used by other sockets. Also, only go through the space that is actually left to be cleaned instead of a whole ring. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Magnus Karlsson Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index bb9a80847298..8593717a755e 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -811,14 +811,14 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) */ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { - u16 i; + u16 count_mask = rx_ring->count - 1; + u16 ntc = rx_ring->next_to_clean; + u16 ntu = rx_ring->next_to_use; - for (i = 0; i < rx_ring->count; i++) { - struct xdp_buff **xdp = &rx_ring->xdp_buf[i]; - - if (!xdp) - continue; + for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { + struct xdp_buff **xdp = &rx_ring->xdp_buf[ntc]; + xsk_buff_free(*xdp); *xdp = NULL; } } From 617f3e1b588c802517c236087561c6bcb0b4afd6 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:07 +0100 Subject: [PATCH 613/868] ice: xsk: allocate separate memory for XDP SW ring Currently, the zero-copy data path is reusing the memory region that was initially allocated for an array of struct ice_rx_buf for its own purposes. This is error prone as it is based on the ice_rx_buf struct always being the same size or bigger than what the zero-copy path needs. There can also be old values present in that array giving rise to errors when the zero-copy path uses it. Fix this by freeing the ice_rx_buf region and allocating a new array for the zero-copy path that has the right length and is initialized to zero. Fixes: 57f7f8b6bc0b ("ice: Use xdp_buf instead of rx_buf for xsk zero-copy") Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 17 ++++++++++++ drivers/net/ethernet/intel/ice/ice_txrx.c | 19 ++++++++----- drivers/net/ethernet/intel/ice/ice_xsk.c | 33 ++++++++++++----------- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1efc635cc0f5..fafe020e46ee 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -6,6 +6,18 @@ #include "ice_lib.h" #include "ice_dcb_lib.h" +static bool ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring) +{ + rx_ring->xdp_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->xdp_buf), GFP_KERNEL); + return !!rx_ring->xdp_buf; +} + +static bool ice_alloc_rx_buf(struct ice_rx_ring *rx_ring) +{ + rx_ring->rx_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); + return !!rx_ring->rx_buf; +} + /** * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI * @qs_cfg: gathered variables needed for PF->VSI queues assignment @@ -492,8 +504,11 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, ring->q_index, ring->q_vector->napi.napi_id); + kfree(ring->rx_buf); ring->xsk_pool = ice_xsk_pool(ring); if (ring->xsk_pool) { + if (!ice_alloc_rx_buf_zc(ring)) + return -ENOMEM; xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); ring->rx_buf_len = @@ -508,6 +523,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", ring->q_index); } else { + if (!ice_alloc_rx_buf(ring)) + return -ENOMEM; if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) /* coverity[check_return] */ xdp_rxq_info_reg(&ring->xdp_rxq, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index bc3ba19dc88f..dccf09eefc75 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -419,7 +419,10 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) } rx_skip_free: - memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count); + if (rx_ring->xsk_pool) + memset(rx_ring->xdp_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf))); + else + memset(rx_ring->rx_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->rx_buf))); /* Zero out the descriptor ring */ size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), @@ -446,8 +449,13 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); rx_ring->xdp_prog = NULL; - devm_kfree(rx_ring->dev, rx_ring->rx_buf); - rx_ring->rx_buf = NULL; + if (rx_ring->xsk_pool) { + kfree(rx_ring->xdp_buf); + rx_ring->xdp_buf = NULL; + } else { + kfree(rx_ring->rx_buf); + rx_ring->rx_buf = NULL; + } if (rx_ring->desc) { size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), @@ -475,8 +483,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(rx_ring->rx_buf); rx_ring->rx_buf = - devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count, - GFP_KERNEL); + kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); if (!rx_ring->rx_buf) return -ENOMEM; @@ -505,7 +512,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) return 0; err: - devm_kfree(dev, rx_ring->rx_buf); + kfree(rx_ring->rx_buf); rx_ring->rx_buf = NULL; return -ENOMEM; } diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8593717a755e..c124229d98fe 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -12,6 +12,11 @@ #include "ice_txrx_lib.h" #include "ice_lib.h" +static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx) +{ + return &rx_ring->xdp_buf[idx]; +} + /** * ice_qp_reset_stats - Resets all stats for rings of given index * @vsi: VSI that contains rings of interest @@ -372,7 +377,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) dma_addr_t dma; rx_desc = ICE_RX_DESC(rx_ring, ntu); - xdp = &rx_ring->xdp_buf[ntu]; + xdp = ice_xdp_buf(rx_ring, ntu); nb_buffs = min_t(u16, count, rx_ring->count - ntu); nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); @@ -419,19 +424,18 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring) /** * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer * @rx_ring: Rx ring - * @xdp_arr: Pointer to the SW ring of xdp_buff pointers + * @xdp: Pointer to XDP buffer * * This function allocates a new skb from a zero-copy Rx buffer. * * Returns the skb on success, NULL on failure. */ static struct sk_buff * -ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr) +ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { - struct xdp_buff *xdp = *xdp_arr; + unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; unsigned int metasize = xdp->data - xdp->data_meta; unsigned int datasize = xdp->data_end - xdp->data; - unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; struct sk_buff *skb; skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, @@ -445,7 +449,6 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr) skb_metadata_set(skb, metasize); xsk_buff_free(xdp); - *xdp_arr = NULL; return skb; } @@ -522,7 +525,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) while (likely(total_rx_packets < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; unsigned int size, xdp_res = 0; - struct xdp_buff **xdp; + struct xdp_buff *xdp; struct sk_buff *skb; u16 stat_err_bits; u16 vlan_tag = 0; @@ -545,18 +548,17 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (!size) break; - xdp = &rx_ring->xdp_buf[rx_ring->next_to_clean]; - xsk_buff_set_size(*xdp, size); - xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool); + xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean); + xsk_buff_set_size(xdp, size); + xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); - xdp_res = ice_run_xdp_zc(rx_ring, *xdp, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring); if (xdp_res) { if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) xdp_xmit |= xdp_res; else - xsk_buff_free(*xdp); + xsk_buff_free(xdp); - *xdp = NULL; total_rx_bytes += size; total_rx_packets++; cleaned_count++; @@ -816,10 +818,9 @@ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) u16 ntu = rx_ring->next_to_use; for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) { - struct xdp_buff **xdp = &rx_ring->xdp_buf[ntc]; + struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc); - xsk_buff_free(*xdp); - *xdp = NULL; + xsk_buff_free(xdp); } } From 0708b6facb4d165ef22bccddf2dc3e1eb9a12d03 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Mon, 13 Dec 2021 16:31:08 +0100 Subject: [PATCH 614/868] ice: remove dead store on XSK hotpath The 'if (ntu == rx_ring->count)' block in ice_alloc_rx_buffers_zc() was previously residing in the loop, but after introducing the batched interface it is used only to wrap-around the NTU descriptor, thus no more need to assign 'xdp'. Fixes: db804cfc21e9 ("ice: Use the xsk batched rx allocation interface") Signed-off-by: Alexander Lobakin Acked-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index c124229d98fe..27f5f64dcbd6 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -397,7 +397,6 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) ntu += nb_buffs; if (ntu == rx_ring->count) { rx_desc = ICE_RX_DESC(rx_ring, 0); - xdp = rx_ring->xdp_buf; ntu = 0; } From 8b51a13c37c24c08e488bd58303cb437814f4454 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:09 +0100 Subject: [PATCH 615/868] ice: xsk: do not clear status_error0 for ntu + nb_buffs descriptor The descriptor that ntu is pointing at when we exit ice_alloc_rx_bufs_zc() should not have its corresponding DD bit cleared as descriptor is not allocated in there and it is not valid for HW usage. The allocation routine at the entry will fill the descriptor that ntu points to after it was set to ntu + nb_buffs on previous call. Even the spec says: "The tail pointer should be set to one descriptor beyond the last empty descriptor in host descriptor ring." Therefore, step away from clearing the status_error0 on ntu + nb_buffs descriptor. Fixes: db804cfc21e9 ("ice: Use the xsk batched rx allocation interface") Reported-by: Elza Mathew Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 27f5f64dcbd6..ffa9a160766a 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -395,13 +395,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) } ntu += nb_buffs; - if (ntu == rx_ring->count) { - rx_desc = ICE_RX_DESC(rx_ring, 0); + if (ntu == rx_ring->count) ntu = 0; - } - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.status_error0 = 0; ice_release_rx_desc(rx_ring, ntu); return count == nb_buffs; From 8bea15ab7485863d900982ee6a0ff6f78b339c77 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:10 +0100 Subject: [PATCH 616/868] ice: xsk: allow empty Rx descriptors on XSK ZC data path Commit ac6f733a7bd5 ("ice: allow empty Rx descriptors") stated that ice HW can produce empty descriptors that are valid and they should be processed. Add this support to xsk ZC path to avoid potential processing problems. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index ffa9a160766a..c1491dc0675d 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -538,12 +538,18 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) */ dma_rmb(); + xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.pkt_len) & ICE_RX_FLX_DESC_PKT_LEN_M; - if (!size) - break; + if (!size) { + xdp->data = NULL; + xdp->data_end = NULL; + xdp->data_hard_start = NULL; + xdp->data_meta = NULL; + goto construct_skb; + } - xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean); xsk_buff_set_size(xdp, size); xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); @@ -561,7 +567,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) ice_bump_ntc(rx_ring); continue; } - +construct_skb: /* XDP_PASS path */ skb = ice_construct_skb_zc(rx_ring, xdp); if (!skb) { From dcbaf72aa4232a7aa5db5e483972a6fe4ba2b41c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 13 Dec 2021 16:31:11 +0100 Subject: [PATCH 617/868] ice: xsk: fix cleaned_count setting Currently cleaned_count is initialized to ICE_DESC_UNUSED(rx_ring) and later on during the Rx processing it is incremented per each frame that driver consumed. This can result in excessive buffers requested from xsk pool based on that value. To address this, just drop cleaned_count and pass ICE_DESC_UNUSED(rx_ring) directly as a function argument to ice_alloc_rx_bufs_zc(). Idea is to ask for buffers as many as consumed. Let us also call ice_alloc_rx_bufs_zc unconditionally at the end of ice_clean_rx_irq_zc. This has been changed in that way for corresponding ice_clean_rx_irq, but not here. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Kiran Bhandare Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 - drivers/net/ethernet/intel/ice/ice_xsk.c | 6 +----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index c56dd1749903..b7b3bd4816f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -24,7 +24,6 @@ #define ICE_MAX_DATA_PER_TXD_ALIGNED \ (~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD) -#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */ #define ICE_MAX_TXQ_PER_TXQG 128 /* Attempt to maximize the headroom available for incoming frames. We use a 2K diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index c1491dc0675d..c895351b25e0 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -505,7 +505,6 @@ out_failure: int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); struct ice_tx_ring *xdp_ring; unsigned int xdp_xmit = 0; struct bpf_prog *xdp_prog; @@ -562,7 +561,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) total_rx_bytes += size; total_rx_packets++; - cleaned_count++; ice_bump_ntc(rx_ring); continue; @@ -575,7 +573,6 @@ construct_skb: break; } - cleaned_count++; ice_bump_ntc(rx_ring); if (eth_skb_pad(skb)) { @@ -597,8 +594,7 @@ construct_skb: ice_receive_skb(rx_ring, skb, vlan_tag); } - if (cleaned_count >= ICE_RX_BUF_WRITE) - failure = !ice_alloc_rx_bufs_zc(rx_ring, cleaned_count); + failure = !ice_alloc_rx_bufs_zc(rx_ring, ICE_DESC_UNUSED(rx_ring)); ice_finalize_xdp_rx(xdp_ring, xdp_xmit); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); From 2b5160b12091285c5aca45980f100a9294af7b04 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 17 Dec 2021 12:44:09 -0300 Subject: [PATCH 618/868] ipmi: bail out if init_srcu_struct fails In case, init_srcu_struct fails (because of memory allocation failure), we might proceed with the driver initialization despite srcu_struct not being entirely initialized. Fixes: 913a89f009d9 ("ipmi: Don't initialize anything in the core until something uses it") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-1-cascardo@canonical.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index c837d5416e0e..84975b21fff2 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5392,7 +5392,9 @@ static int ipmi_init_msghandler(void) if (initialized) goto out; - init_srcu_struct(&ipmi_interfaces_srcu); + rv = init_srcu_struct(&ipmi_interfaces_srcu); + if (rv) + goto out; timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); From 75d70d76cb7b927cace2cb34265d68ebb3306b13 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 17 Dec 2021 12:44:10 -0300 Subject: [PATCH 619/868] ipmi: fix initialization when workqueue allocation fails If the workqueue allocation fails, the driver is marked as not initialized, and timer and panic_notifier will be left registered. Instead of removing those when workqueue allocation fails, do the workqueue initialization before doing it, and cleanup srcu_struct if it fails. Fixes: 1d49eb91e86e ("ipmi: Move remove_work to dedicated workqueue") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: Ioanna Alifieraki Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-2-cascardo@canonical.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 84975b21fff2..266c7bc58dda 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5396,20 +5396,23 @@ static int ipmi_init_msghandler(void) if (rv) goto out; + remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); + if (!remove_work_wq) { + pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); + rv = -ENOMEM; + goto out_wq; + } + timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); - if (!remove_work_wq) { - pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); - rv = -ENOMEM; - goto out; - } - initialized = true; +out_wq: + if (rv) + cleanup_srcu_struct(&ipmi_interfaces_srcu); out: mutex_unlock(&ipmi_interfaces_mutex); return rv; From 4e8c11b6b3f0b6a283e898344f154641eda94266 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Mon, 13 Dec 2021 21:57:27 +0800 Subject: [PATCH 620/868] timekeeping: Really make sure wall_to_monotonic isn't positive Even after commit e1d7ba873555 ("time: Always make sure wall_to_monotonic isn't positive") it is still possible to make wall_to_monotonic positive by running the following code: int main(void) { struct timespec time; clock_gettime(CLOCK_MONOTONIC, &time); time.tv_nsec = 0; clock_settime(CLOCK_REALTIME, &time); return 0; } The reason is that the second parameter of timespec64_compare(), ts_delta, may be unnormalized because the delta is calculated with an open coded substraction which causes the comparison of tv_sec to yield the wrong result: wall_to_monotonic = { .tv_sec = -10, .tv_nsec = 900000000 } ts_delta = { .tv_sec = -9, .tv_nsec = -900000000 } That makes timespec64_compare() claim that wall_to_monotonic < ts_delta, but actually the result should be wall_to_monotonic > ts_delta. After normalization, the result of timespec64_compare() is correct because the tv_sec comparison is not longer misleading: wall_to_monotonic = { .tv_sec = -10, .tv_nsec = 900000000 } ts_delta = { .tv_sec = -10, .tv_nsec = 100000000 } Use timespec64_sub() to ensure that ts_delta is normalized, which fixes the issue. Fixes: e1d7ba873555 ("time: Always make sure wall_to_monotonic isn't positive") Signed-off-by: Yu Liao Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211213135727.1656662-1-liaoyu15@huawei.com --- kernel/time/timekeeping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b348749a9fc6..dcdcb85121e4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1306,8 +1306,7 @@ int do_settimeofday64(const struct timespec64 *ts) timekeeping_forward_now(tk); xt = tk_xtime(tk); - ts_delta.tv_sec = ts->tv_sec - xt.tv_sec; - ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec; + ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) { ret = -EINVAL; From 0706a78f31c4217ca144f630063ec9561a21548d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 17 Dec 2021 15:56:46 +0100 Subject: [PATCH 621/868] Revert "xsk: Do not sleep in poll() when need_wakeup set" This reverts commit bd0687c18e635b63233dc87f38058cd728802ab4. This patch causes a Tx only workload to go to sleep even when it does not have to, leading to misserable performance in skb mode. It fixed one rare problem but created a much worse one, so this need to be reverted while I try to craft a proper solution to the original problem. Fixes: bd0687c18e63 ("xsk: Do not sleep in poll() when need_wakeup set") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211217145646.26449-1-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7a466ea962c5..f16074eb53c7 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -677,6 +677,8 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; + sock_poll_wait(file, sock, wait); + if (unlikely(!xsk_is_bound(xs))) return mask; @@ -688,8 +690,6 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, else /* Poll needs to drive Tx also in copy mode */ __xsk_sendmsg(sk); - } else { - sock_poll_wait(file, sock, wait); } if (xs->rx && !xskq_prod_is_empty(xs->rx)) From 819d11507f6637731947836e6308f5966d64cf9d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 17 Dec 2021 18:24:00 +0000 Subject: [PATCH 622/868] bpf, selftests: Fix spelling mistake "tained" -> "tainted" There appears to be a spelling mistake in a bpf test message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20211217182400.39296-1-colin.i.king@gmail.com --- tools/testing/selftests/bpf/verifier/value_ptr_arith.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 4d347bc53aa2..359f3e8f8b60 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1078,7 +1078,7 @@ .errstr_unpriv = "R0 pointer -= pointer prohibited", }, { - "map access: trying to leak tained dst reg", + "map access: trying to leak tainted dst reg", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), From b774302e885697dde027825f8de9beb985d037bd Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 8 Dec 2021 16:33:19 +0000 Subject: [PATCH 623/868] cifs: ignore resource_id while getting fscache super cookie We have a cyclic dependency between fscache super cookie and root inode cookie. The super cookie relies on tcon->resource_id, which gets populated from the root inode number. However, fetching the root inode initializes inode cookie as a child of super cookie, which is yet to be populated. resource_id is only used as auxdata to check the validity of super cookie. We can completely avoid setting resource_id to remove the circular dependency. Since vol creation time and vol serial numbers are used for auxdata, we should be fine. Additionally, there will be auxiliary data check for each inode cookie as well. Fixes: 5bf91ef03d98 ("cifs: wait for tcon resource_id before getting fscache super") CC: David Howells Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/connect.c | 7 +++++++ fs/cifs/inode.c | 13 ------------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 18448dbd762a..1060164b984a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3064,6 +3064,13 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); + /* + * The cookie is initialized from volume info returned above. + * Inside cifs_fscache_get_super_cookie it checks + * that we do not get super cookie twice. + */ + cifs_fscache_get_super_cookie(tcon); + out: mnt_ctx->server = server; mnt_ctx->ses = ses; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 96d083db1737..279622e4eb1c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1356,11 +1356,6 @@ iget_no_retry: goto out; } -#ifdef CONFIG_CIFS_FSCACHE - /* populate tcon->resource_id */ - tcon->resource_id = CIFS_I(inode)->uniqueid; -#endif - if (rc && tcon->pipe) { cifs_dbg(FYI, "ipc connection - fake read inode\n"); spin_lock(&inode->i_lock); @@ -1375,14 +1370,6 @@ iget_no_retry: iget_failed(inode); inode = ERR_PTR(rc); } - - /* - * The cookie is initialized from volume info returned above. - * Inside cifs_fscache_get_super_cookie it checks - * that we do not get super cookie twice. - */ - cifs_fscache_get_super_cookie(tcon); - out: kfree(path); free_xid(xid); From a31080899d5fdafcccf7f39dd214a814a2c82626 Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Fri, 17 Dec 2021 15:20:22 -0300 Subject: [PATCH 624/868] cifs: sanitize multiple delimiters in prepath mount.cifs can pass a device with multiple delimiters in it. This will cause rename(2) to fail with ENOENT. V2: - Make sanitize_path more readable. - Fix multiple delimiters between UNC and prepath. - Avoid a memory leak if a bad user starts putting a lot of delimiters in the path on purpose. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2031200 Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Cc: stable@vger.kernel.org # 5.11+ Acked-by: Ronnie Sahlberg Signed-off-by: Thiago Rafael Becker Signed-off-by: Steve French --- fs/cifs/fs_context.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 6a179ae753c1..e3ed25dc6f3f 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -434,6 +434,42 @@ out: return rc; } +/* + * Remove duplicate path delimiters. Windows is supposed to do that + * but there are some bugs that prevent rename from working if there are + * multiple delimiters. + * + * Returns a sanitized duplicate of @path. The caller is responsible for + * cleaning up the original. + */ +#define IS_DELIM(c) ((c) == '/' || (c) == '\\') +static char *sanitize_path(char *path) +{ + char *cursor1 = path, *cursor2 = path; + + /* skip all prepended delimiters */ + while (IS_DELIM(*cursor1)) + cursor1++; + + /* copy the first letter */ + *cursor2 = *cursor1; + + /* copy the remainder... */ + while (*(cursor1++)) { + /* ... skipping all duplicated delimiters */ + if (IS_DELIM(*cursor1) && IS_DELIM(*cursor2)) + continue; + *(++cursor2) = *cursor1; + } + + /* if the last character is a delimiter, skip it */ + if (IS_DELIM(*(cursor2 - 1))) + cursor2--; + + *(cursor2) = '\0'; + return kstrdup(path, GFP_KERNEL); +} + /* * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath * fields with the result. Returns 0 on success and an error otherwise @@ -493,7 +529,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) if (!*pos) return 0; - ctx->prepath = kstrdup(pos, GFP_KERNEL); + ctx->prepath = sanitize_path(pos); if (!ctx->prepath) return -ENOMEM; From 83912d6d55be10d65b5268d1871168b9ebe1ec4b Mon Sep 17 00:00:00 2001 From: Marcos Del Sol Vives Date: Thu, 16 Dec 2021 11:37:22 +0100 Subject: [PATCH 625/868] ksmbd: disable SMB2_GLOBAL_CAP_ENCRYPTION for SMB 3.1.1 According to the official Microsoft MS-SMB2 document section 3.3.5.4, this flag should be used only for 3.0 and 3.0.2 dialects. Setting it for 3.1.1 is a violation of the specification. This causes my Windows 10 client to detect an anomaly in the negotiation, and disable encryption entirely despite being explicitly enabled in ksmbd, causing all data transfers to go in plain text. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org # v5.15 Acked-by: Namjae Jeon Signed-off-by: Marcos Del Sol Vives Signed-off-by: Steve French --- fs/ksmbd/smb2ops.c | 3 --- fs/ksmbd/smb2pdu.c | 25 +++++++++++++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index 0a5d8450e835..02a44d28bdaf 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -271,9 +271,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn) if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES) conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING; - if (conn->cipher_type) - conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION; - if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 125590d5e940..b8b3a4c28b74 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -915,6 +915,25 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn, } } +/** + * smb3_encryption_negotiated() - checks if server and client agreed on enabling encryption + * @conn: smb connection + * + * Return: true if connection should be encrypted, else false + */ +static bool smb3_encryption_negotiated(struct ksmbd_conn *conn) +{ + if (!conn->ops->generate_encryptionkey) + return false; + + /* + * SMB 3.0 and 3.0.2 dialects use the SMB2_GLOBAL_CAP_ENCRYPTION flag. + * SMB 3.1.1 uses the cipher_type field. + */ + return (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) || + conn->cipher_type; +} + static void decode_compress_ctxt(struct ksmbd_conn *conn, struct smb2_compression_capabilities_context *pneg_ctxt) { @@ -1469,8 +1488,7 @@ static int ntlm_authenticate(struct ksmbd_work *work) (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) sess->sign = true; - if (conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION && - conn->ops->generate_encryptionkey && + if (smb3_encryption_negotiated(conn) && !(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) { rc = conn->ops->generate_encryptionkey(sess); if (rc) { @@ -1559,8 +1577,7 @@ static int krb5_authenticate(struct ksmbd_work *work) (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED)) sess->sign = true; - if ((conn->vals->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) && - conn->ops->generate_encryptionkey) { + if (smb3_encryption_negotiated(conn)) { retval = conn->ops->generate_encryptionkey(sess); if (retval) { ksmbd_debug(SMB, From ec624fe740b416fb68d536b37fb8eef46f90b5c2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:33 +0200 Subject: [PATCH 626/868] net/sched: Extend qdisc control block with tc control block BPF layer extends the qdisc control block via struct bpf_skb_data_end and because of that there is no more room to add variables to the qdisc layer control block without going over the skb->cb size. Extend the qdisc control block with a tc control block, and move all tc related variables to there as a pre-step for extending the tc control block with additional members. Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/net/pkt_sched.h | 15 +++++++++++++++ include/net/sch_generic.h | 2 -- net/core/dev.c | 8 ++++---- net/sched/act_ct.c | 14 +++++++------- net/sched/cls_api.c | 6 ++++-- net/sched/cls_flower.c | 3 ++- net/sched/sch_frag.c | 3 ++- 7 files changed, 34 insertions(+), 17 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index bf79f3a890af..05f18e81f3e8 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb) skb->tstamp = ktime_set(0, 0); } +struct tc_skb_cb { + struct qdisc_skb_cb qdisc_cb; + + u16 mru; + bool post_ct; +}; + +static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) +{ + struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb; + + BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb)); + return cb; +} + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 22179b2fda72..c70e6d2b2fdd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -447,8 +447,6 @@ struct qdisc_skb_cb { }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; - u16 mru; - bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); diff --git a/net/core/dev.c b/net/core/dev.c index 2a352e668d10..c4708e2487fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ - qdisc_skb_cb(skb)->mru = 0; - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) { @@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, } qdisc_skb_cb(skb)->pkt_len = skb->len; - qdisc_skb_cb(skb)->mru = 0; - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->mru = 0; + tc_skb_cb(skb)->post_ct = false; skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 90866ae45573..98e248b9c0b1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, u8 family, u16 zone, bool *defrag) { enum ip_conntrack_info ctinfo; - struct qdisc_skb_cb cb; struct nf_conn *ct; int err = 0; bool frag; + u16 mru; /* Previously seen (loopback)? Ignore. */ ct = nf_ct_get(skb, &ctinfo); @@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); - cb = *qdisc_skb_cb(skb); + mru = tc_skb_cb(skb)->mru; if (family == NFPROTO_IPV4) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; @@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (!err) { *defrag = true; - cb.mru = IPCB(skb)->frag_max_size; + mru = IPCB(skb)->frag_max_size; } } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) @@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (!err) { *defrag = true; - cb.mru = IP6CB(skb)->frag_max_size; + mru = IP6CB(skb)->frag_max_size; } #else err = -EOPNOTSUPP; @@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, } if (err != -EINPROGRESS) - *qdisc_skb_cb(skb) = cb; + tc_skb_cb(skb)->mru = mru; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, tcf_action_update_bstats(&c->common, skb); if (clear) { - qdisc_skb_cb(skb)->post_ct = false; + tc_skb_cb(skb)->post_ct = false; ct = nf_ct_get(skb, &ctinfo); if (ct) { nf_conntrack_put(&ct->ct_general); @@ -1048,7 +1048,7 @@ do_nat: out_push: skb_push_rcsum(skb, nh_ofs); - qdisc_skb_cb(skb)->post_ct = true; + tc_skb_cb(skb)->post_ct = true; out_clear: if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e54f0a42270c..ff8a9383bf1c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1617,12 +1617,14 @@ int tcf_classify(struct sk_buff *skb, /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { + struct tc_skb_cb *cb = tc_skb_cb(skb); + ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; - ext->mru = qdisc_skb_cb(skb)->mru; - ext->post_ct = qdisc_skb_cb(skb)->post_ct; + ext->mru = cb->mru; + ext->post_ct = cb->post_ct; } return ret; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index aab13ba11767..9782b93db1b3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -309,7 +310,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_fl_head *head = rcu_dereference_bh(tp->root); - bool post_ct = qdisc_skb_cb(skb)->post_ct; + bool post_ct = tc_skb_cb(skb)->post_ct; struct fl_flow_key skb_key; struct fl_flow_mask *mask; struct cls_fl_filter *f; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index 8c06381391d6..5ded4c8672a6 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB #include #include +#include #include #include #include @@ -137,7 +138,7 @@ err: int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) { - u16 mru = qdisc_skb_cb(skb)->mru; + u16 mru = tc_skb_cb(skb)->mru; int err; if (mru && skb->len > mru + skb->dev->hard_header_len) From 3849595866166b23bf6a0cb9ff87e06423167f67 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:34 +0200 Subject: [PATCH 627/868] net/sched: flow_dissector: Fix matching on zone id for invalid conns If ct rejects a flow, it removes the conntrack info from the skb. act_ct sets the post_ct variable so the dissector will see this case as an +tracked +invalid state, but the zone id is lost with the conntrack info. To restore the zone id on such cases, set the last executed zone, via the tc control block, when passing ct, and read it back in the dissector if there is no ct info on the skb (invalid connection). Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- include/net/pkt_sched.h | 1 + net/core/flow_dissector.c | 3 ++- net/sched/act_ct.c | 1 + net/sched/cls_flower.c | 3 ++- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c8cb7e697d47..2ecf8cfd2223 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1380,7 +1380,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, - bool post_ct); + bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 05f18e81f3e8..9e71691c491b 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -198,6 +198,7 @@ struct tc_skb_cb { u16 mru; bool post_ct; + u16 zone; /* Only valid if post_ct = true */ }; static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3255f57f5131..1b094c481f1d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -238,7 +238,7 @@ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, - size_t mapsize, bool post_ct) + size_t mapsize, bool post_ct, u16 zone) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) struct flow_dissector_key_ct *key; @@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, if (!ct) { key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_INVALID; + key->ct_zone = zone; return; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 98e248b9c0b1..ab3591408419 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1049,6 +1049,7 @@ out_push: skb_push_rcsum(skb, nh_ofs); tc_skb_cb(skb)->post_ct = true; + tc_skb_cb(skb)->zone = p->zone; out_clear: if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9782b93db1b3..ef54ed395874 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -311,6 +311,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, { struct cls_fl_head *head = rcu_dereference_bh(tp->root); bool post_ct = tc_skb_cb(skb)->post_ct; + u16 zone = tc_skb_cb(skb)->zone; struct fl_flow_key skb_key; struct fl_flow_mask *mask; struct cls_fl_filter *f; @@ -328,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, ARRAY_SIZE(fl_ct_info_to_flower_map), - post_ct); + post_ct, zone); skb_flow_dissect_hash(skb, &mask->dissector, &skb_key); skb_flow_dissect(skb, &mask->dissector, &skb_key, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP); From 635d448a1cce4b4ebee52b351052c70434fa90ea Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 14 Dec 2021 19:24:35 +0200 Subject: [PATCH 628/868] net: openvswitch: Fix matching zone id for invalid conns arriving from tc Zone id is not restored if we passed ct and ct rejected the connection, as there is no ct info on the skb. Save the zone from tc skb cb to tc skb extension and pass it on to ovs, use that info to restore the zone id for invalid connections. Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct") Signed-off-by: Paul Blakey Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + net/openvswitch/flow.c | 8 +++++++- net/sched/cls_api.c | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2ecf8cfd2223..4507d77d6941 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -286,6 +286,7 @@ struct nf_bridge_info { struct tc_skb_ext { __u32 chain; __u16 mru; + __u16 zone; bool post_ct; }; #endif diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9713035b89e3..6d262d9aa10e 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "conntrack.h" #include "datapath.h" @@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #endif bool post_ct = false; int res, err; + u16 zone = 0; /* Extract metadata from packet. */ if (tun_info) { @@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; post_ct = tc_ext ? tc_ext->post_ct : false; + zone = post_ct ? tc_ext->zone : 0; } else { key->recirc_id = 0; } @@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #endif err = key_extract(skb, key); - if (!err) + if (!err) { ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */ + if (post_ct && !skb_get_nfct(skb)) + key->ct_zone = zone; + } return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff8a9383bf1c..35c74bdde848 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1625,6 +1625,7 @@ int tcf_classify(struct sk_buff *skb, ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; + ext->zone = cb->zone; } return ret; From 1488fc204568f707fe2a42a913788c00a95af30e Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Fri, 17 Dec 2021 01:07:40 +0100 Subject: [PATCH 629/868] net: lantiq_xrx200: increase buffer reservation If the user sets a lower mtu on the CPU port than on the switch, then DMA inserts a few more bytes into the buffer than expected. In the worst case, it may exceed the size of the buffer. The experiments showed that the buffer should be a multiple of the burst length value. This patch rounds the length of the rx buffer upwards and fixes this bug. The reservation of FCS space in the buffer has been removed as PMAC strips the FCS. Fixes: 998ac358019e ("net: lantiq: add support for jumbo frames") Reported-by: Thomas Nixon Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/lantiq_xrx200.c | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 0da09ea81980..96bd6f2b21ed 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -71,6 +71,8 @@ struct xrx200_priv { struct xrx200_chan chan_tx; struct xrx200_chan chan_rx; + u16 rx_buf_size; + struct net_device *net_dev; struct device *dev; @@ -97,6 +99,16 @@ static void xrx200_pmac_mask(struct xrx200_priv *priv, u32 clear, u32 set, xrx200_pmac_w32(priv, val, offset); } +static int xrx200_max_frame_len(int mtu) +{ + return VLAN_ETH_HLEN + mtu; +} + +static int xrx200_buffer_size(int mtu) +{ + return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN); +} + /* drop all the packets from the DMA ring */ static void xrx200_flush_dma(struct xrx200_chan *ch) { @@ -109,8 +121,7 @@ static void xrx200_flush_dma(struct xrx200_chan *ch) break; desc->ctl = LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) | - (ch->priv->net_dev->mtu + VLAN_ETH_HLEN + - ETH_FCS_LEN); + ch->priv->rx_buf_size; ch->dma.desc++; ch->dma.desc %= LTQ_DESC_NUM; } @@ -158,21 +169,21 @@ static int xrx200_close(struct net_device *net_dev) static int xrx200_alloc_skb(struct xrx200_chan *ch) { - int len = ch->priv->net_dev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; struct sk_buff *skb = ch->skb[ch->dma.desc]; + struct xrx200_priv *priv = ch->priv; dma_addr_t mapping; int ret = 0; - ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(ch->priv->net_dev, - len); + ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev, + priv->rx_buf_size); if (!ch->skb[ch->dma.desc]) { ret = -ENOMEM; goto skip; } - mapping = dma_map_single(ch->priv->dev, ch->skb[ch->dma.desc]->data, - len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) { + mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data, + priv->rx_buf_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(priv->dev, mapping))) { dev_kfree_skb_any(ch->skb[ch->dma.desc]); ch->skb[ch->dma.desc] = skb; ret = -ENOMEM; @@ -184,7 +195,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch) wmb(); skip: ch->dma.desc_base[ch->dma.desc].ctl = - LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) | len; + LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) | priv->rx_buf_size; return ret; } @@ -356,6 +367,7 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) int ret = 0; net_dev->mtu = new_mtu; + priv->rx_buf_size = xrx200_buffer_size(new_mtu); if (new_mtu <= old_mtu) return ret; @@ -375,6 +387,7 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu) ret = xrx200_alloc_skb(ch_rx); if (ret) { net_dev->mtu = old_mtu; + priv->rx_buf_size = xrx200_buffer_size(old_mtu); break; } dev_kfree_skb_any(skb); @@ -505,7 +518,8 @@ static int xrx200_probe(struct platform_device *pdev) net_dev->netdev_ops = &xrx200_netdev_ops; SET_NETDEV_DEV(net_dev, dev); net_dev->min_mtu = ETH_ZLEN; - net_dev->max_mtu = XRX200_DMA_DATA_LEN - VLAN_ETH_HLEN - ETH_FCS_LEN; + net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0); + priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN); /* load the memory ranges */ priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); From f845fe5819efc4111c456c102f15db6d9ed3406e Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 17 Dec 2021 10:00:59 +0700 Subject: [PATCH 630/868] Revert "tipc: use consistent GFP flags" This reverts commit 86c3a3e964d910a62eeb277d60b2a60ebefa9feb. The tipc_aead_init() function can be calling from an interrupt routine. This allocation might sleep with GFP_KERNEL flag, hence the following BUG is reported. [ 17.657509] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:230 [ 17.660916] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/3 [ 17.664093] preempt_count: 302, expected: 0 [ 17.665619] RCU nest depth: 2, expected: 0 [ 17.667163] Preemption disabled at: [ 17.667165] [<0000000000000000>] 0x0 [ 17.669753] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Tainted: G W 5.16.0-rc4+ #1 [ 17.673006] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 [ 17.675540] Call Trace: [ 17.676285] [ 17.676913] dump_stack_lvl+0x34/0x44 [ 17.678033] __might_resched.cold+0xd6/0x10f [ 17.679311] kmem_cache_alloc_trace+0x14d/0x220 [ 17.680663] tipc_crypto_start+0x4a/0x2b0 [tipc] [ 17.682146] ? kmem_cache_alloc_trace+0xd3/0x220 [ 17.683545] tipc_node_create+0x2f0/0x790 [tipc] [ 17.684956] tipc_node_check_dest+0x72/0x680 [tipc] [ 17.686706] ? ___cache_free+0x31/0x350 [ 17.688008] ? skb_release_data+0x128/0x140 [ 17.689431] tipc_disc_rcv+0x479/0x510 [tipc] [ 17.690904] tipc_rcv+0x71c/0x730 [tipc] [ 17.692219] ? __netif_receive_skb_core+0xb7/0xf60 [ 17.693856] tipc_l2_rcv_msg+0x5e/0x90 [tipc] [ 17.695333] __netif_receive_skb_list_core+0x20b/0x260 [ 17.697072] netif_receive_skb_list_internal+0x1bf/0x2e0 [ 17.698870] ? dev_gro_receive+0x4c2/0x680 [ 17.700255] napi_complete_done+0x6f/0x180 [ 17.701657] virtnet_poll+0x29c/0x42e [virtio_net] [ 17.703262] __napi_poll+0x2c/0x170 [ 17.704429] net_rx_action+0x22f/0x280 [ 17.705706] __do_softirq+0xfd/0x30a [ 17.706921] common_interrupt+0xa4/0xc0 [ 17.708206] [ 17.708922] [ 17.709651] asm_common_interrupt+0x1e/0x40 [ 17.711078] RIP: 0010:default_idle+0x18/0x20 Fixes: 86c3a3e964d9 ("tipc: use consistent GFP flags") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20211217030059.5947-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/crypto.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index b4d9419a015b..d293614d5fc6 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -524,7 +524,7 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, return -EEXIST; /* Allocate a new AEAD */ - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (unlikely(!tmp)) return -ENOMEM; @@ -1474,7 +1474,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, return -EEXIST; /* Allocate crypto */ - c = kzalloc(sizeof(*c), GFP_KERNEL); + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return -ENOMEM; @@ -1488,7 +1488,7 @@ int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, } /* Allocate statistic structure */ - c->stats = alloc_percpu(struct tipc_crypto_stats); + c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { if (c->wq) destroy_workqueue(c->wq); @@ -2461,7 +2461,7 @@ static void tipc_crypto_work_tx(struct work_struct *work) } /* Lets duplicate it first */ - skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_KERNEL); + skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC); rcu_read_unlock(); /* Now, generate new key, initiate & distribute it */ From 8b681bd7c301c423fbe97a6b23388a2180ff04ca Mon Sep 17 00:00:00 2001 From: Yevhen Orlov Date: Thu, 16 Dec 2021 19:07:36 +0200 Subject: [PATCH 631/868] net: marvell: prestera: fix incorrect return of port_find In case, when some ports is in list and we don't find requested - we return last iterator state and not return NULL as expected. Fixes: 501ef3066c89 ("net: marvell: prestera: Add driver for Prestera family ASIC devices") Signed-off-by: Yevhen Orlov Link: https://lore.kernel.org/r/20211216170736.8851-1-yevhen.orlov@plvision.eu Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/prestera/prestera_main.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 4369a3ffad45..6c24375ad9cf 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -54,12 +54,14 @@ int prestera_port_pvid_set(struct prestera_port *port, u16 vid) struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw, u32 dev_id, u32 hw_id) { - struct prestera_port *port = NULL; + struct prestera_port *port = NULL, *tmp; read_lock(&sw->port_list_lock); - list_for_each_entry(port, &sw->port_list, list) { - if (port->dev_id == dev_id && port->hw_id == hw_id) + list_for_each_entry(tmp, &sw->port_list, list) { + if (tmp->dev_id == dev_id && tmp->hw_id == hw_id) { + port = tmp; break; + } } read_unlock(&sw->port_list_lock); @@ -68,12 +70,14 @@ struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw, struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id) { - struct prestera_port *port = NULL; + struct prestera_port *port = NULL, *tmp; read_lock(&sw->port_list_lock); - list_for_each_entry(port, &sw->port_list, list) { - if (port->id == id) + list_for_each_entry(tmp, &sw->port_list, list) { + if (tmp->id == id) { + port = tmp; break; + } } read_unlock(&sw->port_list_lock); From 2efc2256febf214e7b2bdaa21fe6c3c3146acdcb Mon Sep 17 00:00:00 2001 From: Yevhen Orlov Date: Thu, 16 Dec 2021 19:17:14 +0200 Subject: [PATCH 632/868] net: marvell: prestera: fix incorrect structure access In line: upper = info->upper_dev; We access upper_dev field, which is related only for particular events (e.g. event == NETDEV_CHANGEUPPER). So, this line cause invalid memory access for another events, when ptr is not netdev_notifier_changeupper_info. The KASAN logs are as follows: [ 30.123165] BUG: KASAN: stack-out-of-bounds in prestera_netdev_port_event.constprop.0+0x68/0x538 [prestera] [ 30.133336] Read of size 8 at addr ffff80000cf772b0 by task udevd/778 [ 30.139866] [ 30.141398] CPU: 0 PID: 778 Comm: udevd Not tainted 5.16.0-rc3 #6 [ 30.147588] Hardware name: DNI AmazonGo1 A7040 board (DT) [ 30.153056] Call trace: [ 30.155547] dump_backtrace+0x0/0x2c0 [ 30.159320] show_stack+0x18/0x30 [ 30.162729] dump_stack_lvl+0x68/0x84 [ 30.166491] print_address_description.constprop.0+0x74/0x2b8 [ 30.172346] kasan_report+0x1e8/0x250 [ 30.176102] __asan_load8+0x98/0xe0 [ 30.179682] prestera_netdev_port_event.constprop.0+0x68/0x538 [prestera] [ 30.186847] prestera_netdev_event_handler+0x1b4/0x1c0 [prestera] [ 30.193313] raw_notifier_call_chain+0x74/0xa0 [ 30.197860] call_netdevice_notifiers_info+0x68/0xc0 [ 30.202924] register_netdevice+0x3cc/0x760 [ 30.207190] register_netdev+0x24/0x50 [ 30.211015] prestera_device_register+0x8a0/0xba0 [prestera] Fixes: 3d5048cc54bd ("net: marvell: prestera: move netdev topology validation to prestera_main") Signed-off-by: Yevhen Orlov Link: https://lore.kernel.org/r/20211216171714.11341-1-yevhen.orlov@plvision.eu Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/prestera/prestera_main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 6c24375ad9cf..c687dc9aa973 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -768,23 +768,27 @@ static int prestera_netdev_port_event(struct net_device *lower, struct net_device *dev, unsigned long event, void *ptr) { - struct netdev_notifier_changeupper_info *info = ptr; + struct netdev_notifier_info *info = ptr; + struct netdev_notifier_changeupper_info *cu_info; struct prestera_port *port = netdev_priv(dev); struct netlink_ext_ack *extack; struct net_device *upper; - extack = netdev_notifier_info_to_extack(&info->info); - upper = info->upper_dev; + extack = netdev_notifier_info_to_extack(info); + cu_info = container_of(info, + struct netdev_notifier_changeupper_info, + info); switch (event) { case NETDEV_PRECHANGEUPPER: + upper = cu_info->upper_dev; if (!netif_is_bridge_master(upper) && !netif_is_lag_master(upper)) { NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EINVAL; } - if (!info->linking) + if (!cu_info->linking) break; if (netdev_has_any_upper_dev(upper)) { @@ -793,7 +797,7 @@ static int prestera_netdev_port_event(struct net_device *lower, } if (netif_is_lag_master(upper) && - !prestera_lag_master_check(upper, info->upper_info, extack)) + !prestera_lag_master_check(upper, cu_info->upper_info, extack)) return -EOPNOTSUPP; if (netif_is_lag_master(upper) && vlan_uses_dev(dev)) { NL_SET_ERR_MSG_MOD(extack, @@ -809,14 +813,15 @@ static int prestera_netdev_port_event(struct net_device *lower, break; case NETDEV_CHANGEUPPER: + upper = cu_info->upper_dev; if (netif_is_bridge_master(upper)) { - if (info->linking) + if (cu_info->linking) return prestera_bridge_port_join(upper, port, extack); else prestera_bridge_port_leave(upper, port); } else if (netif_is_lag_master(upper)) { - if (info->linking) + if (cu_info->linking) return prestera_lag_port_add(port, upper); else prestera_lag_port_del(port); From 158b515f703e75e7d68289bf4d98c664e1d632df Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Thu, 16 Dec 2021 13:25:32 -0500 Subject: [PATCH 633/868] tun: avoid double free in tun_free_netdev Avoid double free in tun_free_netdev() by moving the dev->tstats and tun->security allocs to a new ndo_init routine (tun_net_init()) that will be called by register_netdevice(). ndo_init is paired with the desctructor (tun_free_netdev()), so if there's an error in register_netdevice() the destructor will handle the frees. BUG: KASAN: double-free or invalid-free in selinux_tun_dev_free_security+0x1a/0x20 security/selinux/hooks.c:5605 CPU: 0 PID: 25750 Comm: syz-executor416 Not tainted 5.16.0-rc2-syzk #1 Hardware name: Red Hat KVM, BIOS Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x89/0xb5 lib/dump_stack.c:106 print_address_description.constprop.9+0x28/0x160 mm/kasan/report.c:247 kasan_report_invalid_free+0x55/0x80 mm/kasan/report.c:372 ____kasan_slab_free mm/kasan/common.c:346 [inline] __kasan_slab_free+0x107/0x120 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook mm/slub.c:1749 [inline] slab_free mm/slub.c:3513 [inline] kfree+0xac/0x2d0 mm/slub.c:4561 selinux_tun_dev_free_security+0x1a/0x20 security/selinux/hooks.c:5605 security_tun_dev_free_security+0x4f/0x90 security/security.c:2342 tun_free_netdev+0xe6/0x150 drivers/net/tun.c:2215 netdev_run_todo+0x4df/0x840 net/core/dev.c:10627 rtnl_unlock+0x13/0x20 net/core/rtnetlink.c:112 __tun_chr_ioctl+0x80c/0x2870 drivers/net/tun.c:3302 tun_chr_ioctl+0x2f/0x40 drivers/net/tun.c:3311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x19d/0x220 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3a/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-by: syzkaller Signed-off-by: George Kennedy Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/r/1639679132-19884-1-git-send-email-george.kennedy@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 115 ++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 56 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1572878c3403..45a67e72a02c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -209,6 +209,9 @@ struct tun_struct { struct tun_prog __rcu *steering_prog; struct tun_prog __rcu *filter_prog; struct ethtool_link_ksettings link_ksettings; + /* init args */ + struct file *file; + struct ifreq *ifr; }; struct veth { @@ -216,6 +219,9 @@ struct veth { __be16 h_vlan_TCI; }; +static void tun_flow_init(struct tun_struct *tun); +static void tun_flow_uninit(struct tun_struct *tun); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@ -953,6 +959,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) static const struct ethtool_ops tun_ethtool_ops; +static int tun_net_init(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct ifreq *ifr = tun->ifr; + int err; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + spin_lock_init(&tun->lock); + + err = security_tun_dev_alloc_security(&tun->security); + if (err < 0) { + free_percpu(dev->tstats); + return err; + } + + tun_flow_init(tun); + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | + TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + dev->features = dev->hw_features | NETIF_F_LLTX; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); + + INIT_LIST_HEAD(&tun->disabled); + err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS, false); + if (err < 0) { + tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); + free_percpu(dev->tstats); + return err; + } + return 0; +} + /* Net device detach from fd. */ static void tun_net_uninit(struct net_device *dev) { @@ -1169,6 +1218,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) } static const struct net_device_ops tun_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1252,6 +1302,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) } static const struct net_device_ops tap_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1292,7 +1343,7 @@ static void tun_flow_uninit(struct tun_struct *tun) #define MAX_MTU 65535 /* Initialize net device. */ -static void tun_net_init(struct net_device *dev) +static void tun_net_initialize(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -2206,11 +2257,6 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); free_percpu(dev->tstats); - /* We clear tstats so that tun_set_iff() can tell if - * tun_free_netdev() has been called from register_netdevice(). - */ - dev->tstats = NULL; - tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); __tun_set_ebpf(tun, &tun->steering_prog, NULL); @@ -2716,41 +2762,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->rx_batched = 0; RCU_INIT_POINTER(tun->steering_prog, NULL); - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) { - err = -ENOMEM; - goto err_free_dev; - } + tun->ifr = ifr; + tun->file = file; - spin_lock_init(&tun->lock); - - err = security_tun_dev_alloc_security(&tun->security); - if (err < 0) - goto err_free_stat; - - tun_net_init(dev); - tun_flow_init(tun); - - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | - TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features | NETIF_F_LLTX; - dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - tun->flags = (tun->flags & ~TUN_FEATURES) | - (ifr->ifr_flags & TUN_FEATURES); - - INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS, false); - if (err < 0) - goto err_free_flow; + tun_net_initialize(dev); err = register_netdevice(tun->dev); - if (err < 0) - goto err_detach; + if (err < 0) { + free_netdev(dev); + return err; + } /* free_netdev() won't check refcnt, to avoid race * with dev_put() we need publish tun after registration. */ @@ -2767,24 +2788,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; - -err_detach: - tun_detach_all(dev); - /* We are here because register_netdevice() has failed. - * If register_netdevice() already called tun_free_netdev() - * while dealing with the error, dev->stats has been cleared. - */ - if (!dev->tstats) - goto err_free_dev; - -err_free_flow: - tun_flow_uninit(tun); - security_tun_dev_free_security(tun->security); -err_free_stat: - free_percpu(dev->tstats); -err_free_dev: - free_netdev(dev); - return err; } static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) From 8f556a326c93213927e683fc32bbf5be1b62540a Mon Sep 17 00:00:00 2001 From: Zqiang Date: Fri, 17 Dec 2021 15:42:07 +0800 Subject: [PATCH 634/868] locking/rtmutex: Fix incorrect condition in rtmutex_spin_on_owner() Optimistic spinning needs to be terminated when the spinning waiter is not longer the top waiter on the lock, but the condition is negated. It terminates if the waiter is the top waiter, which is defeating the whole purpose. Fixes: c3123c431447 ("locking/rtmutex: Dont dereference waiter lockless") Signed-off-by: Zqiang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211217074207.77425-1-qiang1.zhang@intel.com --- kernel/locking/rtmutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 0c6a48dfcecb..1f25a4d7de27 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1380,7 +1380,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * - the VCPU on which owner runs is preempted */ if (!owner->on_cpu || need_resched() || - rt_mutex_waiter_is_top_waiter(lock, waiter) || + !rt_mutex_waiter_is_top_waiter(lock, waiter) || vcpu_is_preempted(task_cpu(owner))) { res = false; break; From 0a515a06c5ebfa46fee3ac519e418f801e718da4 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Sun, 12 Dec 2021 06:25:02 +0000 Subject: [PATCH 635/868] perf expr: Fix missing check for return value of hashmap__new() The hashmap__new() function may return ERR_PTR(-ENOMEM) when malloc() fails, add IS_ERR() checking for ctx->ids. Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211212062504.25841-1-linmq006@gmail.com [ s/kfree()/free()/ and add missing linux/err.h include ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 1d532b9fed29..254601060b39 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -12,6 +12,7 @@ #include "expr-bison.h" #include "expr-flex.h" #include "smt.h" +#include #include #include #include @@ -299,6 +300,10 @@ struct expr_parse_ctx *expr__ctx_new(void) return NULL; ctx->ids = hashmap__new(key_hash, key_equal, NULL); + if (IS_ERR(ctx->ids)) { + free(ctx); + return NULL; + } ctx->runtime = 0; return ctx; From 0c8e32fe48f549eef27c8c6b0a63530f83c3a643 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Dec 2021 10:48:28 +0200 Subject: [PATCH 636/868] perf inject: Fix segfault due to close without open The fixed commit attempts to close inject.output even if it was never opened e.g. $ perf record uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (7 samples) ] $ perf inject -i perf.data --vm-time-correlation=dry-run Segmentation fault (core dumped) $ gdb --quiet perf Reading symbols from perf... (gdb) r inject -i perf.data --vm-time-correlation=dry-run Starting program: /home/ahunter/bin/perf inject -i perf.data --vm-time-correlation=dry-run [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. 0x00007eff8afeef5b in _IO_new_fclose (fp=0x0) at iofclose.c:48 48 iofclose.c: No such file or directory. (gdb) bt #0 0x00007eff8afeef5b in _IO_new_fclose (fp=0x0) at iofclose.c:48 #1 0x0000557fc7b74f92 in perf_data__close (data=data@entry=0x7ffcdafa6578) at util/data.c:376 #2 0x0000557fc7a6b807 in cmd_inject (argc=, argv=) at builtin-inject.c:1085 #3 0x0000557fc7ac4783 in run_builtin (p=0x557fc8074878 , argc=4, argv=0x7ffcdafb6a60) at perf.c:313 #4 0x0000557fc7a25d5c in handle_internal_command (argv=, argc=) at perf.c:365 #5 run_argv (argcp=, argv=) at perf.c:409 #6 main (argc=4, argv=0x7ffcdafb6a60) at perf.c:539 (gdb) Fixes: 02e6246f5364d526 ("perf inject: Close inject.output on exit") Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211213084829.114772-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b9d6306cc14e..af70f1c72052 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -1078,7 +1078,8 @@ out_delete: zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); out_close_output: - perf_data__close(&inject.output); + if (!inject.in_place_update) + perf_data__close(&inject.output); free(inject.itrace_synth_opts.vm_tm_corr_args); return ret; } From c271a55b0c6029fed0cac909fa57999a11467132 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Dec 2021 10:48:29 +0200 Subject: [PATCH 637/868] perf inject: Fix segfault due to perf_data__fd() without open The fixed commit attempts to get the output file descriptor even if the file was never opened e.g. $ perf record uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (7 samples) ] $ perf inject -i perf.data --vm-time-correlation=dry-run Segmentation fault (core dumped) $ gdb --quiet perf Reading symbols from perf... (gdb) r inject -i perf.data --vm-time-correlation=dry-run Starting program: /home/ahunter/bin/perf inject -i perf.data --vm-time-correlation=dry-run [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. __GI___fileno (fp=0x0) at fileno.c:35 35 fileno.c: No such file or directory. (gdb) bt #0 __GI___fileno (fp=0x0) at fileno.c:35 #1 0x00005621e48dd987 in perf_data__fd (data=0x7fff4c68bd08) at util/data.h:72 #2 perf_data__fd (data=0x7fff4c68bd08) at util/data.h:69 #3 cmd_inject (argc=, argv=0x7fff4c69c1f0) at builtin-inject.c:1017 #4 0x00005621e4936783 in run_builtin (p=0x5621e4ee6878 , argc=4, argv=0x7fff4c69c1f0) at perf.c:313 #5 0x00005621e4897d5c in handle_internal_command (argv=, argc=) at perf.c:365 #6 run_argv (argcp=, argv=) at perf.c:409 #7 main (argc=4, argv=0x7fff4c69c1f0) at perf.c:539 (gdb) Fixes: 0ae03893623dd1dd ("perf tools: Pass a fd to perf_file_header__read_pipe()") Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20211213084829.114772-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index af70f1c72052..409b721666cb 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -755,12 +755,16 @@ static int parse_vm_time_correlation(const struct option *opt, const char *str, return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; } +static int output_fd(struct perf_inject *inject) +{ + return inject->in_place_update ? -1 : perf_data__fd(&inject->output); +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; struct perf_session *session = inject->session; - struct perf_data *data_out = &inject->output; - int fd = inject->in_place_update ? -1 : perf_data__fd(data_out); + int fd = output_fd(inject); u64 output_data_offset; signal(SIGINT, sig_handler); @@ -1015,7 +1019,7 @@ int cmd_inject(int argc, const char **argv) } inject.session = __perf_session__new(&data, repipe, - perf_data__fd(&inject.output), + output_fd(&inject), &inject.tool); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); From b2f37aead1b82a770c48b5d583f35ec22aabb61e Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 17 Dec 2021 10:13:56 +0800 Subject: [PATCH 638/868] hamradio: improve the incomplete fix to avoid NPD The previous commit 3e0588c291d6 ("hamradio: defer ax25 kfree after unregister_netdev") reorder the kfree operations and unregister_netdev operation to prevent UAF. This commit improves the previous one by also deferring the nullify of the ax->tty pointer. Otherwise, a NULL pointer dereference bug occurs. Partial of the stack trace is shown below. BUG: kernel NULL pointer dereference, address: 0000000000000538 RIP: 0010:ax_xmit+0x1f9/0x400 ... Call Trace: dev_hard_start_xmit+0xec/0x320 sch_direct_xmit+0xea/0x240 __qdisc_run+0x166/0x5c0 __dev_queue_xmit+0x2c7/0xaf0 ax25_std_establish_data_link+0x59/0x60 ax25_connect+0x3a0/0x500 ? security_socket_connect+0x2b/0x40 __sys_connect+0x96/0xc0 ? __hrtimer_init+0xc0/0xc0 ? common_nsleep+0x2e/0x50 ? switch_fpu_return+0x139/0x1a0 __x64_sys_connect+0x11/0x20 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The crash point is shown as below static void ax_encaps(...) { ... set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); // ax->tty = NULL! ... } By placing the nullify action after the unregister_netdev, the ax->tty pointer won't be assigned as NULL net_device framework layer is well synchronized. Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 7da2bb8a443c..edde9c3ae12b 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -794,14 +794,14 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - ax->tty = NULL; - unregister_netdev(ax->dev); /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); + ax->tty = NULL; + free_netdev(ax->dev); } From 1ade48d0c27d5da1ccf4b583d8c5fc8b534a3ac8 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 17 Dec 2021 10:29:41 +0800 Subject: [PATCH 639/868] ax25: NPD bug when detaching AX25 device The existing cleanup routine implementation is not well synchronized with the syscall routine. When a device is detaching, below race could occur. static int ax25_sendmsg(...) { ... lock_sock() ax25 = sk_to_ax25(sk); if (ax25->ax25_dev == NULL) // CHECK ... ax25_queue_xmit(skb, ax25->ax25_dev->dev); // USE ... } static void ax25_kill_by_device(...) { ... if (s->ax25_dev == ax25_dev) { s->ax25_dev = NULL; ... } Other syscall functions like ax25_getsockopt, ax25_getname, ax25_info_show also suffer from similar races. To fix them, this patch introduce lock_sock() into ax25_kill_by_device in order to guarantee that the nullify action in cleanup routine cannot proceed when another socket request is pending. Signed-off-by: Hanjie Wu Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2f34bbdde0e8..cfca99e295b8 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -85,8 +85,10 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { - s->ax25_dev = NULL; spin_unlock_bh(&ax25_list_lock); + lock_sock(s->sk); + s->ax25_dev = NULL; + release_sock(s->sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); From 60ec7fcfe76892a1479afab51ff17a4281923156 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 17 Dec 2021 17:39:11 +0800 Subject: [PATCH 640/868] qlcnic: potential dereference null pointer of rx_queue->page_ring The return value of kcalloc() needs to be checked. To avoid dereference of null pointer in case of the failure of alloc. Therefore, it might be better to change the return type of qlcnic_sriov_alloc_vlans() and return -ENOMEM when alloc fails and return 0 the others. Also, qlcnic_sriov_set_guest_vlan_mode() and __qlcnic_pci_sriov_enable() should deal with the return value of qlcnic_sriov_alloc_vlans(). Fixes: 154d0c810c53 ("qlcnic: VLAN enhancement for 84XX adapters") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 2 +- .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 12 +++++++++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c | 4 +++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 7160b42f51dd..d0111cb3b40e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -201,7 +201,7 @@ int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *, struct qlcnic_info *, u16); int qlcnic_sriov_cfg_vf_guest_vlan(struct qlcnic_adapter *, u16, u8); void qlcnic_sriov_free_vlans(struct qlcnic_adapter *); -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); bool qlcnic_sriov_check_any_vlan(struct qlcnic_vf_info *); void qlcnic_sriov_del_vlan_id(struct qlcnic_sriov *, struct qlcnic_vf_info *, u16); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index dd03be3fc82a..42a44c97572a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -432,7 +432,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *cmd) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; - int i, num_vlans; + int i, num_vlans, ret; u16 *vlans; if (sriov->allowed_vlans) @@ -443,7 +443,9 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, dev_info(&adapter->pdev->dev, "Number of allowed Guest VLANs = %d\n", sriov->num_allowed_vlans); - qlcnic_sriov_alloc_vlans(adapter); + ret = qlcnic_sriov_alloc_vlans(adapter); + if (ret) + return ret; if (!sriov->any_vlan) return 0; @@ -2154,7 +2156,7 @@ static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *adapter) return err; } -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; struct qlcnic_vf_info *vf; @@ -2164,7 +2166,11 @@ void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); + if (!vf->sriov_vlans) + return -ENOMEM; } + + return 0; } void qlcnic_sriov_free_vlans(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 447720b93e5a..e90fa97c0ae6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -597,7 +597,9 @@ static int __qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, if (err) goto del_flr_queue; - qlcnic_sriov_alloc_vlans(adapter); + err = qlcnic_sriov_alloc_vlans(adapter); + if (err) + goto del_flr_queue; return err; From 53b1119a6e5028b125f431a0116ba73510d82a72 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Dec 2021 11:12:11 -0500 Subject: [PATCH 641/868] NFSD: Fix READDIR buffer overflow If a client sends a READDIR count argument that is too small (say, zero), then the buffer size calculation in the new init_dirlist helper functions results in an underflow, allowing the XDR stream functions to write beyond the actual buffer. This calculation has always been suspect. NFSD has never sanity- checked the READDIR count argument, but the old entry encoders managed the problem correctly. With the commits below, entry encoding changed, exposing the underflow to the pointer arithmetic in xdr_reserve_space(). Modern NFS clients attempt to retrieve as much data as possible for each READDIR request. Also, we have no unit tests that exercise the behavior of READDIR at the lower bound of @count values. Thus this case was missed during testing. Reported-by: Anatoly Trosinenko Fixes: f5dcccd647da ("NFSD: Update the NFSv2 READDIR entry encoder to use struct xdr_stream") Fixes: 7f87fc2d34d4 ("NFSD: Update NFSv3 READDIR entry encoders to use struct xdr_stream") Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 11 ++++------- fs/nfsd/nfsproc.c | 8 ++++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4418517f6f12..15dac36ca852 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -438,22 +438,19 @@ nfsd3_proc_link(struct svc_rqst *rqstp) static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd3_readdirres *resp, - int count) + u32 count) { struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - count = min_t(u32, count, svc_max_payload(rqstp)); + count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ buf->buflen = count - XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; - while (count > 0) { - rqstp->rq_next_page++; - count -= PAGE_SIZE; - } + rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* This is xdr_init_encode(), but it assumes that * the head kvec has already been consumed. */ @@ -462,7 +459,7 @@ static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, xdr->page_ptr = buf->pages; xdr->iov = NULL; xdr->p = page_address(*buf->pages); - xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); xdr->rqst = NULL; } diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index eea5b59b6a6c..de282f3273c5 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -556,17 +556,17 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, struct nfsd_readdirres *resp, - int count) + u32 count) { struct xdr_buf *buf = &resp->dirlist; struct xdr_stream *xdr = &resp->xdr; - count = min_t(u32, count, PAGE_SIZE); + count = clamp(count, (u32)(XDR_UNIT * 2), svc_max_payload(rqstp)); memset(buf, 0, sizeof(*buf)); /* Reserve room for the NULL ptr & eof flag (-2 words) */ - buf->buflen = count - sizeof(__be32) * 2; + buf->buflen = count - XDR_UNIT * 2; buf->pages = rqstp->rq_next_page; rqstp->rq_next_page++; @@ -577,7 +577,7 @@ static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, xdr->page_ptr = buf->pages; xdr->iov = NULL; xdr->p = page_address(*buf->pages); - xdr->end = xdr->p + (PAGE_SIZE >> 2); + xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE); xdr->rqst = NULL; } From 9a5875f14b0e3a13ae314883f1bb72b7f31fac07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Mon, 18 Oct 2021 13:22:01 +0200 Subject: [PATCH 642/868] gpio: dln2: Fix interrupts when replugging the device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When replugging the device the following message shows up: gpio gpiochip2: (dln2): detected irqchip that is shared with multiple gpiochips: please fix the driver. This also has the effect that interrupts won't work. The same problem would also show up if multiple devices where plugged in. Fix this by allocating the irq_chip data structure per instance like other drivers do. I don't know when this problem appeared, but it is present in 5.10. Cc: # 5.10+ Cc: Daniel Baluta Signed-off-by: Noralf Trønnes Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-dln2.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 026903e3ef54..08b9e2cf4f2d 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -46,6 +46,7 @@ struct dln2_gpio { struct platform_device *pdev; struct gpio_chip gpio; + struct irq_chip irqchip; /* * Cache pin direction to save us one transfer, since the hardware has @@ -383,15 +384,6 @@ static void dln2_irq_bus_unlock(struct irq_data *irqd) mutex_unlock(&dln2->irq_lock); } -static struct irq_chip dln2_gpio_irqchip = { - .name = "dln2-irq", - .irq_mask = dln2_irq_mask, - .irq_unmask = dln2_irq_unmask, - .irq_set_type = dln2_irq_set_type, - .irq_bus_lock = dln2_irq_bus_lock, - .irq_bus_sync_unlock = dln2_irq_bus_unlock, -}; - static void dln2_gpio_event(struct platform_device *pdev, u16 echo, const void *data, int len) { @@ -473,8 +465,15 @@ static int dln2_gpio_probe(struct platform_device *pdev) dln2->gpio.direction_output = dln2_gpio_direction_output; dln2->gpio.set_config = dln2_gpio_set_config; + dln2->irqchip.name = "dln2-irq", + dln2->irqchip.irq_mask = dln2_irq_mask, + dln2->irqchip.irq_unmask = dln2_irq_unmask, + dln2->irqchip.irq_set_type = dln2_irq_set_type, + dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock, + dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock, + girq = &dln2->gpio.irq; - girq->chip = &dln2_gpio_irqchip; + girq->chip = &dln2->irqchip; /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; From 87959fa16cfbcf76245c11559db1940069621274 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 19 Dec 2021 07:58:44 -0700 Subject: [PATCH 643/868] Revert "block: reduce kblockd_mod_delayed_work_on() CPU consumption" This reverts commit cb2ac2912a9ca7d3d26291c511939a41361d2d83. Alex and the kernel test robot report that this causes a significant performance regression with BFQ. I can reproduce that result, so let's revert this one as we're close to -rc6 and we there's no point in trying to rush a fix. Link: https://lore.kernel.org/linux-block/1639853092.524jxfaem2.none@localhost/ Link: https://lore.kernel.org/lkml/20211219141852.GH14057@xsang-OptiPlex-9020/ Reported-by: Alex Xu (Hello71) Reported-by: kernel test robot Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c1833f95cb97..1378d084c770 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1484,8 +1484,6 @@ EXPORT_SYMBOL(kblockd_schedule_work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay) { - if (!delay) - return queue_work_on(cpu, kblockd_workqueue, &dwork->work); return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_mod_delayed_work_on); From 57690554abe135fee81d6ac33cc94d75a7e224bb Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Thu, 16 Dec 2021 00:08:56 +0000 Subject: [PATCH 644/868] x86/pkey: Fix undefined behaviour with PKRU_WD_BIT Both __pkru_allows_write() and arch_set_user_pkey_access() shift PKRU_WD_BIT (a signed constant) by up to 30 bits, hitting the sign bit. Use unsigned constants instead. Clearly pkey 15 has not been used in combination with UBSAN yet. Noticed by code inspection only. I can't actually provoke the compiler into generating incorrect logic as far as this shift is concerned. [ dhansen: add stable@ tag, plus minor changelog massaging, For anyone doing backports, these #defines were in arch/x86/include/asm/pgtable.h before 784a46618f6. ] Fixes: 33a709b25a76 ("mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys") Signed-off-by: Andrew Cooper Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20211216000856.4480-1-andrew.cooper3@citrix.com --- arch/x86/include/asm/pkru.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h index 4cd49afa0ca4..74f0a2d34ffd 100644 --- a/arch/x86/include/asm/pkru.h +++ b/arch/x86/include/asm/pkru.h @@ -4,8 +4,8 @@ #include -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS From a7904a538933c525096ca2ccde1e60d0ee62c08e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Dec 2021 14:14:33 -0800 Subject: [PATCH 645/868] Linux 5.16-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 765115c99655..d85f1ff79f5c 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Gobble Gobble # *DOCUMENTATION* From 4ebfee2bbc1a9c343dd50565ba5ae249fac32267 Mon Sep 17 00:00:00 2001 From: Johnny Chuang Date: Mon, 20 Dec 2021 00:28:45 -0800 Subject: [PATCH 646/868] Input: elants_i2c - do not check Remark ID on eKTH3900/eKTH5312 The eKTH3900/eKTH5312 series do not support the firmware update rules of Remark ID. Exclude these two series from checking it when updating the firmware in touch controllers. Signed-off-by: Johnny Chuang Link: https://lore.kernel.org/r/1639619603-20616-1-git-send-email-johnny.chuang.emc@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/elants_i2c.c | 46 +++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 7e13a66a8a95..879a4d984c90 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -117,6 +117,19 @@ #define ELAN_POWERON_DELAY_USEC 500 #define ELAN_RESET_DELAY_MSEC 20 +/* FW boot code version */ +#define BC_VER_H_BYTE_FOR_EKTH3900x1_I2C 0x72 +#define BC_VER_H_BYTE_FOR_EKTH3900x2_I2C 0x82 +#define BC_VER_H_BYTE_FOR_EKTH3900x3_I2C 0x92 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C 0x6D +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C 0x6E +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C 0x77 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C 0x78 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB 0x67 +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB 0x68 +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB 0x74 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB 0x75 + enum elants_chip_id { EKTH3500, EKTF3624, @@ -736,6 +749,37 @@ static int elants_i2c_validate_remark_id(struct elants_data *ts, return 0; } +static bool elants_i2c_should_check_remark_id(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const u8 bootcode_version = ts->iap_version; + bool check; + + /* I2C eKTH3900 and eKTH5312 are NOT support Remark ID */ + if ((bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x3_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB)) { + dev_dbg(&client->dev, + "eKTH3900/eKTH5312(0x%02x) are not support remark id\n", + bootcode_version); + check = false; + } else if (bootcode_version >= 0x60) { + check = true; + } else { + check = false; + } + + return check; +} + static int elants_i2c_do_update_firmware(struct i2c_client *client, const struct firmware *fw, bool force) @@ -749,7 +793,7 @@ static int elants_i2c_do_update_firmware(struct i2c_client *client, u16 send_id; int page, n_fw_pages; int error; - bool check_remark_id = ts->iap_version >= 0x60; + bool check_remark_id = elants_i2c_should_check_remark_id(ts); /* Recovery mode detection! */ if (force) { From 66c915d09b942fb3b2b0cb2f56562180901fba17 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 3 Dec 2021 15:15:54 +0100 Subject: [PATCH 647/868] mmc: core: Disable card detect during shutdown It's seems prone to problems by allowing card detect and its corresponding mmc_rescan() work to run, during platform shutdown. For example, we may end up turning off the power while initializing a card, which potentially could damage it. To avoid this scenario, let's add ->shutdown_pre() callback for the mmc host class device and then turn of the card detect from there. Reported-by: Al Cooper Suggested-by: Adrian Hunter Signed-off-by: Ulf Hansson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211203141555.105351-1-ulf.hansson@linaro.org --- drivers/mmc/core/core.c | 7 ++++++- drivers/mmc/core/core.h | 1 + drivers/mmc/core/host.c | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 240c5af793dc..368f10405e13 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2264,7 +2264,7 @@ void mmc_start_host(struct mmc_host *host) _mmc_detect_change(host, 0, false); } -void mmc_stop_host(struct mmc_host *host) +void __mmc_stop_host(struct mmc_host *host) { if (host->slot.cd_irq >= 0) { mmc_gpio_set_cd_wake(host, false); @@ -2273,6 +2273,11 @@ void mmc_stop_host(struct mmc_host *host) host->rescan_disable = 1; cancel_delayed_work_sync(&host->detect); +} + +void mmc_stop_host(struct mmc_host *host) +{ + __mmc_stop_host(host); /* clear pm flags now and let card drivers set them as needed */ host->pm_flags = 0; diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index 7931a4f0137d..f5f3f623ea49 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -70,6 +70,7 @@ static inline void mmc_delay(unsigned int ms) void mmc_rescan(struct work_struct *work); void mmc_start_host(struct mmc_host *host); +void __mmc_stop_host(struct mmc_host *host); void mmc_stop_host(struct mmc_host *host); void _mmc_detect_change(struct mmc_host *host, unsigned long delay, diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index d4683b1d263f..cf140f4ec864 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -80,9 +80,18 @@ static void mmc_host_classdev_release(struct device *dev) kfree(host); } +static int mmc_host_classdev_shutdown(struct device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + __mmc_stop_host(host); + return 0; +} + static struct class mmc_host_class = { .name = "mmc_host", .dev_release = mmc_host_classdev_release, + .shutdown_pre = mmc_host_classdev_shutdown, .pm = MMC_HOST_CLASS_DEV_PM_OPS, }; From f89b548ca66be7500dcd92ee8e61590f7d08ac91 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 19 Dec 2021 16:34:41 +0100 Subject: [PATCH 648/868] mmc: meson-mx-sdhc: Set MANUAL_STOP for multi-block SDIO commands The vendor driver implements special handling for multi-block SD_IO_RW_EXTENDED (and SD_IO_RW_DIRECT) commands which have data attached to them. It sets the MANUAL_STOP bit in the MESON_SDHC_MISC register for these commands. In all other cases this bit is cleared. Here we omit SD_IO_RW_DIRECT since that command never has any data attached to it. This fixes SDIO wifi using the brcmfmac driver which reported the following error without this change on a Netxeon S82 board using a Meson8 (S802) SoC: brcmf_fw_alloc_request: using brcm/brcmfmac43362-sdio for chip BCM43362/1 brcmf_sdiod_ramrw: membytes transfer failed brcmf_sdio_download_code_file: error -110 on writing 219557 membytes at 0x00000000 brcmf_sdio_download_firmware: dongle image file download failed And with this change: brcmf_fw_alloc_request: using brcm/brcmfmac43362-sdio for chip BCM43362/1 brcmf_c_process_clm_blob: no clm_blob available (err=-2), device may have limited channels available brcmf_c_preinit_dcmds: Firmware: BCM43362/1 wl0: Apr 22 2013 14:50:00 version 5.90.195.89.6 FWID 01-b30a427d Fixes: e4bf1b0970ef96 ("mmc: host: meson-mx-sdhc: new driver for the Amlogic Meson SDHC host") Signed-off-by: Martin Blumenstingl Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211219153442.463863-2-martin.blumenstingl@googlemail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-mx-sdhc-mmc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c index 7cd9c0ec2fcf..8fdd0bbbfa21 100644 --- a/drivers/mmc/host/meson-mx-sdhc-mmc.c +++ b/drivers/mmc/host/meson-mx-sdhc-mmc.c @@ -135,6 +135,7 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) { struct meson_mx_sdhc_host *host = mmc_priv(mmc); + bool manual_stop = false; u32 ictl, send; int pack_len; @@ -172,12 +173,27 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, else /* software flush: */ ictl |= MESON_SDHC_ICTL_DATA_XFER_OK; + + /* + * Mimic the logic from the vendor driver where (only) + * SD_IO_RW_EXTENDED commands with more than one block set the + * MESON_SDHC_MISC_MANUAL_STOP bit. This fixes the firmware + * download in the brcmfmac driver for a BCM43362/1 card. + * Without this sdio_memcpy_toio() (with a size of 219557 + * bytes) times out if MESON_SDHC_MISC_MANUAL_STOP is not set. + */ + manual_stop = cmd->data->blocks > 1 && + cmd->opcode == SD_IO_RW_EXTENDED; } else { pack_len = 0; ictl |= MESON_SDHC_ICTL_RESP_OK; } + regmap_update_bits(host->regmap, MESON_SDHC_MISC, + MESON_SDHC_MISC_MANUAL_STOP, + manual_stop ? MESON_SDHC_MISC_MANUAL_STOP : 0); + if (cmd->opcode == MMC_STOP_TRANSMISSION) send |= MESON_SDHC_SEND_DATA_STOP; From 93a2207c254ca102ebbdae47b00f19bbfbfa7ecd Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 20 Dec 2021 10:51:20 +0100 Subject: [PATCH 649/868] HID: holtek: fix mouse probing An overlook from the previous commit: we don't even parse or start the device, meaning that the device is not presented to user space. Fixes: 93020953d0fa ("HID: check for valid USB device for many HID drivers") Cc: stable@vger.kernel.org Link: https://bugs.archlinux.org/task/73048 Link: https://bugzilla.kernel.org/show_bug.cgi?id=215341 Link: https://lore.kernel.org/r/e4efbf13-bd8d-0370-629b-6c80c0044b15@leemhuis.info/ Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-holtek-mouse.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index b7172c48ef9f..7c907939bfae 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -65,8 +65,23 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, static int holtek_mouse_probe(struct hid_device *hdev, const struct hid_device_id *id) { + int ret; + if (!hid_is_usb(hdev)) return -EINVAL; + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "hid parse failed: %d\n", ret); + return ret; + } + + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + if (ret) { + hid_err(hdev, "hw start failed: %d\n", ret); + return ret; + } + return 0; } From 13251ce1dd9bb525da2becb9b26fdfb94ca58659 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 15 Dec 2021 16:36:05 +0800 Subject: [PATCH 650/868] HID: potential dereference of null pointer The return value of devm_kzalloc() needs to be checked. To avoid hdev->dev->driver_data to be null in case of the failure of alloc. Fixes: 14c9c014babe ("HID: add vivaldi HID driver") Cc: stable@vger.kernel.org Signed-off-by: Jiasheng Jiang Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20211215083605.117638-1-jiasheng@iscas.ac.cn --- drivers/hid/hid-vivaldi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c index cd7ada48b1d9..72957a9f7117 100644 --- a/drivers/hid/hid-vivaldi.c +++ b/drivers/hid/hid-vivaldi.c @@ -57,6 +57,9 @@ static int vivaldi_probe(struct hid_device *hdev, int ret; drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + hid_set_drvdata(hdev, drvdata); ret = hid_parse(hdev); From 87a270625a89fc841f1a7e21aae6176543d8385c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 20 Dec 2021 10:22:40 +0100 Subject: [PATCH 651/868] mac80211: fix locking in ieee80211_start_ap error path We need to hold the local->mtx to release the channel context, as even encoded by the lockdep_assert_held() there. Fix it. Cc: stable@vger.kernel.org Fixes: 295b02c4be74 ("mac80211: Add FILS discovery support") Reported-and-tested-by: syzbot+11c342e5e30e9539cabd@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20211220090836.cee3d59a1915.I36bba9b79dc2ff4d57c3c7aa30dff9a003fe8c5c@changeid Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/mac80211/cfg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index bd3d3195097f..2d0dd69f9753 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1264,7 +1264,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return 0; error: + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); + return err; } From 662f11d55ffd02933e1bd275d732b97eddccf870 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 17 Dec 2021 12:42:31 -0500 Subject: [PATCH 652/868] docs: networking: dpaa2: Fix DPNI header The DPNI object should get its own header, like the rest of the objects. Fixes: 60b91319a349 ("staging: fsl-mc: Convert documentation to rst format") Signed-off-by: Sean Anderson Signed-off-by: David S. Miller --- .../device_drivers/ethernet/freescale/dpaa2/overview.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst index d638b5a8aadd..199647729251 100644 --- a/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst +++ b/Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst @@ -183,6 +183,7 @@ PHY and allows physical transmission and reception of Ethernet frames. IRQ config, enable, reset DPNI (Datapath Network Interface) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Contains TX/RX queues, network interface configuration, and RX buffer pool configuration mechanisms. The TX/RX queues are in memory and are identified by queue number. From 75a2f31520095600f650597c0ac41f48b5ba0068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Sun, 19 Dec 2021 19:03:39 +0200 Subject: [PATCH 653/868] phonet/pep: refuse to enable an unbound pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ioctl() implicitly assumed that the socket was already bound to a valid local socket name, i.e. Phonet object. If the socket was not bound, two separate problems would occur: 1) We'd send an pipe enablement request with an invalid source object. 2) Later socket calls could BUG on the socket unexpectedly being connected yet not bound to a valid object. Reported-by: syzbot+2dc91e7fc3dea88b1e8a@syzkaller.appspotmail.com Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b4f90afb0638..65d463ad8770 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -947,6 +947,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) ret = -EBUSY; else if (sk->sk_state == TCP_ESTABLISHED) ret = -EISCONN; + else if (!pn->pn_sk.sobject) + ret = -EADDRNOTAVAIL; else ret = pep_sock_enable(sk, NULL, 0); release_sock(sk); From 64d16aca3d4f130f35bbf1120e15f58a62f743d5 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 09:04:54 -0800 Subject: [PATCH 654/868] drm/i915/guc: Use correct context lock when callig clr_context_registered s/ce/cn/ when grabbing guc_state.lock before calling clr_context_registered. Fixes: 0f7976506de61 ("drm/i915/guc: Rework and simplify locking") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214170500.28569-2-matthew.brost@intel.com (cherry picked from commit b25db8c782ad7ae80d4cea2a09c222f4f8980bb9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c48557dfa04c..c50039a5ba1e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1664,9 +1664,9 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) list_del_init(&cn->guc_id.link); ce->guc_id = cn->guc_id; - spin_lock(&ce->guc_state.lock); + spin_lock(&cn->guc_state.lock); clr_context_registered(cn); - spin_unlock(&ce->guc_state.lock); + spin_unlock(&cn->guc_state.lock); set_context_guc_id_invalid(cn); From 7807bf28fe02a76bf112916c6b9194f282f5e43c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 09:04:55 -0800 Subject: [PATCH 655/868] drm/i915/guc: Only assign guc_id.id when stealing guc_id Previously assigned whole guc_id structure (list, spin lock) which is incorrect, only assign the guc_id.id. Fixes: 0f7976506de61 ("drm/i915/guc: Rework and simplify locking") Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214170500.28569-3-matthew.brost@intel.com (cherry picked from commit 939d8e9c87e704fd5437e2c8b80929591fe540eb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index c50039a5ba1e..302e9ff0602c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1662,7 +1662,7 @@ static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_parent(cn)); list_del_init(&cn->guc_id.link); - ce->guc_id = cn->guc_id; + ce->guc_id.id = cn->guc_id.id; spin_lock(&cn->guc_state.lock); clr_context_registered(cn); From 484730e5862f6b872dca13840bed40fd7c60fa26 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 8 Dec 2021 11:06:52 +0100 Subject: [PATCH 656/868] parisc: Clear stale IIR value on instruction access rights trap When a trap 7 (Instruction access rights) occurs, this means the CPU couldn't execute an instruction due to missing execute permissions on the memory region. In this case it seems the CPU didn't even fetched the instruction from memory and thus did not store it in the cr19 (IIR) register before calling the trap handler. So, the trap handler will find some random old stale value in cr19. This patch simply overwrites the stale IIR value with a constant magic "bad food" value (0xbaadf00d), in the hope people don't start to try to understand the various random IIR values in trap 7 dumps. Noticed-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b11fb26ce299..892b7fc8f3c4 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -730,6 +730,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) } mmap_read_unlock(current->mm); } + /* CPU could not fetch instruction, so clear stale IIR value. */ + regs->iir = 0xbaadf00d; fallthrough; case 27: /* Data memory protection ID trap */ From 8d84fca4375e3c35dadc16b8c7eee6821b2a575c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 3 Dec 2021 23:41:12 +1100 Subject: [PATCH 657/868] powerpc/ptdump: Fix DEBUG_WX since generic ptdump conversion In note_prot_wx() we bail out without reporting anything if CONFIG_PPC_DEBUG_WX is disabled. But CONFIG_PPC_DEBUG_WX was removed in the conversion to generic ptdump, we now need to use CONFIG_DEBUG_WX instead. Fixes: e084728393a5 ("powerpc/ptdump: Convert powerpc to GENERIC_PTDUMP") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Michael Ellerman Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/r/20211203124112.2912562-1-mpe@ellerman.id.au --- arch/powerpc/mm/ptdump/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index bf251191e78d..32bfb215c485 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -183,7 +183,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) { pte_t pte = __pte(st->current_flags); - if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) + if (!IS_ENABLED(CONFIG_DEBUG_WX) || !st->check_wx) return; if (!pte_write(pte) || !pte_exec(pte)) From 8f905c0e7354ef261360fb7535ea079b1082c105 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2021 06:33:30 -0800 Subject: [PATCH 658/868] inet: fully convert sk->sk_rx_dst to RCU rules syzbot reported various issues around early demux, one being included in this changelog [1] sk->sk_rx_dst is using RCU protection without clearly documenting it. And following sequences in tcp_v4_do_rcv()/tcp_v6_do_rcv() are not following standard RCU rules. [a] dst_release(dst); [b] sk->sk_rx_dst = NULL; They look wrong because a delete operation of RCU protected pointer is supposed to clear the pointer before the call_rcu()/synchronize_rcu() guarding actual memory freeing. In some cases indeed, dst could be freed before [b] is done. We could cheat by clearing sk_rx_dst before calling dst_release(), but this seems the right time to stick to standard RCU annotations and debugging facilities. [1] BUG: KASAN: use-after-free in dst_check include/net/dst.h:470 [inline] BUG: KASAN: use-after-free in tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 Read of size 2 at addr ffff88807f1cb73a by task syz-executor.5/9204 CPU: 0 PID: 9204 Comm: syz-executor.5 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x320 mm/kasan/report.c:247 __kasan_report mm/kasan/report.c:433 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:450 dst_check include/net/dst.h:470 [inline] tcp_v4_early_demux+0x95b/0x960 net/ipv4/tcp_ipv4.c:1792 ip_rcv_finish_core.constprop.0+0x15de/0x1e80 net/ipv4/ip_input.c:340 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637 irq_exit_rcu+0x5/0x20 kernel/softirq.c:649 common_interrupt+0x52/0xc0 arch/x86/kernel/irq.c:240 asm_common_interrupt+0x1e/0x40 arch/x86/include/asm/idtentry.h:629 RIP: 0033:0x7f5e972bfd57 Code: 39 d1 73 14 0f 1f 80 00 00 00 00 48 8b 50 f8 48 83 e8 08 48 39 ca 77 f3 48 39 c3 73 3e 48 89 13 48 8b 50 f8 48 89 38 49 8b 0e <48> 8b 3e 48 83 c3 08 48 83 c6 08 eb bc 48 39 d1 72 9e 48 39 d0 73 RSP: 002b:00007fff8a413210 EFLAGS: 00000283 RAX: 00007f5e97108990 RBX: 00007f5e97108338 RCX: ffffffff81d3aa45 RDX: ffffffff81d3aa45 RSI: 00007f5e97108340 RDI: ffffffff81d3aa45 RBP: 00007f5e97107eb8 R08: 00007f5e97108d88 R09: 0000000093c2e8d9 R10: 0000000000000000 R11: 0000000000000000 R12: 00007f5e97107eb0 R13: 00007f5e97108338 R14: 00007f5e97107ea8 R15: 0000000000000019 Allocated by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:434 [inline] __kasan_slab_alloc+0x90/0xc0 mm/kasan/common.c:467 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x202/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 ip_route_input_slow+0x1817/0x3a20 net/ipv4/route.c:2340 ip_route_input_rcu net/ipv4/route.c:2470 [inline] ip_route_input_noref+0x116/0x2a0 net/ipv4/route.c:2415 ip_rcv_finish_core.constprop.0+0x288/0x1e80 net/ipv4/ip_input.c:354 ip_list_rcv_finish.constprop.0+0x1b2/0x6e0 net/ipv4/ip_input.c:583 ip_sublist_rcv net/ipv4/ip_input.c:609 [inline] ip_list_rcv+0x34e/0x490 net/ipv4/ip_input.c:644 __netif_receive_skb_list_ptype net/core/dev.c:5508 [inline] __netif_receive_skb_list_core+0x549/0x8e0 net/core/dev.c:5556 __netif_receive_skb_list net/core/dev.c:5608 [inline] netif_receive_skb_list_internal+0x75e/0xd80 net/core/dev.c:5699 gro_normal_list net/core/dev.c:5853 [inline] gro_normal_list net/core/dev.c:5849 [inline] napi_complete_done+0x1f1/0x880 net/core/dev.c:6590 virtqueue_napi_complete drivers/net/virtio_net.c:339 [inline] virtnet_poll+0xca2/0x11b0 drivers/net/virtio_net.c:1557 __napi_poll+0xaf/0x440 net/core/dev.c:7023 napi_poll net/core/dev.c:7090 [inline] net_rx_action+0x801/0xb40 net/core/dev.c:7177 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Freed by task 13: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:366 [inline] ____kasan_slab_free mm/kasan/common.c:328 [inline] __kasan_slab_free+0xff/0x130 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1749 slab_free mm/slub.c:3513 [inline] kmem_cache_free+0xbd/0x5d0 mm/slub.c:3530 dst_destroy+0x2d6/0x3f0 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2506 [inline] rcu_core+0x7ab/0x1470 kernel/rcu/tree.c:2741 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Last potentially related work creation: kasan_save_stack+0x1e/0x50 mm/kasan/common.c:38 __kasan_record_aux_stack+0xf5/0x120 mm/kasan/generic.c:348 __call_rcu kernel/rcu/tree.c:2985 [inline] call_rcu+0xb1/0x740 kernel/rcu/tree.c:3065 dst_release net/core/dst.c:177 [inline] dst_release+0x79/0xe0 net/core/dst.c:167 tcp_v4_do_rcv+0x612/0x8d0 net/ipv4/tcp_ipv4.c:1712 sk_backlog_rcv include/net/sock.h:1030 [inline] __release_sock+0x134/0x3b0 net/core/sock.c:2768 release_sock+0x54/0x1b0 net/core/sock.c:3300 tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1441 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 sock_write_iter+0x289/0x3c0 net/socket.c:1057 call_write_iter include/linux/fs.h:2162 [inline] new_sync_write+0x429/0x660 fs/read_write.c:503 vfs_write+0x7cd/0xae0 fs/read_write.c:590 ksys_write+0x1ee/0x250 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The buggy address belongs to the object at ffff88807f1cb700 which belongs to the cache ip_dst_cache of size 176 The buggy address is located 58 bytes inside of 176-byte region [ffff88807f1cb700, ffff88807f1cb7b0) The buggy address belongs to the page: page:ffffea0001fc72c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7f1cb flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 dead000000000100 dead000000000122 ffff8881413bb780 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 5, ts 108466983062, free_ts 108048976062 prep_new_page mm/page_alloc.c:2418 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4149 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5369 alloc_pages+0x1a7/0x300 mm/mempolicy.c:2191 alloc_slab_page mm/slub.c:1793 [inline] allocate_slab mm/slub.c:1930 [inline] new_slab+0x32d/0x4a0 mm/slub.c:1993 ___slab_alloc+0x918/0xfe0 mm/slub.c:3022 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3109 slab_alloc_node mm/slub.c:3200 [inline] slab_alloc mm/slub.c:3242 [inline] kmem_cache_alloc+0x35c/0x3a0 mm/slub.c:3247 dst_alloc+0x146/0x1f0 net/core/dst.c:92 rt_dst_alloc+0x73/0x430 net/ipv4/route.c:1613 __mkroute_output net/ipv4/route.c:2564 [inline] ip_route_output_key_hash_rcu+0x921/0x2d00 net/ipv4/route.c:2791 ip_route_output_key_hash+0x18b/0x300 net/ipv4/route.c:2619 __ip_route_output_key include/net/route.h:126 [inline] ip_route_output_flow+0x23/0x150 net/ipv4/route.c:2850 ip_route_output_key include/net/route.h:142 [inline] geneve_get_v4_rt+0x3a6/0x830 drivers/net/geneve.c:809 geneve_xmit_skb drivers/net/geneve.c:899 [inline] geneve_xmit+0xc4a/0x3540 drivers/net/geneve.c:1082 __netdev_start_xmit include/linux/netdevice.h:4994 [inline] netdev_start_xmit include/linux/netdevice.h:5008 [inline] xmit_one net/core/dev.c:3590 [inline] dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3606 __dev_queue_xmit+0x299a/0x3650 net/core/dev.c:4229 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1338 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1389 free_unref_page_prepare mm/page_alloc.c:3309 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3388 qlink_free mm/kasan/quarantine.c:146 [inline] qlist_free_all+0x5a/0xc0 mm/kasan/quarantine.c:165 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:272 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:444 kasan_slab_alloc include/linux/kasan.h:259 [inline] slab_post_alloc_hook mm/slab.h:519 [inline] slab_alloc_node mm/slub.c:3234 [inline] kmem_cache_alloc_node+0x255/0x3f0 mm/slub.c:3270 __alloc_skb+0x215/0x340 net/core/skbuff.c:414 alloc_skb include/linux/skbuff.h:1126 [inline] alloc_skb_with_frags+0x93/0x620 net/core/skbuff.c:6078 sock_alloc_send_pskb+0x783/0x910 net/core/sock.c:2575 mld_newpack+0x1df/0x770 net/ipv6/mcast.c:1754 add_grhead+0x265/0x330 net/ipv6/mcast.c:1857 add_grec+0x1053/0x14e0 net/ipv6/mcast.c:1995 mld_send_initial_cr.part.0+0xf6/0x230 net/ipv6/mcast.c:2242 mld_send_initial_cr net/ipv6/mcast.c:1232 [inline] mld_dad_work+0x1d3/0x690 net/ipv6/mcast.c:2268 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 Memory state around the buggy address: ffff88807f1cb600: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88807f1cb680: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc >ffff88807f1cb700: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88807f1cb780: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc ffff88807f1cb800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 41063e9dd119 ("ipv4: Early TCP socket demux.") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20211220143330.680945-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 3 +-- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 11 +++++++---- net/ipv4/udp.c | 6 +++--- net/ipv6/tcp_ipv6.c | 11 +++++++---- net/ipv6/udp.c | 4 ++-- 8 files changed, 23 insertions(+), 18 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index bea21ff70e74..d47e9658da28 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -431,7 +431,7 @@ struct sock { #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif - struct dst_entry *sk_rx_dst; + struct dst_entry __rcu *sk_rx_dst; int sk_rx_dst_ifindex; u32 sk_rx_dst_cookie; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0189e3cd4a7d..6b5956500436 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk) kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); - dst_release(sk->sk_rx_dst); + dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1)); sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bbb3d39c69af..2bb28bfd83bf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3012,8 +3012,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - dst_release(sk->sk_rx_dst); - sk->sk_rx_dst = NULL; + dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL)); tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->segs_in = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 246ab7b5e857..0ce46849ec3d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5787,7 +5787,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) trace_tcp_probe(sk, skb); tcp_mstamp_refresh(tp); - if (unlikely(!sk->sk_rx_dst)) + if (unlikely(!rcu_access_pointer(sk->sk_rx_dst))) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 13d868c43284..084df223b5df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1701,7 +1701,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *rsk; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); @@ -1709,8 +1712,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_rx_dst_ifindex != skb->skb_iif || !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, dst, 0)) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } tcp_rcv_established(sk, skb); @@ -1786,7 +1789,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); @@ -2201,7 +2204,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 23b05e28490b..15c6b450b8db 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2250,7 +2250,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old; if (dst_hold_safe(dst)) { - old = xchg(&sk->sk_rx_dst, dst); + old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); dst_release(old); return old != dst; } @@ -2440,7 +2440,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp_sk_rx_dst_set(sk, dst); ret = udp_unicast_rcv_skb(sk, skb, uh); @@ -2599,7 +2599,7 @@ int udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 551fce49841d..680e6481b967 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -107,7 +107,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) if (dst && dst_hold_safe(dst)) { const struct rt6_info *rt = (const struct rt6_info *)dst; - sk->sk_rx_dst = dst; + rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; sk->sk_rx_dst_cookie = rt6_get_cookie(rt); } @@ -1505,7 +1505,10 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst; + + dst = rcu_dereference_protected(sk->sk_rx_dst, + lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); @@ -1513,8 +1516,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, dst, sk->sk_rx_dst_cookie) == NULL) { + RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); - sk->sk_rx_dst = NULL; } } @@ -1874,7 +1877,7 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { - struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e43b31d25fb6..a2caca6ccf11 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -956,7 +956,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst != dst)) + if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst)) udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_sk(sk)->no_check6_rx) { @@ -1070,7 +1070,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = READ_ONCE(sk->sk_rx_dst); + dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); From a9725e1d3962ad00288c4ae6d9b518afc51b2adc Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:46:08 -0500 Subject: [PATCH 659/868] docs: networking: replace skb_hwtstamp_tx with skb_tstamp_tx Tiny doc fix. The hardware transmit function was called skb_tstamp_tx from its introduction in commit ac45f602ee3d ("net: infrastructure for hardware time stamping") in the same series as this documentation. Fixes: cb9eff097831 ("net: new user space API for time stamping of incoming and outgoing packets") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220144608.2783526-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/timestamping.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 80b13353254a..f5809206eb93 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -582,8 +582,8 @@ Time stamps for outgoing packets are to be generated as follows: and hardware timestamping is not possible (SKBTX_IN_PROGRESS not set). - As soon as the driver has sent the packet and/or obtained a hardware time stamp for it, it passes the time stamp back by - calling skb_hwtstamp_tx() with the original skb, the raw - hardware time stamp. skb_hwtstamp_tx() clones the original skb and + calling skb_tstamp_tx() with the original skb, the raw + hardware time stamp. skb_tstamp_tx() clones the original skb and adds the timestamps, therefore the original skb has to be freed now. If obtaining the hardware time stamp somehow fails, then the driver should not fall back to software time stamping. The rationale is that From 7e5cced9ca84df52d874aca6b632f930b3dc5bc6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:49:01 -0500 Subject: [PATCH 660/868] net: accept UFOv6 packages in virtio_net_hdr_to_skb Skb with skb->protocol 0 at the time of virtio_net_hdr_to_skb may have a protocol inferred from virtio_net_hdr with virtio_net_hdr_set_proto. Unlike TCP, UDP does not have separate types for IPv4 and IPv6. Type VIRTIO_NET_HDR_GSO_UDP is guessed to be IPv4/UDP. As of the below commit, UFOv6 packets are dropped due to not matching the protocol as obtained from dev_parse_header_protocol. Invert the test to take that L2 protocol field as starting point and pass both UFOv4 and UFOv6 for VIRTIO_NET_HDR_GSO_UDP. Fixes: 924a9bc362a5 ("net: check if protocol extracted by virtio_net_hdr_set_proto is correct") Link: https://lore.kernel.org/netdev/CABcq3pG9GRCYqFDBAJ48H1vpnnX=41u+MhQnayF1ztLH4WX0Fw@mail.gmail.com/ Reported-by: Andrew Melnichenko Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220144901.2784030-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 04e87f4b9417..22dd48c82560 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -7,6 +7,21 @@ #include #include +static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +{ + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + return protocol == cpu_to_be16(ETH_P_IP); + case VIRTIO_NET_HDR_GSO_TCPV6: + return protocol == cpu_to_be16(ETH_P_IPV6); + case VIRTIO_NET_HDR_GSO_UDP: + return protocol == cpu_to_be16(ETH_P_IP) || + protocol == cpu_to_be16(ETH_P_IPV6); + default: + return false; + } +} + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { @@ -88,9 +103,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); - virtio_net_hdr_set_proto(skb, hdr); - if (protocol && protocol != skb->protocol) + if (!protocol) + virtio_net_hdr_set_proto(skb, hdr); + else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; + else + skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, From 1ed1d592113959f00cc552c3b9f47ca2d157768f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 20 Dec 2021 09:50:27 -0500 Subject: [PATCH 661/868] net: skip virtio_net_hdr_set_proto if protocol already set virtio_net_hdr_set_proto infers skb->protocol from the virtio_net_hdr gso_type, to avoid packets getting dropped for lack of a proto type. Its protocol choice is a guess, especially in the case of UFO, where the single VIRTIO_NET_HDR_GSO_UDP label covers both UFOv4 and UFOv6. Skip this best effort if the field is already initialized. Whether explicitly from userspace, or implicitly based on an earlier call to dev_parse_header_protocol (which is more robust, but was introduced after this patch). Fixes: 9d2f67e43b73 ("net/packet: fix packet drop as of virtio gso") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220145027.2784293-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 22dd48c82560..a960de68ac69 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -25,6 +25,9 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: From 1f06f7d97f741667bab0f459a4f940b21cab1549 Mon Sep 17 00:00:00 2001 From: Jeroen de Borst Date: Mon, 20 Dec 2021 11:27:46 -0800 Subject: [PATCH 662/868] gve: Correct order of processing device options The legacy raw addressing device option was processed before the new RDA queue format option. This caused the supported features mask, which is provided only on the RDA queue format option, not to be set. This disabled jumbo-frame support when using raw adressing. Fixes: 255489f5b33c ("gve: Add a jumbo-frame device option") Signed-off-by: Jeroen de Borst Link: https://lore.kernel.org/r/20211220192746.2900594-1-jeroendb@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_adminq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 83ae56c310d3..326b56b49216 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -738,10 +738,7 @@ int gve_adminq_describe_device(struct gve_priv *priv) * is not set to GqiRda, choose the queue format in a priority order: * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default. */ - if (priv->queue_format == GVE_GQI_RDA_FORMAT) { - dev_info(&priv->pdev->dev, - "Driver is running with GQI RDA queue format.\n"); - } else if (dev_op_dqo_rda) { + if (dev_op_dqo_rda) { priv->queue_format = GVE_DQO_RDA_FORMAT; dev_info(&priv->pdev->dev, "Driver is running with DQO RDA queue format.\n"); @@ -753,6 +750,9 @@ int gve_adminq_describe_device(struct gve_priv *priv) "Driver is running with GQI RDA queue format.\n"); supported_features_mask = be32_to_cpu(dev_op_gqi_rda->supported_features_mask); + } else if (priv->queue_format == GVE_GQI_RDA_FORMAT) { + dev_info(&priv->pdev->dev, + "Driver is running with GQI RDA queue format.\n"); } else { priv->queue_format = GVE_GQI_QPL_FORMAT; if (dev_op_gqi_qpl) From ac8c58f5b535d6272324e2b8b4a0454781c9147e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 20 Dec 2021 12:18:44 -0800 Subject: [PATCH 663/868] igb: fix deadlock caused by taking RTNL in RPM resume path Recent net core changes caused an issue with few Intel drivers (reportedly igb), where taking RTNL in RPM resume path results in a deadlock. See [0] for a bug report. I don't think the core changes are wrong, but taking RTNL in RPM resume path isn't needed. The Intel drivers are the only ones doing this. See [1] for a discussion on the issue. Following patch changes the RPM resume path to not take RTNL. [0] https://bugzilla.kernel.org/show_bug.cgi?id=215129 [1] https://lore.kernel.org/netdev/20211125074949.5f897431@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com/t/ Fixes: bd869245a3dc ("net: core: try to runtime-resume detached device in __dev_open") Fixes: f32a21376573 ("ethtool: runtime-resume netdev parent before ethtool ioctl ops") Tested-by: Martin Stolpe Signed-off-by: Heiner Kallweit Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20211220201844.2714498-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b597b8bfb910..446894dde182 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -9254,7 +9254,7 @@ static int __maybe_unused igb_suspend(struct device *dev) return __igb_shutdown(to_pci_dev(dev), NULL, 0); } -static int __maybe_unused igb_resume(struct device *dev) +static int __maybe_unused __igb_resume(struct device *dev, bool rpm) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -9297,17 +9297,24 @@ static int __maybe_unused igb_resume(struct device *dev) wr32(E1000_WUS, ~0); - rtnl_lock(); + if (!rpm) + rtnl_lock(); if (!err && netif_running(netdev)) err = __igb_open(netdev, true); if (!err) netif_device_attach(netdev); - rtnl_unlock(); + if (!rpm) + rtnl_unlock(); return err; } +static int __maybe_unused igb_resume(struct device *dev) +{ + return __igb_resume(dev, false); +} + static int __maybe_unused igb_runtime_idle(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); @@ -9326,7 +9333,7 @@ static int __maybe_unused igb_runtime_suspend(struct device *dev) static int __maybe_unused igb_runtime_resume(struct device *dev) { - return igb_resume(dev); + return __igb_resume(dev, true); } static void igb_shutdown(struct pci_dev *pdev) @@ -9442,7 +9449,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. + * resembles the first-half of the __igb_resume routine. **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { @@ -9482,7 +9489,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. + * second-half of the __igb_resume routine. */ static void igb_io_resume(struct pci_dev *pdev) { From b1e0887379422975f237d43d8839b751a6bcf154 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sat, 18 Dec 2021 02:18:40 +0000 Subject: [PATCH 664/868] usb: gadget: f_fs: Clear ffs_eventfd in ffs_data_clear. ffs_data_clear is indirectly called from both ffs_fs_kill_sb and ffs_ep0_release, so it ends up being called twice when userland closes ep0 and then unmounts f_fs. If userland provided an eventfd along with function's USB descriptors, it ends up calling eventfd_ctx_put as many times, causing a refcount underflow. NULL-ify ffs_eventfd to prevent these extraneous eventfd_ctx_put calls. Also, set epfiles to NULL right after de-allocating it, for readability. For completeness, ffs_data_clear actually ends up being called thrice, the last call being before the whole ffs structure gets freed, so when this specific sequence happens there is a second underflow happening (but not being reported): /sys/kernel/debug/tracing# modprobe usb_f_fs /sys/kernel/debug/tracing# echo ffs_data_clear > set_ftrace_filter /sys/kernel/debug/tracing# echo function > current_tracer /sys/kernel/debug/tracing# echo 1 > tracing_on (setup gadget, run and kill function userland process, teardown gadget) /sys/kernel/debug/tracing# echo 0 > tracing_on /sys/kernel/debug/tracing# cat trace smartcard-openp-436 [000] ..... 1946.208786: ffs_data_clear <-ffs_data_closed smartcard-openp-431 [000] ..... 1946.279147: ffs_data_clear <-ffs_data_closed smartcard-openp-431 [000] .n... 1946.905512: ffs_data_clear <-ffs_data_put Warning output corresponding to above trace: [ 1946.284139] WARNING: CPU: 0 PID: 431 at lib/refcount.c:28 refcount_warn_saturate+0x110/0x15c [ 1946.293094] refcount_t: underflow; use-after-free. [ 1946.298164] Modules linked in: usb_f_ncm(E) u_ether(E) usb_f_fs(E) hci_uart(E) btqca(E) btrtl(E) btbcm(E) btintel(E) bluetooth(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) bcm2835_v4l2(CE) bcm2835_mmal_vchiq(CE) videobuf2_vmalloc(E) videobuf2_memops(E) sha512_generic(E) videobuf2_v4l2(E) sha512_arm(E) videobuf2_common(E) videodev(E) cpufreq_dt(E) snd_bcm2835(CE) brcmfmac(E) mc(E) vc4(E) ctr(E) brcmutil(E) snd_soc_core(E) snd_pcm_dmaengine(E) drbg(E) snd_pcm(E) snd_timer(E) snd(E) soundcore(E) drm_kms_helper(E) cec(E) ansi_cprng(E) rc_core(E) syscopyarea(E) raspberrypi_cpufreq(E) sysfillrect(E) sysimgblt(E) cfg80211(E) max17040_battery(OE) raspberrypi_hwmon(E) fb_sys_fops(E) regmap_i2c(E) ecdh_generic(E) rfkill(E) ecc(E) bcm2835_rng(E) rng_core(E) vchiq(CE) leds_gpio(E) libcomposite(E) fuse(E) configfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) crc32c_generic(E) sdhci_iproc(E) sdhci_pltfm(E) sdhci(E) [ 1946.399633] CPU: 0 PID: 431 Comm: smartcard-openp Tainted: G C OE 5.15.0-1-rpi #1 Debian 5.15.3-1 [ 1946.417950] Hardware name: BCM2835 [ 1946.425442] Backtrace: [ 1946.432048] [] (dump_backtrace) from [] (show_stack+0x20/0x24) [ 1946.448226] r7:00000009 r6:0000001c r5:c04a948c r4:c0a64e2c [ 1946.458412] [] (show_stack) from [] (dump_stack+0x28/0x30) [ 1946.470380] [] (dump_stack) from [] (__warn+0xe8/0x154) [ 1946.482067] r5:c04a948c r4:c0a71dc8 [ 1946.490184] [] (__warn) from [] (warn_slowpath_fmt+0xa0/0xe4) [ 1946.506758] r7:00000009 r6:0000001c r5:c0a71dc8 r4:c0a71e04 [ 1946.517070] [] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x110/0x15c) [ 1946.535309] r8:c0100224 r7:c0dfcb84 r6:ffffffff r5:c3b84c00 r4:c24a17c0 [ 1946.546708] [] (refcount_warn_saturate) from [] (eventfd_ctx_put+0x48/0x74) [ 1946.564476] [] (eventfd_ctx_put) from [] (ffs_data_clear+0xd0/0x118 [usb_f_fs]) [ 1946.582664] r5:c3b84c00 r4:c2695b00 [ 1946.590668] [] (ffs_data_clear [usb_f_fs]) from [] (ffs_data_closed+0x9c/0x150 [usb_f_fs]) [ 1946.609608] r5:bf54d014 r4:c2695b00 [ 1946.617522] [] (ffs_data_closed [usb_f_fs]) from [] (ffs_fs_kill_sb+0x2c/0x30 [usb_f_fs]) [ 1946.636217] r7:c0dfcb84 r6:c3a12260 r5:bf54d014 r4:c229f000 [ 1946.646273] [] (ffs_fs_kill_sb [usb_f_fs]) from [] (deactivate_locked_super+0x54/0x9c) [ 1946.664893] r5:bf54d014 r4:c229f000 [ 1946.672921] [] (deactivate_locked_super) from [] (deactivate_super+0x60/0x64) [ 1946.690722] r5:c2a09000 r4:c229f000 [ 1946.698706] [] (deactivate_super) from [] (cleanup_mnt+0xe4/0x14c) [ 1946.715553] r5:c2a09000 r4:00000000 [ 1946.723528] [] (cleanup_mnt) from [] (__cleanup_mnt+0x1c/0x20) [ 1946.739922] r7:c0dfcb84 r6:c3a12260 r5:c3a126fc r4:00000000 [ 1946.750088] [] (__cleanup_mnt) from [] (task_work_run+0x84/0xb8) [ 1946.766602] [] (task_work_run) from [] (do_work_pending+0x470/0x56c) [ 1946.783540] r7:5ac3c35a r6:c0d0424c r5:c200bfb0 r4:c200a000 [ 1946.793614] [] (do_work_pending) from [] (slow_work_pending+0xc/0x20) [ 1946.810553] Exception stack(0xc200bfb0 to 0xc200bff8) [ 1946.820129] bfa0: 00000000 00000000 000000aa b5e21430 [ 1946.837104] bfc0: bef867a0 00000001 bef86840 00000034 bef86838 bef86790 bef86794 bef867a0 [ 1946.854125] bfe0: 00000000 bef86798 b67b7a1c b6d626a4 60000010 b5a23760 [ 1946.865335] r10:00000000 r9:c200a000 r8:c0100224 r7:00000034 r6:bef86840 r5:00000001 [ 1946.881914] r4:bef867a0 [ 1946.888793] ---[ end trace 7387f2a9725b28d0 ]--- Fixes: 5e33f6fdf735 ("usb: gadget: ffs: add eventfd notification about ffs events") Cc: stable Signed-off-by: Vincent Pelletier Link: https://lore.kernel.org/r/f79eeea29f3f98de6782a064ec0f7351ad2f598f.1639793920.git.plr.vincent@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index e20c19a0f106..a7e069b18544 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1773,11 +1773,15 @@ static void ffs_data_clear(struct ffs_data *ffs) BUG_ON(ffs->gadget); - if (ffs->epfiles) + if (ffs->epfiles) { ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count); + ffs->epfiles = NULL; + } - if (ffs->ffs_eventfd) + if (ffs->ffs_eventfd) { eventfd_ctx_put(ffs->ffs_eventfd); + ffs->ffs_eventfd = NULL; + } kfree(ffs->raw_descs_data); kfree(ffs->raw_strings); @@ -1790,7 +1794,6 @@ static void ffs_data_reset(struct ffs_data *ffs) ffs_data_clear(ffs); - ffs->epfiles = NULL; ffs->raw_descs_data = NULL; ffs->raw_descs = NULL; ffs->raw_strings = NULL; From e3d4621c22f90c33321ae6a6baab60cdb8e5a77c Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:46 +0800 Subject: [PATCH 665/868] usb: mtu3: fix interval value for intr and isoc Use the Interval value from isoc/intr endpoint descriptor, no need minus one. The original code doesn't cause transfer error for normal cases, but it may have side effect with respond time of ERDY or tPingTimeout. Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index a9a65b4bbfed..c51be015345b 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -77,7 +77,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); if (usb_endpoint_xfer_isoc(desc) && comp_desc) mult = comp_desc->bmAttributes; } @@ -89,7 +89,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_isoc(desc) || usb_endpoint_xfer_int(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); mult = usb_endpoint_maxp_mult(desc) - 1; } break; From a7aae769ca626819a7f9f078ebdc69a8a1b00c81 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:47 +0800 Subject: [PATCH 666/868] usb: mtu3: add memory barrier before set GPD's HWO There is a seldom issue that the controller access invalid address and trigger devapc or emimpu violation. That is due to memory access is out of order and cause gpd data is not correct. Add mb() to prohibit compiler or cpu from reordering to make sure GPD is fully written before setting its HWO. Fixes: 48e0d3735aa5 ("usb: mtu3: supports new QMU format") Cc: stable@vger.kernel.org Reported-by: Eddie Hung Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-2-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_qmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c index 3f414f91b589..2ea3157ddb6e 100644 --- a/drivers/usb/mtu3/mtu3_qmu.c +++ b/drivers/usb/mtu3/mtu3_qmu.c @@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP); } + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma)); ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma)); gpd->dw3_info = cpu_to_le32(ext_addr); + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum) return; } mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY); - + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); /* by pass the current GDP */ gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO); From 8c313e3bfd9adae8d5c4ba1cc696dcbc86fbf9bf Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:48 +0800 Subject: [PATCH 667/868] usb: mtu3: fix list_head check warning This is caused by uninitialization of list_head. BUG: KASAN: use-after-free in __list_del_entry_valid+0x34/0xe4 Call trace: dump_backtrace+0x0/0x298 show_stack+0x24/0x34 dump_stack+0x130/0x1a8 print_address_description+0x88/0x56c __kasan_report+0x1b8/0x2a0 kasan_report+0x14/0x20 __asan_load8+0x9c/0xa0 __list_del_entry_valid+0x34/0xe4 mtu3_req_complete+0x4c/0x300 [mtu3] mtu3_gadget_stop+0x168/0x448 [mtu3] usb_gadget_unregister_driver+0x204/0x3a0 unregister_gadget_item+0x44/0xa4 Fixes: 83374e035b62 ("usb: mtu3: add tracepoints to help debug") Cc: stable@vger.kernel.org Reported-by: Yuwen Ng Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-3-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index c51be015345b..b6c8a4a99c4d 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -235,6 +235,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) mreq->request.dma = DMA_ADDR_INVALID; mreq->epnum = mep->epnum; mreq->mep = mep; + INIT_LIST_HEAD(&mreq->list); trace_mtu3_alloc_request(mreq); return &mreq->request; From 43f3b8cbcf93da7c2755af4a543280c31f4adf16 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:49 +0800 Subject: [PATCH 668/868] usb: mtu3: set interval of FS intr and isoc endpoint Add support to set interval also for FS intr and isoc endpoint. Fixes: 4d79e042ed8b ("usb: mtu3: add support for usb3.1 IP") Cc: stable@vger.kernel.org Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-4-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index b6c8a4a99c4d..9977600616d7 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -92,6 +92,13 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) interval = clamp_val(interval, 1, 16); mult = usb_endpoint_maxp_mult(desc) - 1; } + break; + case USB_SPEED_FULL: + if (usb_endpoint_xfer_isoc(desc)) + interval = clamp_val(desc->bInterval, 1, 16); + else if (usb_endpoint_xfer_int(desc)) + interval = clamp_val(desc->bInterval, 1, 255); + break; default: break; /*others are ignored */ From 67f74302f45d5d862f22ced3297624e50ac352f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 7 Dec 2021 10:10:15 +0100 Subject: [PATCH 669/868] drm/nouveau: wait for the exclusive fence after the shared ones v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always waiting for the exclusive fence resulted on some performance regressions. So try to wait for the shared fences first, then the exclusive fence should always be signaled already. v2: fix incorrectly placed "(", add some comment why we do this. Signed-off-by: Christian König Tested-by: Stefan Fritsch Tested-by: Dan Moulding Acked-by: Ben Skeggs Signed-off-by: Christian König Cc: Link: https://patchwork.freedesktop.org/patch/msgid/20211209102335.18321-1-christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 54 +++++++++++++------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 05d0b3eb3690..0ae416aa76dc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -353,34 +353,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (ret) return ret; + + fobj = NULL; + } else { + fobj = dma_resv_shared_list(resv); } - fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - - if (fence) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); - } - - if (must_wait) - ret = dma_fence_wait(fence, intr); - - return ret; - } - - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { + /* Waiting for the exclusive fence first causes performance regressions + * under some circumstances. So manually wait for the shared ones first. + */ + for (i = 0; i < (fobj ? fobj->shared_count : 0) && !ret; ++i) { struct nouveau_channel *prev = NULL; bool must_wait = true; @@ -400,6 +382,26 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e ret = dma_fence_wait(fence, intr); } + fence = dma_resv_excl_fence(resv); + if (fence) { + struct nouveau_channel *prev = NULL; + bool must_wait = true; + + f = nouveau_local_fence(fence, chan->drm); + if (f) { + rcu_read_lock(); + prev = rcu_dereference(f->channel); + if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + must_wait = false; + rcu_read_unlock(); + } + + if (must_wait) + ret = dma_fence_wait(fence, intr); + + return ret; + } + return ret; } From cfd0d84ba28c18b531648c9d4a35ecca89ad9901 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 20 Dec 2021 11:01:50 -0800 Subject: [PATCH 670/868] binder: fix async_free_space accounting for empty parcels In 4.13, commit 74310e06be4d ("android: binder: Move buffer out of area shared with user space") fixed a kernel structure visibility issue. As part of that patch, sizeof(void *) was used as the buffer size for 0-length data payloads so the driver could detect abusive clients sending 0-length asynchronous transactions to a server by enforcing limits on async_free_size. Unfortunately, on the "free" side, the accounting of async_free_space did not add the sizeof(void *) back. The result was that up to 8-bytes of async_free_space were leaked on every async transaction of 8-bytes or less. These small transactions are uncommon, so this accounting issue has gone undetected for several years. The fix is to use "buffer_size" (the allocated buffer size) instead of "size" (the logical buffer size) when updating the async_free_space during the free operation. These are the same except for this corner case of asynchronous transactions with payloads < 8 bytes. Fixes: 74310e06be4d ("android: binder: Move buffer out of area shared with user space") Signed-off-by: Todd Kjos Cc: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/r/20211220190150.2107077-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 340515f54498..47bc74a8c7b6 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -671,7 +671,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size); if (buffer->async_transaction) { - alloc->free_async_space += size + sizeof(struct binder_buffer); + alloc->free_async_space += buffer_size + sizeof(struct binder_buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", From 3a0152b219523227c2a62a0a122cf99608287176 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Mon, 20 Dec 2021 19:58:56 +0000 Subject: [PATCH 671/868] nitro_enclaves: Use get_user_pages_unlocked() call to handle mmap assert After commit 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()"), the call to get_user_pages() will trigger the mmap assert. static inline void mmap_assert_locked(struct mm_struct *mm) { lockdep_assert_held(&mm->mmap_lock); VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } [ 62.521410] kernel BUG at include/linux/mmap_lock.h:156! ........................................................... [ 62.538938] RIP: 0010:find_vma+0x32/0x80 ........................................................... [ 62.605889] Call Trace: [ 62.608502] [ 62.610956] ? lock_timer_base+0x61/0x80 [ 62.614106] find_extend_vma+0x19/0x80 [ 62.617195] __get_user_pages+0x9b/0x6a0 [ 62.620356] __gup_longterm_locked+0x42d/0x450 [ 62.623721] ? finish_wait+0x41/0x80 [ 62.626748] ? __kmalloc+0x178/0x2f0 [ 62.629768] ne_set_user_memory_region_ioctl.isra.0+0x225/0x6a0 [nitro_enclaves] [ 62.635776] ne_enclave_ioctl+0x1cf/0x6d7 [nitro_enclaves] [ 62.639541] __x64_sys_ioctl+0x82/0xb0 [ 62.642620] do_syscall_64+0x3b/0x90 [ 62.645642] entry_SYSCALL_64_after_hwframe+0x44/0xae Use get_user_pages_unlocked() when setting the enclave memory regions. That's a similar pattern as mmap_read_lock() used together with get_user_pages(). Fixes: 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()") Cc: stable@vger.kernel.org Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20211220195856.6549-1-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/nitro_enclaves/ne_misc_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c index 8939612ee0e0..6894ccb868a6 100644 --- a/drivers/virt/nitro_enclaves/ne_misc_dev.c +++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c @@ -886,8 +886,9 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave, goto put_pages; } - gup_rc = get_user_pages(mem_region.userspace_addr + memory_size, 1, FOLL_GET, - ne_mem_region->pages + i, NULL); + gup_rc = get_user_pages_unlocked(mem_region.userspace_addr + memory_size, 1, + ne_mem_region->pages + i, FOLL_GET); + if (gup_rc < 0) { rc = gup_rc; From e4844092581ceec22489b66c42edc88bc6079783 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 21 Dec 2021 13:28:25 +0200 Subject: [PATCH 672/868] xhci: Fresco FL1100 controller should not have BROKEN_MSI quirk set. The Fresco Logic FL1100 controller needs the TRUST_TX_LENGTH quirk like other Fresco controllers, but should not have the BROKEN_MSI quirks set. BROKEN_MSI quirk causes issues in detecting usb drives connected to docks with this FL1100 controller. The BROKEN_MSI flag was apparently accidentally set together with the TRUST_TX_LENGTH quirk Original patch went to stable so this should go there as well. Fixes: ea0f69d82119 ("xhci: Enable trust tx length quirk for Fresco FL11 USB controller") Cc: stable@vger.kernel.org cc: Nikolay Martynov Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20211221112825.54690-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 3af017883231..5c351970cdf1 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -123,7 +123,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) /* Look for vendor-specific quirks */ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK || - pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 || pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) { if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK && pdev->revision == 0x0) { @@ -158,6 +157,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009) xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && + pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; From ff31ee0a0f471776f67be5e5275c18d17736fc6b Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Wed, 15 Dec 2021 15:17:26 +0100 Subject: [PATCH 673/868] mmc: mmci: stm32: clear DLYB_CR after sending tuning command During test campaign, and especially after several unbind/bind sequences, it has been seen that the SD-card on SDMMC1 thread could freeze. The freeze always appear on a CMD23 following a CMD19. Checking SDMMC internal registers shows that the tuning command (CMD19) has failed. The freeze is then due to the delay block involved in the tuning sequence. To correct this, clear the delay block register DLYB_CR register after the tuning commands. Signed-off-by: Christophe Kerello Signed-off-by: Yann Gautier Reviewed-by: Linus Walleij Fixes: 1103f807a3b9 ("mmc: mmci_sdmmc: Add execute tuning with delay block") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211215141727.4901-4-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index fdaa11f92fe6..a75d3dd34d18 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -441,6 +441,8 @@ static int sdmmc_dlyb_phase_tuning(struct mmci_host *host, u32 opcode) return -EINVAL; } + writel_relaxed(0, dlyb->base + DLYB_CR); + phase = end_of_len - max_len / 2; sdmmc_dlyb_set_cfgr(dlyb, dlyb->unit, phase, false); From ffb76a86f8096a8206be03b14adda6092e18e275 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Tue, 21 Dec 2021 15:00:34 +0800 Subject: [PATCH 674/868] ipmi: Fix UAF when uninstall ipmi_si and ipmi_msghandler module Hi, When testing install and uninstall of ipmi_si.ko and ipmi_msghandler.ko, the system crashed. The log as follows: [ 141.087026] BUG: unable to handle kernel paging request at ffffffffc09b3a5a [ 141.087241] PGD 8fe4c0d067 P4D 8fe4c0d067 PUD 8fe4c0f067 PMD 103ad89067 PTE 0 [ 141.087464] Oops: 0010 [#1] SMP NOPTI [ 141.087580] CPU: 67 PID: 668 Comm: kworker/67:1 Kdump: loaded Not tainted 4.18.0.x86_64 #47 [ 141.088009] Workqueue: events 0xffffffffc09b3a40 [ 141.088009] RIP: 0010:0xffffffffc09b3a5a [ 141.088009] Code: Bad RIP value. [ 141.088009] RSP: 0018:ffffb9094e2c3e88 EFLAGS: 00010246 [ 141.088009] RAX: 0000000000000000 RBX: ffff9abfdb1f04a0 RCX: 0000000000000000 [ 141.088009] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 141.088009] RBP: 0000000000000000 R08: ffff9abfffee3cb8 R09: 00000000000002e1 [ 141.088009] R10: ffffb9094cb73d90 R11: 00000000000f4240 R12: ffff9abfffee8700 [ 141.088009] R13: 0000000000000000 R14: ffff9abfdb1f04a0 R15: ffff9abfdb1f04a8 [ 141.088009] FS: 0000000000000000(0000) GS:ffff9abfffec0000(0000) knlGS:0000000000000000 [ 141.088009] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 141.088009] CR2: ffffffffc09b3a30 CR3: 0000008fe4c0a001 CR4: 00000000007606e0 [ 141.088009] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 141.088009] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 141.088009] PKRU: 55555554 [ 141.088009] Call Trace: [ 141.088009] ? process_one_work+0x195/0x390 [ 141.088009] ? worker_thread+0x30/0x390 [ 141.088009] ? process_one_work+0x390/0x390 [ 141.088009] ? kthread+0x10d/0x130 [ 141.088009] ? kthread_flush_work_fn+0x10/0x10 [ 141.088009] ? ret_from_fork+0x35/0x40] BUG: unable to handle kernel paging request at ffffffffc0b28a5a [ 200.223240] PGD 97fe00d067 P4D 97fe00d067 PUD 97fe00f067 PMD a580cbf067 PTE 0 [ 200.223464] Oops: 0010 [#1] SMP NOPTI [ 200.223579] CPU: 63 PID: 664 Comm: kworker/63:1 Kdump: loaded Not tainted 4.18.0.x86_64 #46 [ 200.224008] Workqueue: events 0xffffffffc0b28a40 [ 200.224008] RIP: 0010:0xffffffffc0b28a5a [ 200.224008] Code: Bad RIP value. [ 200.224008] RSP: 0018:ffffbf3c8e2a3e88 EFLAGS: 00010246 [ 200.224008] RAX: 0000000000000000 RBX: ffffa0799ad6bca0 RCX: 0000000000000000 [ 200.224008] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 200.224008] RBP: 0000000000000000 R08: ffff9fe43fde3cb8 R09: 00000000000000d5 [ 200.224008] R10: ffffbf3c8cb53d90 R11: 00000000000f4240 R12: ffff9fe43fde8700 [ 200.224008] R13: 0000000000000000 R14: ffffa0799ad6bca0 R15: ffffa0799ad6bca8 [ 200.224008] FS: 0000000000000000(0000) GS:ffff9fe43fdc0000(0000) knlGS:0000000000000000 [ 200.224008] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 200.224008] CR2: ffffffffc0b28a30 CR3: 00000097fe00a002 CR4: 00000000007606e0 [ 200.224008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 200.224008] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 200.224008] PKRU: 55555554 [ 200.224008] Call Trace: [ 200.224008] ? process_one_work+0x195/0x390 [ 200.224008] ? worker_thread+0x30/0x390 [ 200.224008] ? process_one_work+0x390/0x390 [ 200.224008] ? kthread+0x10d/0x130 [ 200.224008] ? kthread_flush_work_fn+0x10/0x10 [ 200.224008] ? ret_from_fork+0x35/0x40 [ 200.224008] kernel fault(0x1) notification starting on CPU 63 [ 200.224008] kernel fault(0x1) notification finished on CPU 63 [ 200.224008] CR2: ffffffffc0b28a5a [ 200.224008] ---[ end trace c82a412d93f57412 ]--- The reason is as follows: T1: rmmod ipmi_si. ->ipmi_unregister_smi() -> ipmi_bmc_unregister() -> __ipmi_bmc_unregister() -> kref_put(&bmc->usecount, cleanup_bmc_device); -> schedule_work(&bmc->remove_work); T2: rmmod ipmi_msghandler. ipmi_msghander module uninstalled, and the module space will be freed. T3: bmc->remove_work doing cleanup the bmc resource. -> cleanup_bmc_work() -> platform_device_unregister(&bmc->pdev); -> platform_device_del(pdev); -> device_del(&pdev->dev); -> kobject_uevent(&dev->kobj, KOBJ_REMOVE); -> kobject_uevent_env() -> dev_uevent() -> if (dev->type && dev->type->name) 'dev->type'(bmc_device_type) pointer space has freed when uninstall ipmi_msghander module, 'dev->type->name' cause the system crash. drivers/char/ipmi/ipmi_msghandler.c: 2820 static const struct device_type bmc_device_type = { 2821 .groups = bmc_dev_attr_groups, 2822 }; Steps to reproduce: Add a time delay in cleanup_bmc_work() function, and uninstall ipmi_si and ipmi_msghandler module. 2910 static void cleanup_bmc_work(struct work_struct *work) 2911 { 2912 struct bmc_device *bmc = container_of(work, struct bmc_device, 2913 remove_work); 2914 int id = bmc->pdev.id; /* Unregister overwrites id */ 2915 2916 msleep(3000); <--- 2917 platform_device_unregister(&bmc->pdev); 2918 ida_simple_remove(&ipmi_bmc_ida, id); 2919 } Use 'remove_work_wq' instead of 'system_wq' to solve this issues. Fixes: b2cfd8ab4add ("ipmi: Rework device id and guid handling to catch changing BMCs") Signed-off-by: Wu Bo Message-Id: <1640070034-56671-1-git-send-email-wubo40@huawei.com> Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 266c7bc58dda..c59265146e9c 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3031,7 +3031,7 @@ cleanup_bmc_device(struct kref *ref) * with removing the device attributes while reading a device * attribute. */ - schedule_work(&bmc->remove_work); + queue_work(remove_work_wq, &bmc->remove_work); } /* From 3e4d9a485029aa9e172dab5420abe775fd86f8e8 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 20 Dec 2021 14:06:56 +0100 Subject: [PATCH 675/868] gpio: virtio: remove timeout The driver imposes an arbitrary one second timeout on virtio requests, but the specification doesn't prevent the virtio device from taking longer to process requests, so remove this timeout to support all systems and device implementations. Fixes: 3a29355a22c0275fe86 ("gpio: Add virtio-gpio driver") Signed-off-by: Vincent Whitchurch Acked-by: Michael S. Tsirkin Acked-by: Viresh Kumar Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtio.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-virtio.c b/drivers/gpio/gpio-virtio.c index 84f96b78f32a..9f4941bc5760 100644 --- a/drivers/gpio/gpio-virtio.c +++ b/drivers/gpio/gpio-virtio.c @@ -100,11 +100,7 @@ static int _virtio_gpio_req(struct virtio_gpio *vgpio, u16 type, u16 gpio, virtqueue_kick(vgpio->request_vq); mutex_unlock(&vgpio->lock); - if (!wait_for_completion_timeout(&line->completion, HZ)) { - dev_err(dev, "GPIO operation timed out\n"); - ret = -ETIMEDOUT; - goto out; - } + wait_for_completion(&line->completion); if (unlikely(res->status != VIRTIO_GPIO_STATUS_OK)) { dev_err(dev, "GPIO request failed: %d\n", gpio); From 3f345e907a8e7c56fdebf7231cd67afc85d02aaa Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 21 Dec 2021 17:03:52 +0300 Subject: [PATCH 676/868] usb: typec: ucsi: Only check the contract if there is a connection The driver must make sure there is an actual connection before checking details about the USB Power Delivery contract. Those details are not valid unless there is a connection. This fixes NULL pointer dereference that is caused by an attempt to register bogus partner alternate mode that the firmware on some platform may report before the actual connection. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215117 Fixes: 6cbe4b2d5a3f ("usb: typec: ucsi: Check the partner alt modes always if there is PD contract") Reported-by: Chris Hixon Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/eb34f98f-00ef-3238-2daa-80481116035d@leemhuis.info/ Link: https://lore.kernel.org/r/20211221140352.45501-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 6aa28384f77f..08561bf7c40c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1150,7 +1150,9 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) ret = 0; } - if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == UCSI_CONSTAT_PWR_OPMODE_PD) { + if (con->partner && + UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == + UCSI_CONSTAT_PWR_OPMODE_PD) { ucsi_get_src_pdos(con); ucsi_check_altmodes(con); } From 804034c4ffc502795cea9b3867acb2ec7fad99ba Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 10 Dec 2021 07:07:53 +0000 Subject: [PATCH 677/868] platform/mellanox: mlxbf-pmc: Fix an IS_ERR() vs NULL bug in mlxbf_pmc_map_counters The devm_ioremap() function returns NULL on error, it doesn't return error pointers. Also according to doc of device_property_read_u64_array, values in info array are properties of device or NULL. Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211210070753.10761-1-linmq006@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/mellanox/mlxbf-pmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 04bc3b50aa7a..65b4a819f1bd 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -1374,8 +1374,8 @@ static int mlxbf_pmc_map_counters(struct device *dev) pmc->block[i].counters = info[2]; pmc->block[i].type = info[3]; - if (IS_ERR(pmc->block[i].mmio_base)) - return PTR_ERR(pmc->block[i].mmio_base); + if (!pmc->block[i].mmio_base) + return -ENOMEM; ret = mlxbf_pmc_create_groups(dev, i); if (ret) From 09fc14061f3ed28899c23b8714c066946fdbd43e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 10 Dec 2021 08:35:29 -0600 Subject: [PATCH 678/868] platform/x86: amd-pmc: only use callbacks for suspend This driver is intended to be used exclusively for suspend to idle so callbacks to send OS_HINT during hibernate and S5 will set OS_HINT at the wrong time leading to an undefined behavior. Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20211210143529.10594-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 841c44cd64c2..230593ae5d6d 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -508,7 +508,8 @@ static int __maybe_unused amd_pmc_resume(struct device *dev) } static const struct dev_pm_ops amd_pmc_pm_ops = { - SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(amd_pmc_suspend, amd_pmc_resume) + .suspend_noirq = amd_pmc_suspend, + .resume_noirq = amd_pmc_resume, }; static const struct pci_device_id pmc_pci_ids[] = { From eb66fb03a727cde0ab9b1a3858de55c26f3007da Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Tue, 14 Dec 2021 04:18:36 -0800 Subject: [PATCH 679/868] platform/x86: apple-gmux: use resource_size() with res This should be (res->end - res->start + 1) here actually, use resource_size() derectly. Signed-off-by: Wang Qing Link: https://lore.kernel.org/r/1639484316-75873-1-git-send-email-wangqing@vivo.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/apple-gmux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 9aae45a45200..57553f9b4d1d 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) } gmux_data->iostart = res->start; - gmux_data->iolen = res->end - res->start; + gmux_data->iolen = resource_size(res); if (gmux_data->iolen < GMUX_MIN_IO_LEN) { pr_err("gmux I/O region too small (%lu < %u)\n", From 8f66fce0f46560b9e910787ff7ad0974441c4f9c Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 21 Dec 2021 13:21:22 -0500 Subject: [PATCH 680/868] parisc: Correct completer in lws start The completer in the "or,ev %r1,%r30,%r30" instruction is reversed, so we are not clipping the LWS number when we are called from a 32-bit process (W=0). We need to nulify the following depdi instruction when the least-significant bit of %r30 is 1. If the %r20 register is not clipped, a user process could perform a LWS call that would branch to an undefined location in the kernel and potentially crash the machine. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # 4.19+ Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index d2497b339d13..65c88ca7a7ac 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -472,7 +472,7 @@ lws_start: extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into * the bottom of sp temporarily */ - or,ev %r1,%r30,%r30 + or,od %r1,%r30,%r30 /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 From d3a5a68cff47f6eead84504c3c28376b85053242 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 21 Dec 2021 13:33:16 -0500 Subject: [PATCH 681/868] parisc: Fix mask used to select futex spinlock The address bits used to select the futex spinlock need to match those used in the LWS code in syscall.S. The mask 0x3f8 only selects 7 bits. It should select 8 bits. This change fixes the glibc nptl/tst-cond24 and nptl/tst-cond25 tests. Signed-off-by: John David Anglin Fixes: 53a42b6324b8 ("parisc: Switch to more fine grained lws locks") Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/futex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 70cf8f0a7617..9cd4dd6e63ad 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -14,7 +14,7 @@ static inline void _futex_spin_lock(u32 __user *uaddr) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; preempt_disable(); arch_spin_lock(s); @@ -24,7 +24,7 @@ static inline void _futex_spin_unlock(u32 __user *uaddr) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; arch_spin_unlock(s); preempt_enable(); From cb8747b7d2a9e3d687a19a007575071d4b71cd05 Mon Sep 17 00:00:00 2001 From: Ismael Luceno Date: Mon, 15 Nov 2021 14:46:47 +0100 Subject: [PATCH 682/868] uapi: Fix undefined __always_inline on non-glibc systems This macro is defined by glibc itself, which makes the issue go unnoticed on those systems. On non-glibc systems it causes build failures on several utilities and libraries, like bpftool and objtool. Fixes: 1d509f2a6ebc ("x86/insn: Support big endian cross-compiles") Fixes: 2d7ce0e8a704 ("tools/virtio: more stubs") Fixes: 3fb321fde22d ("selftests/net: ipv6 flowlabel") Fixes: 50b3ed57dee9 ("selftests/bpf: test bpf flow dissection") Fixes: 9cacf81f8161 ("bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE") Fixes: a4b2061242ec ("tools include uapi: Grab a copy of linux/in.h") Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality") Fixes: c0dd967818a2 ("tools, include: Grab a copy of linux/erspan.h") Fixes: c4b6014e8bb0 ("tools: Add copy of perf_event.h to tools/include/linux/") Signed-off-by: Ismael Luceno Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20211115134647.1921-1-ismael@iodev.co.uk Cc: Martin Schwidefsky Cc: Vasily Gorbik --- include/uapi/linux/byteorder/big_endian.h | 1 + include/uapi/linux/byteorder/little_endian.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h index 2199adc6a6c2..80aa5c41a763 100644 --- a/include/uapi/linux/byteorder/big_endian.h +++ b/include/uapi/linux/byteorder/big_endian.h @@ -9,6 +9,7 @@ #define __BIG_ENDIAN_BITFIELD #endif +#include #include #include diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h index 601c904fd5cd..cd98982e7523 100644 --- a/include/uapi/linux/byteorder/little_endian.h +++ b/include/uapi/linux/byteorder/little_endian.h @@ -9,6 +9,7 @@ #define __LITTLE_ENDIAN_BITFIELD #endif +#include #include #include From dcce50e6cc4d86a63dc0a9a6ee7d4f948ccd53a1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 8 Nov 2021 14:35:59 -0800 Subject: [PATCH 683/868] compiler.h: Fix annotation macro misplacement with Clang When building with Clang and CONFIG_TRACE_BRANCH_PROFILING, there are a lot of unreachable warnings, like: arch/x86/kernel/traps.o: warning: objtool: handle_xfd_event()+0x134: unreachable instruction Without an input to the inline asm, 'volatile' is ignored for some reason and Clang feels free to move the reachable() annotation away from its intended location. Fix that by re-adding the counter value to the inputs. Fixes: f1069a8756b9 ("compiler.h: Avoid using inline asm operand modifiers") Fixes: c199f64ff93c ("instrumentation.h: Avoid using inline asm operand modifiers") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/0417e96909b97a406323409210de7bf13df0b170.1636410380.git.jpoimboe@redhat.com Cc: Peter Zijlstra Cc: x86@kernel.org Cc: Vasily Gorbik Cc: Miroslav Benes --- include/linux/compiler.h | 4 ++-- include/linux/instrumentation.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3d5af56337bd..429dcebe2b99 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -121,7 +121,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_reachable() __annotate_reachable(__COUNTER__) @@ -129,7 +129,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index fa2cd8c63dcc..24359b4a9605 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -11,7 +11,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_begin\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -50,7 +50,7 @@ asm volatile(__stringify(c) ": nop\n\t" \ ".pushsection .discard.instr_end\n\t" \ ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t"); \ + ".popsection\n\t" : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else From 1c15b05baea71a5ff98235783e3e4ad227760876 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Tue, 21 Dec 2021 12:13:45 +0100 Subject: [PATCH 684/868] bonding: fix ad_actor_system option setting to default When 802.3ad bond mode is configured the ad_actor_system option is set to "00:00:00:00:00:00". But when trying to set the all-zeroes MAC as actors' system address it was failing with EINVAL. An all-zeroes ethernet address is valid, only multicast addresses are not valid values. Fixes: 171a42c38c6e ("bonding: add netlink support for sys prio, actor sys mac, and port key") Signed-off-by: Fernando Fernandez Mancera Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20211221111345.2462-1-ffmancera@riseup.net Signed-off-by: Jakub Kicinski --- Documentation/networking/bonding.rst | 11 ++++++----- drivers/net/bonding/bond_options.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 31cfd7d674a6..c0a789b00806 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -196,11 +196,12 @@ ad_actor_sys_prio ad_actor_system In an AD system, this specifies the mac-address for the actor in - protocol packet exchanges (LACPDUs). The value cannot be NULL or - multicast. It is preferred to have the local-admin bit set for this - mac but driver does not enforce it. If the value is not given then - system defaults to using the masters' mac address as actors' system - address. + protocol packet exchanges (LACPDUs). The value cannot be a multicast + address. If the all-zeroes MAC is specified, bonding will internally + use the MAC of the bond itself. It is preferred to have the + local-admin bit set for this mac but driver does not enforce it. If + the value is not given then system defaults to using the masters' + mac address as actors' system address. This parameter has effect only in 802.3ad mode and is available through SysFs interface. diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index a8fde3bc458f..b93337b5a721 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1526,7 +1526,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, mac = (u8 *)&newval->value; } - if (!is_valid_ether_addr(mac)) + if (is_multicast_ether_addr(mac)) goto err; netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac); From aacb2016063dfa6da9378d76734cd9dc1e977619 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Dec 2021 11:40:44 +0900 Subject: [PATCH 685/868] parisc: remove ARCH_DEFCONFIG Commit 2a86f6612164 ("kbuild: use KBUILD_DEFCONFIG as the fallback for DEFCONFIG_LIST") removed ARCH_DEFCONFIG because it does not make much sense. In the same development cycle, Commit ededa081ed20 ("parisc: Fix defconfig selection") added ARCH_DEFCONFIG for parisc. Please use KBUILD_DEFCONFIG in arch/*/Makefile for defconfig selection. Signed-off-by: Masahiro Yamada Acked-by: Helge Deller Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b2188da09c73..011dc32fdb4d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -85,11 +85,6 @@ config MMU config STACK_GROWSUP def_bool y -config ARCH_DEFCONFIG - string - default "arch/parisc/configs/generic-32bit_defconfig" if !64BIT - default "arch/parisc/configs/generic-64bit_defconfig" if 64BIT - config GENERIC_LOCKBREAK bool default y From db6d6afe382de5a65d6ccf51253ab48b8e8336c3 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 22 Dec 2021 15:12:07 +0800 Subject: [PATCH 686/868] fjes: Check for error irq I find that platform_get_irq() will not always succeed. It will return error irq in case of the failure. Therefore, it might be better to check it if order to avoid the use of error irq. Fixes: 658d439b2292 ("fjes: Introduce FUJITSU Extended Socket Network Device driver") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b06c17ac8d4e..ebd287039a54 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1262,6 +1262,11 @@ static int fjes_probe(struct platform_device *plat_dev) hw->hw_res.start = res->start; hw->hw_res.size = resource_size(res); hw->hw_res.irq = platform_get_irq(plat_dev, 0); + if (hw->hw_res.irq < 0) { + err = hw->hw_res.irq; + goto err_free_control_wq; + } + err = fjes_hw_init(&adapter->hw); if (err) goto err_free_control_wq; From cb93b3e11d405f20a405a07482d01147ef4934a3 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 22 Dec 2021 15:41:12 +0800 Subject: [PATCH 687/868] drivers: net: smc911x: Check for error irq Because platform_get_irq() could fail and return error irq. Therefore, it might be better to check it if order to avoid the use of error irq. Fixes: ae150435b59e ("smsc: Move the SMC (SMSC) drivers") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc911x.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 89381f796985..dd6f69ced4ee 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -2072,6 +2072,11 @@ static int smc911x_drv_probe(struct platform_device *pdev) ndev->dma = (unsigned char)-1; ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq < 0) { + ret = ndev->irq; + goto release_both; + } + lp = netdev_priv(ndev); lp->netdev = ndev; #ifdef SMC_DYNAMIC_BUS_CONFIG From 99d7fbb5cedf598f67e8be106d6c7b8d91366aef Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 22 Dec 2021 15:59:44 +0800 Subject: [PATCH 688/868] net: ks8851: Check for error irq Because platform_get_irq() could fail and return error irq. Therefore, it might be better to check it if order to avoid the use of error irq. Fixes: 797047f875b5 ("net: ks8851: Implement Parallel bus operations") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851_par.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 2e25798c610e..7f49042484bd 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -321,6 +321,8 @@ static int ks8851_probe_par(struct platform_device *pdev) return ret; netdev->irq = platform_get_irq(pdev, 0); + if (netdev->irq < 0) + return netdev->irq; return ks8851_probe_common(netdev, dev, msg_enable); } From d7f55471db2719629f773c2d6b5742a69595bfd3 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Fri, 17 Dec 2021 10:07:54 +0800 Subject: [PATCH 689/868] memblock: fix memblock_phys_alloc() section mismatch error Fix modpost Section mismatch error in memblock_phys_alloc() [...] WARNING: modpost: vmlinux.o(.text.unlikely+0x1dcc): Section mismatch in reference from the function memblock_phys_alloc() to the function .init.text:memblock_phys_alloc_range() The function memblock_phys_alloc() references the function __init memblock_phys_alloc_range(). This is often because memblock_phys_alloc lacks a __init annotation or the annotation of memblock_phys_alloc_range is wrong. ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. [...] memblock_phys_alloc() is a one-line wrapper, make it __always_inline to avoid these section mismatches. Reported-by: k2ci Suggested-by: Mike Rapoport Signed-off-by: Jackie Liu [rppt: slightly massaged changelog ] Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20211217020754.2874872-1-liu.yun@linux.dev --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 8adcf1fa8096..9dc7cb239d21 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -405,8 +405,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); From b6fd77472dea76b7a2bad3a338ade920152972b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 22 Dec 2021 16:53:50 +0200 Subject: [PATCH 690/868] ALSA: hda/hdmi: Disable silent stream on GLK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The silent stream stuff recurses back into i915 audio component .get_power() from the .pin_eld_notify() hook. On GLK this will deadlock as i915 may already be holding the relevant modeset locks during .pin_eld_notify() and the GLK audio vs. CDCLK workaround will try to grab the same locks from .get_power(). Until someone comes up with a better fix just disable the silent stream support on GLK. Cc: stable@vger.kernel.org Cc: Harsha Priya Cc: Emmanuel Jillela Cc: Kai Vehmanen Cc: Takashi Iwai Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2623 Fixes: 951894cf30f4 ("ALSA: hda/hdmi: Add Intel silent stream support") Signed-off-by: Ville Syrjälä Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20211222145350.24342-1-ville.syrjala@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 415701bd10ac..ffcde7409d2a 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2947,7 +2947,8 @@ static int parse_intel_hdmi(struct hda_codec *codec) /* Intel Haswell and onwards; audio component with eld notifier */ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, - const int *port_map, int port_num, int dev_num) + const int *port_map, int port_num, int dev_num, + bool send_silent_stream) { struct hdmi_spec *spec; int err; @@ -2980,7 +2981,7 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, * Enable silent stream feature, if it is enabled via * module param or Kconfig option */ - if (enable_silent_stream) + if (send_silent_stream) spec->send_silent_stream = true; return parse_intel_hdmi(codec); @@ -2988,12 +2989,18 @@ static int intel_hsw_common_init(struct hda_codec *codec, hda_nid_t vendor_nid, static int patch_i915_hsw_hdmi(struct hda_codec *codec) { - return intel_hsw_common_init(codec, 0x08, NULL, 0, 3); + return intel_hsw_common_init(codec, 0x08, NULL, 0, 3, + enable_silent_stream); } static int patch_i915_glk_hdmi(struct hda_codec *codec) { - return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3); + /* + * Silent stream calls audio component .get_power() from + * .pin_eld_notify(). On GLK this will deadlock in i915 due + * to the audio vs. CDCLK workaround. + */ + return intel_hsw_common_init(codec, 0x0b, NULL, 0, 3, false); } static int patch_i915_icl_hdmi(struct hda_codec *codec) @@ -3004,7 +3011,8 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec) */ static const int map[] = {0x0, 0x4, 0x6, 0x8, 0xa, 0xb}; - return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3); + return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 3, + enable_silent_stream); } static int patch_i915_tgl_hdmi(struct hda_codec *codec) @@ -3016,7 +3024,8 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec) static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}; int ret; - ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4); + ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map), 4, + enable_silent_stream); if (!ret) { struct hdmi_spec *spec = codec->spec; From 385f287f9853da402d94278e59f594501c1d1dad Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Tue, 21 Dec 2021 09:08:16 +0800 Subject: [PATCH 691/868] ALSA: hda: intel-sdw-acpi: harden detection of controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing code currently sets a pointer to an ACPI handle before checking that it's actually a SoundWire controller. This can lead to issues where the graph walk continues and eventually fails, but the pointer was set already. This patch changes the logic so that the information provided to the caller is set when a controller is found. Reviewed-by: Péter Ujfalusi Signed-off-by: Libin Yang Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20211221010817.23636-2-yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index c0123bc31c0d..ba8a872a2901 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -132,8 +132,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, return AE_NOT_FOUND; } - info->handle = handle; - /* * On some Intel platforms, multiple children of the HDAS * device can be found, but only one of them is the SoundWire @@ -144,6 +142,9 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE) return AE_OK; /* keep going */ + /* found the correct SoundWire controller */ + info->handle = handle; + /* device found, stop namespace walk */ return AE_CTRL_TERMINATE; } From 78ea40efb48e978756db2ce45fcfa55bac056b91 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Tue, 21 Dec 2021 09:08:17 +0800 Subject: [PATCH 692/868] ALSA: hda: intel-sdw-acpi: go through HDAS ACPI at max depth of 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the HDAS ACPI scope, the SoundWire may not be the direct child of HDAS. It needs to go through the ACPI table at max depth of 2 to find the SoundWire device from HDAS. Reviewed-by: Péter Ujfalusi Signed-off-by: Libin Yang Signed-off-by: Pierre-Louis Bossart Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20211221010817.23636-3-yung-chuan.liao@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index ba8a872a2901..b7758dbe2371 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -165,8 +165,14 @@ int sdw_intel_acpi_scan(acpi_handle *parent_handle, acpi_status status; info->handle = NULL; + /* + * In the HDAS ACPI scope, 'SNDW' may be either the child of + * 'HDAS' or the grandchild of 'HDAS'. So let's go through + * the ACPI from 'HDAS' at max depth of 2 to find the 'SNDW' + * device. + */ status = acpi_walk_namespace(ACPI_TYPE_DEVICE, - parent_handle, 1, + parent_handle, 2, sdw_intel_acpi_cb, NULL, info, NULL); if (ACPI_FAILURE(status) || info->handle == NULL) From 39a8fc4971a00d22536aeb7d446ee4a97810611b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Sat, 18 Dec 2021 13:39:25 +0100 Subject: [PATCH 693/868] ALSA: rawmidi - fix the uninitalized user_pversion The user_pversion was uninitialized for the user space file structure in the open function, because the file private structure use kmalloc for the allocation. The kernel ALSA sequencer code clears the file structure, so no additional fixes are required. Cc: stable@kernel.org Cc: broonie@kernel.org BugLink: https://github.com/alsa-project/alsa-lib/issues/178 Fixes: 09d23174402d ("ALSA: rawmidi: introduce SNDRV_RAWMIDI_IOCTL_USER_PVERSION") Reported-by: syzbot+88412ee8811832b00dbe@syzkaller.appspotmail.com Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20211218123925.2583847-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 6f30231bdb88..befa9809ff00 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -447,6 +447,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) err = -ENOMEM; goto __error; } + rawmidi_file->user_pversion = 0; init_waitqueue_entry(&wait, current); add_wait_queue(&rmidi->open_wait, &wait); while (1) { From edca7cc4b0accfa69dc032442fe0684e59c691b8 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Wed, 15 Dec 2021 20:16:46 +0100 Subject: [PATCH 694/868] ALSA: hda/realtek: Fix quirk for Clevo NJ51CU The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec, but uses the 0x8686 subproduct id in both cases. The ALC256 codec needs a different quirk for the headset microphone working and and edditional quirk for sound working after suspend and resume. When waking up from s3 suspend the Coef 0x10 is set to 0x0220 instead of 0x0020 on the ALC256 codec. Setting the value manually makes the sound work again. This patch does this automatically. [ minor coding style fix by tiwai ] Signed-off-by: Werner Sembach Fixes: b5acfe152abaa ("ALSA: hda/realtek: Add some Clove SSID in the ALC293(ALC1220)") Cc: Link: https://lore.kernel.org/r/20211215191646.844644-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e59ff75eea75..28255e752c4a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6546,6 +6546,23 @@ static void alc233_fixup_no_audio_jack(struct hda_codec *codec, alc_process_coef_fw(codec, alc233_fixup_no_audio_jack_coefs); } +static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec, + * but uses the 0x8686 subproduct id in both cases. The ALC256 codec + * needs an additional quirk for sound working after suspend and resume. + */ + if (codec->core.vendor_id == 0x10ec0256) { + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + snd_hda_codec_set_pincfg(codec, 0x19, 0x04a11120); + } else { + snd_hda_codec_set_pincfg(codec, 0x1a, 0x04a1113c); + } +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -6766,6 +6783,7 @@ enum { ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, ALC233_FIXUP_NO_AUDIO_JACK, + ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, }; static const struct hda_fixup alc269_fixups[] = { @@ -8490,6 +8508,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_no_audio_jack, }, + [ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_mic_no_presence_and_resume, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8831,7 +8855,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[57][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME), SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From bdf1b5c3884f6a0dc91b0dbdb8c3b7d205f449e0 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Mon, 20 Dec 2021 21:56:03 +0800 Subject: [PATCH 695/868] sfc: Check null pointer of rx_queue->page_ring Because of the possible failure of the kcalloc, it should be better to set rx_queue->page_ptr_mask to 0 when it happens in order to maintain the consistency. Fixes: 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver") Signed-off-by: Jiasheng Jiang Acked-by: Martin Habets Link: https://lore.kernel.org/r/20211220135603.954944-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/rx_common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 68fc7d317693..0983abc0cc5f 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -150,7 +150,10 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; } static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) From 9b8bdd1eb5890aeeab7391dddcf8bd51f7b07216 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Mon, 20 Dec 2021 22:03:44 +0800 Subject: [PATCH 696/868] sfc: falcon: Check null pointer of rx_queue->page_ring Because of the possible failure of the kcalloc, it should be better to set rx_queue->page_ptr_mask to 0 when it happens in order to maintain the consistency. Fixes: 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver") Signed-off-by: Jiasheng Jiang Acked-by: Martin Habets Link: https://lore.kernel.org/r/20211220140344.978408-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/falcon/rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 966f13e7475d..11a6aee852e9 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -728,7 +728,10 @@ static void ef4_init_rx_recycle_ring(struct ef4_nic *efx, efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; } void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue) From 983d8e60f50806f90534cc5373d0ce867e5aaf79 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Dec 2021 14:19:18 -0800 Subject: [PATCH 697/868] xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate The old ALLOCSP/FREESP ioctls in XFS can be used to preallocate space at the end of files, just like fallocate and RESVSP. Make the behavior consistent with the other ioctls. Reported-by: Kirill Tkhai Signed-off-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Eric Sandeen --- fs/xfs/xfs_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 174cd8950cb6..bc85e045845d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -687,7 +687,8 @@ xfs_ioc_space( if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); + bf->l_start - XFS_ISIZE(ip), + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } From 8035b1a2a37a29d8c717ef84fca8fe7278bc9f03 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 21 Dec 2021 23:10:36 +0300 Subject: [PATCH 698/868] asix: fix uninit-value in asix_mdio_read() asix_read_cmd() may read less than sizeof(smsr) bytes and in this case smsr will be uninitialized. Fail log: BUG: KMSAN: uninit-value in asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] BUG: KMSAN: uninit-value in asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] drivers/net/usb/asix_common.c:497 BUG: KMSAN: uninit-value in asix_mdio_read+0x3c1/0xb00 drivers/net/usb/asix_common.c:497 drivers/net/usb/asix_common.c:497 asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] asix_check_host_enable drivers/net/usb/asix_common.c:82 [inline] drivers/net/usb/asix_common.c:497 asix_mdio_read+0x3c1/0xb00 drivers/net/usb/asix_common.c:497 drivers/net/usb/asix_common.c:497 Fixes: d9fe64e51114 ("net: asix: Add in_pm parameter") Reported-and-tested-by: syzbot+f44badb06036334e867a@syzkaller.appspotmail.com Reviewed-by: Andrew Lunn Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/8966e3b514edf39857dd93603fc79ec02e000a75.1640117288.git.paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 42ba4af68090..06823d7141b6 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -77,7 +77,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; - else if (ret < 0) + else if (ret < sizeof(smsr)) continue; else if (smsr & AX_HOST_EN) break; From d1652b70d07cc3eed96210c876c4879e1655f20e Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 21 Dec 2021 23:10:43 +0300 Subject: [PATCH 699/868] asix: fix wrong return value in asix_check_host_enable() If asix_read_cmd() returns 0 on 30th interation, 0 will be returned from asix_check_host_enable(), which is logically wrong. Fix it by returning -ETIMEDOUT explicitly if we have exceeded 30 iterations Also, replaced 30 with #define as suggested by Andrew Fixes: a786e3195d6a ("net: asix: fix uninit value bugs") Reported-by: Andrew Lunn Signed-off-by: Pavel Skripkin Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/ecd3470ce6c2d5697ac635d0d3b14a47defb4acb.1640117288.git.paskripkin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 06823d7141b6..71682970be58 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -9,6 +9,8 @@ #include "asix.h" +#define AX_HOST_EN_RETRIES 30 + int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm) { @@ -68,7 +70,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) int i, ret; u8 smsr; - for (i = 0; i < 30; ++i) { + for (i = 0; i < AX_HOST_EN_RETRIES; ++i) { ret = asix_set_sw_mii(dev, in_pm); if (ret == -ENODEV || ret == -ETIMEDOUT) break; @@ -83,7 +85,7 @@ static int asix_check_host_enable(struct usbnet *dev, int in_pm) break; } - return ret; + return i >= AX_HOST_EN_RETRIES ? -ETIMEDOUT : ret; } static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx) From 4d625a97a7e96be016382e3bb0a3cead05fec153 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Dec 2021 09:54:40 -0500 Subject: [PATCH 700/868] drm/amdgpu: fix runpm documentation It's not only supported by HG/PX laptops. It's supported by all dGPUs which supports BOCO/BACO functionality (runtime D3). BOCO - Bus Off, Chip Off. The entire chip is powered off. This is controlled by ACPI. BACO - Bus Active, Chip Off. The chip still shows up on the PCI bus, but the device itself is powered down. v2: fix missed HG/PX reference Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ad95de6399af..73ac02372827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -328,10 +328,11 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); /** * DOC: runpm (int) - * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down - * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. + * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down + * the dGPUs when they are idle if supported. The default is -1 (auto enable). + * Setting the value to 0 disables this functionality. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** From 7b9762a5e8837b92a027d58d396a9d27f6440c36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 22 Dec 2021 20:26:56 -0700 Subject: [PATCH 701/868] io_uring: zero iocb->ki_pos for stream file types io_uring supports using offset == -1 for using the current file position, and we read that in as part of read/write command setup. For the non-iter read/write types we pass in NULL for the position pointer, but for the iter types we should not be passing any anything but 0 for the position for a stream. Clear kiocb->ki_pos if the file is a stream, don't leave it as -1. If we do, then the request will error with -ESPIPE. Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position") Link: https://github.com/axboe/liburing/discussions/501 Reported-by: Samuel Williams Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5ab0e9a3f29..fb2a0cb4aaf8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2891,9 +2891,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1 && !(file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = file->f_pos; + if (kiocb->ki_pos == -1) { + if (!(file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = file->f_pos; + } else { + kiocb->ki_pos = 0; + } } kiocb->ki_flags = iocb_flags(file); ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); From 1b8d0300a3e9f216ae4901bab886db7299899ec6 Mon Sep 17 00:00:00 2001 From: Lixiaokeng Date: Mon, 20 Dec 2021 19:39:06 +0800 Subject: [PATCH 702/868] scsi: libiscsi: Fix UAF in iscsi_conn_get_param()/iscsi_conn_teardown() |- iscsi_if_destroy_conn |-dev_attr_show |-iscsi_conn_teardown |-spin_lock_bh |-iscsi_sw_tcp_conn_get_param |-kfree(conn->persistent_address) |-iscsi_conn_get_param |-kfree(conn->local_ipaddr) ==>|-read persistent_address ==>|-read local_ipaddr |-spin_unlock_bh When iscsi_conn_teardown() and iscsi_conn_get_param() happen in parallel, a UAF may be triggered. Link: https://lore.kernel.org/r/046ec8a0-ce95-d3fc-3235-666a7c65b224@huawei.com Reported-by: Lu Tixiong Reviewed-by: Mike Christie Reviewed-by: Lee Duncan Signed-off-by: Lixiaokeng Signed-off-by: Linfeilong Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 284b939fb1ea..059dae8909ee 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3100,6 +3100,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; + char *tmp_persistent_address = conn->persistent_address; + char *tmp_local_ipaddr = conn->local_ipaddr; del_timer_sync(&conn->transport_timer); @@ -3121,8 +3123,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) spin_lock_bh(&session->frwd_lock); free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); - kfree(conn->persistent_address); - kfree(conn->local_ipaddr); /* regular RX path uses back_lock */ spin_lock_bh(&session->back_lock); kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, @@ -3134,6 +3134,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); + kfree(tmp_persistent_address); + kfree(tmp_local_ipaddr); } EXPORT_SYMBOL_GPL(iscsi_conn_teardown); From 142c779d05d1fef75134c3cb63f52ccbc96d9e1f Mon Sep 17 00:00:00 2001 From: Alexey Makhalov Date: Mon, 20 Dec 2021 11:05:14 -0800 Subject: [PATCH 703/868] scsi: vmw_pvscsi: Set residual data length conditionally The PVSCSI implementation in the VMware hypervisor under specific configuration ("SCSI Bus Sharing" set to "Physical") returns zero dataLen in the completion descriptor for READ CAPACITY(16). As a result, the kernel can not detect proper disk geometry. This can be recognized by the kernel message: [ 0.776588] sd 1:0:0:0: [sdb] Sector size 0 reported, assuming 512. The PVSCSI implementation in QEMU does not set dataLen at all, keeping it zeroed. This leads to a boot hang as was reported by Shmulik Ladkani. It is likely that the controller returns the garbage at the end of the buffer. Residual length should be set by the driver in that case. The SCSI layer will erase corresponding data. See commit bdb2b8cab439 ("[SCSI] erase invalid data returned by device") for details. Commit e662502b3a78 ("scsi: vmw_pvscsi: Set correct residual data length") introduced the issue by setting residual length unconditionally, causing the SCSI layer to erase the useful payload beyond dataLen when this value is returned as 0. As a result, considering existing issues in implementations of PVSCSI controllers, we do not want to call scsi_set_resid() when dataLen == 0. Calling scsi_set_resid() has no effect if dataLen equals buffer length. Link: https://lore.kernel.org/lkml/20210824120028.30d9c071@blondie/ Link: https://lore.kernel.org/r/20211220190514.55935-1-amakhalov@vmware.com Fixes: e662502b3a78 ("scsi: vmw_pvscsi: Set correct residual data length") Cc: Matt Wang Cc: Martin K. Petersen Cc: Vishal Bhakta Cc: VMware PV-Drivers Cc: James E.J. Bottomley Cc: linux-scsi@vger.kernel.org Cc: stable@vger.kernel.org Reported-and-suggested-by: Shmulik Ladkani Signed-off-by: Alexey Makhalov Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index c2ba65224633..1f037b8ab904 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -586,9 +586,12 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, * Commands like INQUIRY may transfer less data than * requested by the initiator via bufflen. Set residual * count to make upper layer aware of the actual amount - * of data returned. + * of data returned. There are cases when controller + * returns zero dataLen with non zero data - do not set + * residual count in that case. */ - scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); + if (e->dataLen && (e->dataLen < scsi_bufflen(cmd))) + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); cmd->result = (DID_OK << 16); break; From 6b8b42585886c59a008015083282aae434349094 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 22 Dec 2021 06:54:53 +0000 Subject: [PATCH 704/868] net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources The mlx5_get_uars_page() function returns error pointers. Using IS_ERR() to check the return value to fix this. Fixes: 4ec9e7b02697 ("net/mlx5: DR, Expose steering domain functionality") Signed-off-by: Miaoqian Lin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 8cbd36c82b3b..f6e6d9209766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include +#include #include "dr_types.h" #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ @@ -72,9 +73,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } From 624bf42c2e3930acca9fcfc340b2fa38e712da84 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 12 Dec 2021 16:19:58 +0200 Subject: [PATCH 705/868] net/mlx5: DR, Fix querying eswitch manager vport for ECPF On BlueField the E-Switch manager is the ECPF (vport 0xFFFE), but when querying capabilities of ECPF eswitch manager, need to query vport 0 with other_vport = 0. Fixes: 9091b821aaa4 ("net/mlx5: DR, Handle eswitch manager and uplink vports separately") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index f6e6d9209766..c54cc45f63dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -164,9 +164,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) { - return dr_domain_query_vport(dmn, - dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0, - false, + return dr_domain_query_vport(dmn, 0, false, &dmn->info.caps.vports.esw_manager_caps); } From 26a7993c93a74a3fee83a37b46e00e69e49e57c2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 26 Oct 2021 08:25:19 +0300 Subject: [PATCH 706/868] net/mlx5: Use first online CPU instead of hard coded CPU Hard coded CPU (0 in our case) might be offline. Hence, use the first online CPU instead. Fixes: f891b7cdbdcd ("net/mlx5: Enable single IRQ for PCI Function") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 830444f927d4..0e84c005d160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -398,7 +398,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, cpumask_copy(irq->mask, affinity); if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && cpumask_empty(irq->mask)) - cpumask_set_cpu(0, irq->mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), irq->mask); irq_set_affinity_hint(irq->irqn, irq->mask); unlock: mutex_unlock(&pool->lock); From aa968f922039706f6d13e8870b49e424d0a8d9ad Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 24 Nov 2021 23:10:57 +0200 Subject: [PATCH 707/868] net/mlx5: Fix error print in case of IRQ request failed In case IRQ layer failed to find or to request irq, the driver is printing the first cpu of the provided affinity as part of the error print. Empty affinity is a valid input for the IRQ layer, and it is an error to call cpumask_first() on empty affinity. Remove the first cpu print from the error message. Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 0e84c005d160..bcee30f5de0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -356,8 +356,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, new_irq = irq_pool_create_irq(pool, affinity); if (IS_ERR(new_irq)) { if (!least_loaded_irq) { - mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n", - cpumask_first(affinity)); + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); mutex_unlock(&pool->lock); return new_irq; } From 33de865f7bce3968676e43b0182af0a2dd359dae Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 23 Nov 2021 20:08:13 +0200 Subject: [PATCH 708/868] net/mlx5: Fix SF health recovery flow SF do not directly control the PCI device. During recovery flow SF should not be allowed to do pci disable or pci reset, its PF will do it. It fixes the following kernel trace: mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:387:(pid 40948): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab175 after 1 iterations mlx5_core.sf mlx5_core.sf.25: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.23: mlx5_health_try_recover:387:(pid 40946): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab193 after 1 iterations mlx5_core.sf mlx5_core.sf.23: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.25: mlx5_cmd_check:813:(pid 40948): ENABLE_HCA(0x104) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0x658908) mlx5_core.sf mlx5_core.sf.25: mlx5_function_setup:1292:(pid 40948): enable hca failed mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:389:(pid 40948): health recovery failed Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7df9c7f8d9c8..65083496f913 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1809,12 +1809,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) int mlx5_recover_device(struct mlx5_core_dev *dev) { - int ret = -EIO; + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } - mlx5_pci_disable_device(dev); - if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) - ret = mlx5_load_one(dev); - return ret; + return mlx5_load_one(dev); } static struct pci_driver mlx5_core_driver = { From d671e109bd8548d067b27e39e183a484430bf102 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 14 Dec 2021 03:52:53 +0200 Subject: [PATCH 709/868] net/mlx5: Fix tc max supported prio for nic mode Only prio 1 is supported if firmware doesn't support ignore flow level for nic mode. The offending commit removed the check wrongly. Add it back. Fixes: 9a99c8f1253a ("net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 97e5845b4cfd..d5e47630e284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { + if (!mlx5_chains_prios_supported(chains)) + return 1; + if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; From 918fc3855a6507a200e9cf22c20be852c0982687 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 30 Nov 2021 16:05:44 +0200 Subject: [PATCH 710/868] net/mlx5e: Wrap the tx reporter dump callback to extract the sq Function mlx5e_tx_reporter_dump_sq() casts its void * argument to struct mlx5e_txqsq *, but in TX-timeout-recovery flow the argument is actually of type struct mlx5e_tx_timeout_ctx *. mlx5_core 0000:08:00.1 enp8s0f1: TX timeout detected mlx5_core 0000:08:00.1 enp8s0f1: TX timeout on queue: 1, SQ: 0x11ec, CQ: 0x146d, SQ Cons: 0x0 SQ Prod: 0x1, usecs since last trans: 21565000 BUG: stack guard page was hit at 0000000093f1a2de (stack is 00000000b66ea0dc..000000004d932dae) kernel stack overflow (page fault): 0000 [#1] SMP NOPTI CPU: 5 PID: 95 Comm: kworker/u20:1 Tainted: G W OE 5.13.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 [mlx5_core] Call Trace: mlx5e_tx_reporter_dump+0x43/0x1c0 [mlx5_core] devlink_health_do_dump.part.91+0x71/0xd0 devlink_health_report+0x157/0x1b0 mlx5e_reporter_tx_timeout+0xb9/0xf0 [mlx5_core] ? mlx5e_tx_reporter_err_cqe_recover+0x1d0/0x1d0 [mlx5_core] ? mlx5e_health_queue_dump+0xd0/0xd0 [mlx5_core] ? update_load_avg+0x19b/0x550 ? set_next_entity+0x72/0x80 ? pick_next_task_fair+0x227/0x340 ? finish_task_switch+0xa2/0x280 mlx5e_tx_timeout_work+0x83/0xb0 [mlx5_core] process_one_work+0x1de/0x3a0 worker_thread+0x2d/0x3c0 ? process_one_work+0x3a0/0x3a0 kthread+0x115/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 --[ end trace 51ccabea504edaff ]--- RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled end Kernel panic - not syncing: Fatal exception To fix this bug add a wrapper for mlx5e_tx_reporter_dump_sq() which extracts the sq from struct mlx5e_tx_timeout_ctx and set it as the TX-timeout-recovery flow dump callback. Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter") Signed-off-by: Aya Levin Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 4f4bc8726ec4..614cd9477600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -561,7 +569,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) to_ctx.sq = sq; err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - err_ctx.dump = mlx5e_tx_reporter_dump_sq; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; snprintf(err_str, sizeof(err_str), "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, From a0cb909644c36230a3c48904d14b91732de79fc0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 13 Dec 2021 11:05:11 +0200 Subject: [PATCH 711/868] net/mlx5e: Fix skb memory leak when TC classifier action offloads are disabled When TC classifier action offloads are disabled (CONFIG_MLX5_CLS_ACT in Kconfig), the mlx5e_rep_tc_receive() function which is responsible for passing the skb to the stack (or freeing it) is defined as a nop, and results in leaking the skb memory. Replace the nop with a call to napi_gro_receive() to resolve the leak. Fixes: 28e7606fa8f1 ("net/mlx5e: Refactor rx handler of represetor device") Signed-off-by: Gal Pressman Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index d6c7c81690eb..7c9dd3a75f8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, static inline void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, - struct sk_buff *skb) {} + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } #endif /* CONFIG_MLX5_CLS_ACT */ From 17958d7cd731b977ae7d4af38d891c3a1235b5f1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Oct 2021 19:40:09 +0300 Subject: [PATCH 712/868] net/mlx5e: Fix interoperability between XSK and ICOSQ recovery flow Both regular RQ and XSKRQ use the same ICOSQ for UMRs. When doing recovery for the ICOSQ, don't forget to deactivate XSKRQ. XSK can be opened and closed while channels are active, so a new mutex prevents the ICOSQ recovery from running at the same time. The ICOSQ recovery deactivates and reactivates XSKRQ, so any parallel change in XSK state would break consistency. As the regular RQ is running, it's not enough to just flush the recovery work, because it can be rescheduled. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../ethernet/mellanox/mlx5/core/en/health.h | 2 ++ .../mellanox/mlx5/core/en/reporter_rx.c | 35 ++++++++++++++++++- .../mellanox/mlx5/core/en/xsk/setup.c | 16 ++++++++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++-- 5 files changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f0ac6b0d9653..f42067adc79d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -783,6 +783,8 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; }; struct mlx5e_ptp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d5b7110a4265..0107e4e73bb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 74086eb556ae..2684e9da9f41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) { + struct mlx5e_rq *xskrq = NULL; struct mlx5_core_dev *mdev; struct mlx5e_icosq *icosq; struct net_device *dev; @@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) int err; icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; mdev = icosq->channel->mdev; dev = icosq->channel->netdev; err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); @@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + err = mlx5e_wait_for_icosq_flush(icosq); if (err) goto out; @@ -97,15 +107,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_in_progress_descs(rq); + if (xskrq) + mlx5e_free_rx_in_progress_descs(xskrq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); - mlx5e_activate_rq(rq); + mlx5e_activate_rq(rq); rq->stats->recover++; + + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; + } + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return 0; out: clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); return err; } @@ -706,6 +729,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); } +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 538bc2419bd8..8526a5fbbf0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,6 +4,7 @@ #include "setup.h" #include "en/params.h" #include "en/txrx.h" +#include "en/health.h" /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may * change unexpectedly, and mlx5e has a minimum valid stride size for striding @@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + /* TX queue is created active. */ spin_lock_bh(&c->async_icosq_lock); @@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { - mlx5e_deactivate_rq(&c->xskrq); + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + /* TX queue is disabled on close. */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65571593ec5c..a572fc9690ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1087,8 +1087,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - if (rq->icosq) - cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -2088,6 +2086,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_xdpsq_cq; + mutex_init(&c->icosq_recovery_lock); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) goto err_close_async_icosq; @@ -2156,9 +2156,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_xdpsq(&c->xdpsq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ + cancel_work_sync(&c->icosq.recover_work); mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); mlx5e_close_icosq(&c->async_icosq); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); From 19c4aba2d4e23997061fb11aed8a3e41334bfa14 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 22 Jul 2020 16:32:44 +0300 Subject: [PATCH 713/868] net/mlx5e: Fix ICOSQ recovery flow for XSK There are two ICOSQs per channel: one is needed for RX, and the other for async operations (XSK TX, kTLS offload). Currently, the recovery flow for both is the same, and async ICOSQ is mistakenly treated like the regular ICOSQ. This patch prevents running the regular ICOSQ recovery on async ICOSQ. The purpose of async ICOSQ is to handle XSK wakeup requests and post kTLS offload RX parameters, it has nothing to do with RQ and XSKRQ UMRs, so the regular recovery sequence is not applicable here. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 -- .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 ++++++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f42067adc79d..b47a0d3ef22f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1016,9 +1016,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); -void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a572fc9690ed..3b0f3a831216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1214,9 +1214,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) + struct mlx5e_icosq *sq, + work_func_t recover_work_func) { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; @@ -1237,7 +1248,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + INIT_WORK(&sq->recover_work, recover_work_func); return 0; @@ -1573,13 +1584,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_icosq(c, param, sq); + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); if (err) return err; @@ -1618,7 +1630,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. */ } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -2082,13 +2094,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); - err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); if (err) goto err_close_xdpsq_cq; mutex_init(&c->icosq_recovery_lock); - err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); if (err) goto err_close_async_icosq; From 2820110d945923ab2f4901753e4ccbb2a506fa8e Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 2 Dec 2021 11:18:02 +0800 Subject: [PATCH 714/868] net/mlx5e: Delete forward rule for ct or sample action When there is ct or sample action, the ct or sample rule will be deleted and return. But if there is an extra mirror action, the forward rule can't be deleted because of the return. Fix it by removing the return. Fixes: 69e2916ebce4 ("net/mlx5: CT: Add support for mirroring") Fixes: f94d6389f6a8 ("net/mlx5e: TC, Add support to offload sample action") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d45f4ae80c0..f633448c3cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1196,21 +1196,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) goto offload_rule_0; - if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - return; - } - - if (flow_flag_test(flow, SAMPLE)) { - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - return; - } - if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else if (flow_flag_test(flow, SAMPLE)) + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + else offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } struct mlx5_flow_handle * From 4390c6edc0fb390e699d0f886f45575dfeafeb4b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Nov 2021 18:08:11 +0100 Subject: [PATCH 715/868] net/mlx5: Fix some error handling paths in 'mlx5e_tc_add_fdb_flow()' All the error handling paths of 'mlx5e_tc_add_fdb_flow()' end to 'err_out' where 'flow_flag_set(flow, FAILED);' is called. All but the new error handling paths added by the commits given in the Fixes tag below. Fix these error handling paths and branch to 'err_out'. Fixes: 166f431ec6be ("net/mlx5e: Add indirect tc offload of ovs internal port") Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Christophe JAILLET Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed (cherry picked from commit 31108d142f3632970f6f3e0224bd1c6781c9f87d) --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f633448c3cc7..a60c7680fd2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1440,7 +1440,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, metadata); if (err) - return err; + goto err_out; } } @@ -1456,13 +1456,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule is only supported on chain 0"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } if (attr->dest_chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule offload doesn't support goto action"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), @@ -1470,8 +1472,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow_flag_test(flow, EGRESS) ? MLX5E_TC_INT_PORT_EGRESS : MLX5E_TC_INT_PORT_INGRESS); - if (IS_ERR(int_port)) - return PTR_ERR(int_port); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto err_out; + } esw_attr->int_port = int_port; } From c4499272566d677075c6a84f46baeb826a6a7182 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Wed, 22 Dec 2021 11:51:54 -0700 Subject: [PATCH 716/868] platform/x86: system76_acpi: Guard System76 EC specific functionality Certain functionality or its implementation in System76 EC firmware may be different to the proprietary ODM EC firmware. Introduce a new bool, `has_open_ec`, to guard our specific logic. Detect the use of this by looking for a custom ACPI method name used in System76 firmware. Signed-off-by: Tim Crawford Link: https://lore.kernel.org/r/20211222185154.4560-1-tcrawford@system76.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/system76_acpi.c | 58 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/drivers/platform/x86/system76_acpi.c b/drivers/platform/x86/system76_acpi.c index 8b292ee95a14..7299ad08c838 100644 --- a/drivers/platform/x86/system76_acpi.c +++ b/drivers/platform/x86/system76_acpi.c @@ -35,6 +35,7 @@ struct system76_data { union acpi_object *nfan; union acpi_object *ntmp; struct input_dev *input; + bool has_open_ec; }; static const struct acpi_device_id device_ids[] = { @@ -279,20 +280,12 @@ static struct acpi_battery_hook system76_battery_hook = { static void system76_battery_init(void) { - acpi_handle handle; - - handle = ec_get_handle(); - if (handle && acpi_has_method(handle, "GBCT")) - battery_hook_register(&system76_battery_hook); + battery_hook_register(&system76_battery_hook); } static void system76_battery_exit(void) { - acpi_handle handle; - - handle = ec_get_handle(); - if (handle && acpi_has_method(handle, "GBCT")) - battery_hook_unregister(&system76_battery_hook); + battery_hook_unregister(&system76_battery_hook); } // Get the airplane mode LED brightness @@ -673,6 +666,10 @@ static int system76_add(struct acpi_device *acpi_dev) acpi_dev->driver_data = data; data->acpi_dev = acpi_dev; + // Some models do not run open EC firmware. Check for an ACPI method + // that only exists on open EC to guard functionality specific to it. + data->has_open_ec = acpi_has_method(acpi_device_handle(data->acpi_dev), "NFAN"); + err = system76_get(data, "INIT"); if (err) return err; @@ -718,27 +715,31 @@ static int system76_add(struct acpi_device *acpi_dev) if (err) goto error; - err = system76_get_object(data, "NFAN", &data->nfan); - if (err) - goto error; + if (data->has_open_ec) { + err = system76_get_object(data, "NFAN", &data->nfan); + if (err) + goto error; - err = system76_get_object(data, "NTMP", &data->ntmp); - if (err) - goto error; + err = system76_get_object(data, "NTMP", &data->ntmp); + if (err) + goto error; - data->therm = devm_hwmon_device_register_with_info(&acpi_dev->dev, - "system76_acpi", data, &thermal_chip_info, NULL); - err = PTR_ERR_OR_ZERO(data->therm); - if (err) - goto error; + data->therm = devm_hwmon_device_register_with_info(&acpi_dev->dev, + "system76_acpi", data, &thermal_chip_info, NULL); + err = PTR_ERR_OR_ZERO(data->therm); + if (err) + goto error; - system76_battery_init(); + system76_battery_init(); + } return 0; error: - kfree(data->ntmp); - kfree(data->nfan); + if (data->has_open_ec) { + kfree(data->ntmp); + kfree(data->nfan); + } return err; } @@ -749,14 +750,15 @@ static int system76_remove(struct acpi_device *acpi_dev) data = acpi_driver_data(acpi_dev); - system76_battery_exit(); + if (data->has_open_ec) { + system76_battery_exit(); + kfree(data->nfan); + kfree(data->ntmp); + } devm_led_classdev_unregister(&acpi_dev->dev, &data->ap_led); devm_led_classdev_unregister(&acpi_dev->dev, &data->kb_led); - kfree(data->nfan); - kfree(data->ntmp); - system76_get(data, "FINI"); return 0; From 4f6c131c3c31b9f68470ebd01320d5403d8719bb Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Dec 2021 21:49:41 +0200 Subject: [PATCH 717/868] platform/x86/intel: Remove X86_PLATFORM_DRIVERS_INTEL While introduction of this menu brings a nice view in the configuration tools, it brought more issues than solves, i.e. it prevents to locate files in the intel/ subfolder without touching non-related Kconfig dependencies elsewhere. Drop X86_PLATFORM_DRIVERS_INTEL altogether. Note, on x86 it's enabled by default and it's quite unlikely anybody wants to disable all of the modules in this submenu. Fixes: 8bd836feb6ca ("platform/x86: intel_skl_int3472: Move to intel/ subfolder") Suggested-by: Hans de Goede Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20211222194941.76054-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/Makefile | 2 +- drivers/platform/x86/intel/Kconfig | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 219478061683..253a096b5dd8 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -68,7 +68,7 @@ obj-$(CONFIG_THINKPAD_ACPI) += thinkpad_acpi.o obj-$(CONFIG_THINKPAD_LMI) += think-lmi.o # Intel -obj-$(CONFIG_X86_PLATFORM_DRIVERS_INTEL) += intel/ +obj-y += intel/ # MSI obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o diff --git a/drivers/platform/x86/intel/Kconfig b/drivers/platform/x86/intel/Kconfig index 38ce3e344589..40096b25994a 100644 --- a/drivers/platform/x86/intel/Kconfig +++ b/drivers/platform/x86/intel/Kconfig @@ -3,19 +3,6 @@ # Intel x86 Platform Specific Drivers # -menuconfig X86_PLATFORM_DRIVERS_INTEL - bool "Intel x86 Platform Specific Device Drivers" - default y - help - Say Y here to get to see options for device drivers for - various Intel x86 platforms, including vendor-specific - drivers. This option alone does not add any kernel code. - - If you say N, all options in this submenu will be skipped - and disabled. - -if X86_PLATFORM_DRIVERS_INTEL - source "drivers/platform/x86/intel/atomisp2/Kconfig" source "drivers/platform/x86/intel/int1092/Kconfig" source "drivers/platform/x86/intel/int33fe/Kconfig" @@ -183,5 +170,3 @@ config INTEL_UNCORE_FREQ_CONTROL To compile this driver as a module, choose M here: the module will be called intel-uncore-frequency. - -endif # X86_PLATFORM_DRIVERS_INTEL From 9695b7de5b4760ed22132aca919570c0190cb0ce Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Dec 2021 19:39:52 +0100 Subject: [PATCH 718/868] veth: ensure skb entering GRO are not cloned. After commit d3256efd8e8b ("veth: allow enabling NAPI even without XDP"), if GRO is enabled on a veth device and TSO is disabled on the peer device, TCP skbs will go through the NAPI callback. If there is no XDP program attached, the veth code does not perform any share check, and shared/cloned skbs could enter the GRO engine. Ignat reported a BUG triggered later-on due to the above condition: [ 53.970529][ C1] kernel BUG at net/core/skbuff.c:3574! [ 53.981755][ C1] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [ 53.982634][ C1] CPU: 1 PID: 19 Comm: ksoftirqd/1 Not tainted 5.16.0-rc5+ #25 [ 53.982634][ C1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 [ 53.982634][ C1] RIP: 0010:skb_shift+0x13ef/0x23b0 [ 53.982634][ C1] Code: ea 03 0f b6 04 02 48 89 fa 83 e2 07 38 d0 7f 08 84 c0 0f 85 41 0c 00 00 41 80 7f 02 00 4d 8d b5 d0 00 00 00 0f 85 74 f5 ff ff <0f> 0b 4d 8d 77 20 be 04 00 00 00 4c 89 44 24 78 4c 89 f7 4c 89 8c [ 53.982634][ C1] RSP: 0018:ffff8881008f7008 EFLAGS: 00010246 [ 53.982634][ C1] RAX: 0000000000000000 RBX: ffff8881180b4c80 RCX: 0000000000000000 [ 53.982634][ C1] RDX: 0000000000000002 RSI: ffff8881180b4d3c RDI: ffff88810bc9cac2 [ 53.982634][ C1] RBP: ffff8881008f70b8 R08: ffff8881180b4cf4 R09: ffff8881180b4cf0 [ 53.982634][ C1] R10: ffffed1022999e5c R11: 0000000000000002 R12: 0000000000000590 [ 53.982634][ C1] R13: ffff88810f940c80 R14: ffff88810f940d50 R15: ffff88810bc9cac0 [ 53.982634][ C1] FS: 0000000000000000(0000) GS:ffff888235880000(0000) knlGS:0000000000000000 [ 53.982634][ C1] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.982634][ C1] CR2: 00007ff5f9b86680 CR3: 0000000108ce8004 CR4: 0000000000170ee0 [ 53.982634][ C1] Call Trace: [ 53.982634][ C1] [ 53.982634][ C1] tcp_sacktag_walk+0xaba/0x18e0 [ 53.982634][ C1] tcp_sacktag_write_queue+0xe7b/0x3460 [ 53.982634][ C1] tcp_ack+0x2666/0x54b0 [ 53.982634][ C1] tcp_rcv_established+0x4d9/0x20f0 [ 53.982634][ C1] tcp_v4_do_rcv+0x551/0x810 [ 53.982634][ C1] tcp_v4_rcv+0x22ed/0x2ed0 [ 53.982634][ C1] ip_protocol_deliver_rcu+0x96/0xaf0 [ 53.982634][ C1] ip_local_deliver_finish+0x1e0/0x2f0 [ 53.982634][ C1] ip_sublist_rcv_finish+0x211/0x440 [ 53.982634][ C1] ip_list_rcv_finish.constprop.0+0x424/0x660 [ 53.982634][ C1] ip_list_rcv+0x2c8/0x410 [ 53.982634][ C1] __netif_receive_skb_list_core+0x65c/0x910 [ 53.982634][ C1] netif_receive_skb_list_internal+0x5f9/0xcb0 [ 53.982634][ C1] napi_complete_done+0x188/0x6e0 [ 53.982634][ C1] gro_cell_poll+0x10c/0x1d0 [ 53.982634][ C1] __napi_poll+0xa1/0x530 [ 53.982634][ C1] net_rx_action+0x567/0x1270 [ 53.982634][ C1] __do_softirq+0x28a/0x9ba [ 53.982634][ C1] run_ksoftirqd+0x32/0x60 [ 53.982634][ C1] smpboot_thread_fn+0x559/0x8c0 [ 53.982634][ C1] kthread+0x3b9/0x490 [ 53.982634][ C1] ret_from_fork+0x22/0x30 [ 53.982634][ C1] Address the issue by skipping the GRO stage for shared or cloned skbs. To reduce the chance of OoO, try to unclone the skbs before giving up. v1 -> v2: - use avoid skb_copy and fallback to netif_receive_skb - Eric Reported-by: Ignat Korchagin Fixes: d3256efd8e8b ("veth: allow enabling NAPI even without XDP") Signed-off-by: Paolo Abeni Tested-by: Ignat Korchagin Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/b5f61c5602aab01bac8d711d8d1bfab0a4817db7.1640197544.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 50eb43e5bf45..2acdb8ad6c71 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -879,8 +879,12 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, stats->xdp_bytes += skb->len; skb = veth_xdp_rcv_skb(rq, skb, bq, stats); - if (skb) - napi_gro_receive(&rq->xdp_napi, skb); + if (skb) { + if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC)) + netif_receive_skb(skb); + else + napi_gro_receive(&rq->xdp_napi, skb); + } } done++; } From ae2778a64724f77fd6cad674461a045fb3307df7 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 23 Dec 2021 15:22:11 +0800 Subject: [PATCH 719/868] net: dsa: tag_ocelot: use traffic class to map priority on injected header For Ocelot switches, the CPU injected frames have an injection header where it can specify the QoS class of the packet and the DSA tag, now it uses the SKB priority to set that. If a traffic class to priority mapping is configured on the netdevice (with mqprio for example ...), it won't be considered for CPU injected headers. This patch make the QoS class aligned to the priority to traffic class mapping if it exists. Fixes: 8dce89aa5f32 ("net: dsa: ocelot: add tagger for Ocelot/Felix switches") Signed-off-by: Xiaoliang Yang Signed-off-by: Marouen Ghodhbane Link: https://lore.kernel.org/r/20211223072211.33130-1-xiaoliang.yang_1@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_ocelot.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index de1c849a0a70..4ed74d509d6a 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -47,9 +47,13 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, void *injection; __be32 *prefix; u32 rew_op = 0; + u64 qos_class; ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + qos_class = netdev_get_num_tc(netdev) ? + netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; + injection = skb_push(skb, OCELOT_TAG_LEN); prefix = skb_push(skb, OCELOT_SHORT_PREFIX_LEN); @@ -57,7 +61,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, memset(injection, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(injection, 1); ocelot_ifh_set_src(injection, ds->num_ports); - ocelot_ifh_set_qos_class(injection, skb->priority); + ocelot_ifh_set_qos_class(injection, qos_class); ocelot_ifh_set_vlan_tci(injection, vlan_tci); ocelot_ifh_set_tag_type(injection, tag_type); From eccffcf4657ab9a148faaa0eb354d2a091caf552 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 23 Dec 2021 15:39:28 +0800 Subject: [PATCH 720/868] net: stmmac: ptp: fix potentially overflowing expression Convert the u32 variable to type u64 in a context where expression of type u64 is required to avoid potential overflow. Fixes: e9e3720002f6 ("net: stmmac: ptp: update tas basetime after ptp adjust") Signed-off-by: Xiaoliang Yang Link: https://lore.kernel.org/r/20211223073928.37371-1-xiaoliang.yang_1@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 580cc035536b..be9b58b2abf9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -102,7 +102,7 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) time.tv_nsec = priv->plat->est->btr_reserve[0]; time.tv_sec = priv->plat->est->btr_reserve[1]; basetime = timespec64_to_ktime(time); - cycle_time = priv->plat->est->ctr[1] * NSEC_PER_SEC + + cycle_time = (u64)priv->plat->est->ctr[1] * NSEC_PER_SEC + priv->plat->est->ctr[0]; time = stmmac_calc_tas_basetime(basetime, current_time_ns, From d95a56207c078e2019cf6659d890ec1e987e8420 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Thu, 23 Dec 2021 16:31:38 +0100 Subject: [PATCH 721/868] net: bridge: fix ioctl old_deviceless bridge argument Commit 561d8352818f ("bridge: use ndo_siocdevprivate") changed the source and destination arguments of copy_{to,from}_user in bridge's old_deviceless() from args[1] to uarg breaking SIOC{G,S}IFBR ioctls. Commit cbd7ad29a507 ("net: bridge: fix ioctl old_deviceless bridge argument") fixed only BRCTL_{ADD,DEL}_BRIDGES commands leaving BRCTL_GET_BRIDGES one untouched. The fixes BRCTL_GET_BRIDGES as well and has been tested with busybox's brctl. Example of broken brctl: $ brctl show bridge name bridge id STP enabled interfaces brctl: can't get bridge name for index 0: No such device or address Example of fixed brctl: $ brctl show bridge name bridge id STP enabled interfaces br0 8000.000000000000 no Fixes: 561d8352818f ("bridge: use ndo_siocdevprivate") Signed-off-by: Remi Pommarel Reviewed-by: Arnd Bergmann Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/all/20211223153139.7661-2-repk@triplefau.lt/ Signed-off-by: Jakub Kicinski --- net/bridge/br_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index db4ab2c2ce18..891cfcf45644 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -337,7 +337,7 @@ static int old_deviceless(struct net *net, void __user *uarg) args[2] = get_bridge_ifindices(net, indices, args[2]); - ret = copy_to_user(uarg, indices, + ret = copy_to_user((void __user *)args[1], indices, array_size(args[2], sizeof(int))) ? -EFAULT : args[2]; From 45bf944e6703d43fe5e285808312acd8a34c1a24 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 23 Dec 2021 17:27:01 +0800 Subject: [PATCH 722/868] r8152: fix the force speed doesn't work for RTL8156 It needs to set mdio force mode. Otherwise, link off always occurs when setting force speed. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f9877a3e83ac..a817dfd5c9eb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6584,6 +6584,21 @@ static bool rtl8153_in_nway(struct r8152 *tp) return true; } +static void r8156_mdio_force_mode(struct r8152 *tp) +{ + u16 data; + + /* Select force mode through 0xa5b4 bit 15 + * 0: MDIO force mode + * 1: MMD force mode + */ + data = ocp_reg_read(tp, 0xa5b4); + if (data & BIT(15)) { + data &= ~BIT(15); + ocp_reg_write(tp, 0xa5b4, data); + } +} + static void set_carrier(struct r8152 *tp) { struct net_device *netdev = tp->netdev; @@ -8016,6 +8031,7 @@ static void r8156_init(struct r8152 *tp) ocp_data |= ACT_ODMA; ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_CONFIG, ocp_data); + r8156_mdio_force_mode(tp); rtl_tally_reset(tp); tp->coalesce = 15000; /* 15 us */ @@ -8145,6 +8161,7 @@ static void r8156b_init(struct r8152 *tp) ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN); ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data); + r8156_mdio_force_mode(tp); rtl_tally_reset(tp); tp->coalesce = 15000; /* 15 us */ From b24edca309535c2d9af86aab95d64065f6ef1d26 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 23 Dec 2021 17:27:02 +0800 Subject: [PATCH 723/868] r8152: sync ocp base There are some chances that the actual base of hardware is different from the value recorded by driver, so we have to reset the variable of ocp_base to sync it. Set ocp_base to -1. Then, it would be updated and the new base would be set to the hardware next time. Signed-off-by: Hayes Wang Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index a817dfd5c9eb..3085e8118d7f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "12" /* Information for net */ -#define NET_VERSION "11" +#define NET_VERSION "12" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -4016,6 +4016,11 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type) ocp_write_word(tp, type, PLA_BP_BA, 0); } +static inline void rtl_reset_ocp_base(struct r8152 *tp) +{ + tp->ocp_base = -1; +} + static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) { u16 data, check; @@ -4087,8 +4092,6 @@ static int rtl_post_ram_code(struct r8152 *tp, u16 key_addr, bool wait) rtl_phy_patch_request(tp, false, wait); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base); - return 0; } @@ -4800,6 +4803,8 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy, u32 len; u8 *data; + rtl_reset_ocp_base(tp); + if (sram_read(tp, SRAM_GPHY_FW_VER) >= __le16_to_cpu(phy->version)) { dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n"); return; @@ -4845,7 +4850,8 @@ static void rtl_ram_code_speed_up(struct r8152 *tp, struct fw_phy_speed_up *phy, } } - ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base); + rtl_reset_ocp_base(tp); + rtl_phy_patch_request(tp, false, wait); if (sram_read(tp, SRAM_GPHY_FW_VER) == __le16_to_cpu(phy->version)) @@ -4861,6 +4867,8 @@ static int rtl8152_fw_phy_ver(struct r8152 *tp, struct fw_phy_ver *phy_ver) ver_addr = __le16_to_cpu(phy_ver->ver.addr); ver = __le16_to_cpu(phy_ver->ver.data); + rtl_reset_ocp_base(tp); + if (sram_read(tp, ver_addr) >= ver) { dev_dbg(&tp->intf->dev, "PHY firmware has been the newest\n"); return 0; @@ -4877,6 +4885,8 @@ static void rtl8152_fw_phy_fixup(struct r8152 *tp, struct fw_phy_fixup *fix) { u16 addr, data; + rtl_reset_ocp_base(tp); + addr = __le16_to_cpu(fix->setting.addr); data = ocp_reg_read(tp, addr); @@ -4908,6 +4918,8 @@ static void rtl8152_fw_phy_union_apply(struct r8152 *tp, struct fw_phy_union *ph u32 length; int i, num; + rtl_reset_ocp_base(tp); + num = phy->pre_num; for (i = 0; i < num; i++) sram_write(tp, __le16_to_cpu(phy->pre_set[i].addr), @@ -4938,6 +4950,8 @@ static void rtl8152_fw_phy_nc_apply(struct r8152 *tp, struct fw_phy_nc *phy) u32 length, i, num; __le16 *data; + rtl_reset_ocp_base(tp); + mode_reg = __le16_to_cpu(phy->mode_reg); sram_write(tp, mode_reg, __le16_to_cpu(phy->mode_pre)); sram_write(tp, __le16_to_cpu(phy->ba_reg), @@ -5107,6 +5121,7 @@ post_fw: if (rtl_fw->post_fw) rtl_fw->post_fw(tp); + rtl_reset_ocp_base(tp); strscpy(rtl_fw->version, fw_hdr->version, RTL_VER_SIZE); dev_info(&tp->intf->dev, "load %s successfully\n", rtl_fw->version); } @@ -8484,6 +8499,8 @@ static int rtl8152_resume(struct usb_interface *intf) mutex_lock(&tp->control); + rtl_reset_ocp_base(tp); + if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) ret = rtl8152_runtime_resume(tp); else @@ -8499,6 +8516,7 @@ static int rtl8152_reset_resume(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); clear_bit(SELECTIVE_SUSPEND, &tp->flags); + rtl_reset_ocp_base(tp); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); set_ethernet_addr(tp, true); From 391e5975c0208ce3739587b33eba08be3e473d79 Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Thu, 23 Dec 2021 16:36:33 +0900 Subject: [PATCH 724/868] net: stmmac: dwmac-visconti: Fix value of ETHER_CLK_SEL_FREQ_SEL_2P5M ETHER_CLK_SEL_FREQ_SEL_2P5M is not 0 bit of the register. This is a value, which is 0. Fix from BIT(0) to 0. Reported-by: Yuji Ishikawa Fixes: b38dd98ff8d0 ("net: stmmac: Add Toshiba Visconti SoCs glue driver") Signed-off-by: Nobuhiro Iwamatsu Link: https://lore.kernel.org/r/20211223073633.101306-1-nobuhiro1.iwamatsu@toshiba.co.jp Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index 66fc8be34bb7..e2e0f977875d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -26,7 +26,7 @@ #define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8)) #define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9) #define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8) -#define ETHER_CLK_SEL_FREQ_SEL_2P5M BIT(0) +#define ETHER_CLK_SEL_FREQ_SEL_2P5M 0 #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0) #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10) #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11) From 26a8b09437804fabfb1db080d676b96c0de68e7c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 22 Dec 2021 11:50:23 +0100 Subject: [PATCH 725/868] platform/x86: intel_pmc_core: fix memleak on registration failure In case device registration fails during module initialisation, the platform device structure needs to be freed using platform_device_put() to properly free all resources (e.g. the device name). Fixes: 938835aa903a ("platform/x86: intel_pmc_core: do not create a static struct device") Cc: stable@vger.kernel.org # 5.9 Signed-off-by: Johan Hovold Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211222105023.6205-1-johan@kernel.org Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/pltdrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c index 73797680b895..15ca8afdd973 100644 --- a/drivers/platform/x86/intel/pmc/pltdrv.c +++ b/drivers/platform/x86/intel/pmc/pltdrv.c @@ -65,7 +65,7 @@ static int __init pmc_core_platform_init(void) retval = platform_device_register(pmc_core_device); if (retval) - kfree(pmc_core_device); + platform_device_put(pmc_core_device); return retval; } From 736ef37fd9a44f5966e25319d08ff7ea99ac79e8 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:40 +0000 Subject: [PATCH 726/868] udp: using datalen to cap ipv6 udp max gso segments The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, this is checked in udp_send_skb(). skb->len contains network and transport header len here, we should use only data len instead. This is the ipv6 counterpart to the below referenced commit, which missed the ipv6 change Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-1-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a2caca6ccf11..8cde9efd7919 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1204,7 +1204,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } From 5471d5226c3b39b3d2f7011c082d5715795bd65c Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:41 +0000 Subject: [PATCH 727/868] selftests: Calculate udpgso segment count without header adjustment The below referenced commit correctly updated the computation of number of segments (gso_size) by using only the gso payload size and removing the header lengths. With this change the regression test started failing. Update the tests to match this new behavior. Both IPv4 and IPv6 tests are updated, as a separate patch in this series will update udp_v6_send_skb to match this change in udp_send_skb. Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-2-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index c66da6ffd6d8..7badaf215de2 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -156,13 +156,13 @@ struct testcase testcases_v4[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -259,13 +259,13 @@ struct testcase testcases_v6[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, From 4eb1782eaa9fa1c224ad1fa0d13a9f09c3ab2d80 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 23 Dec 2021 17:43:14 +0100 Subject: [PATCH 728/868] recordmcount.pl: fix typo in s390 mcount regex Commit 85bf17b28f97 ("recordmcount.pl: look for jgnop instruction as well as bcrl on s390") added a new alternative mnemonic for the existing brcl instruction. This is required for the combination old gcc version (pre 9.0) and binutils since version 2.37. However at the same time this commit introduced a typo, replacing brcl with bcrl. As a result no mcount locations are detected anymore with old gcc versions (pre 9.0) and binutils before version 2.37. Fix this by using the correct mnemonic again. Reported-by: Miroslav Benes Cc: Jerome Marchand Cc: Fixes: 85bf17b28f97 ("recordmcount.pl: look for jgnop instruction as well as bcrl on s390") Link: https://lore.kernel.org/r/alpine.LSU.2.21.2112230949520.19849@pobox.suse.cz Signed-off-by: Heiko Carstens --- scripts/recordmcount.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 52a000b057a5..3ccb2c70add4 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -219,7 +219,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 64) { if ($cc =~ /-DCC_USING_HOTPATCH/) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(bcrl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; } $alignment = 8; From b45396afa4177f2b1ddfeff7185da733fade1dc3 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 24 Dec 2021 02:14:59 +0000 Subject: [PATCH 729/868] net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in __fixed_phy_register The fixed_phy_get_gpiod function() returns NULL, it doesn't return error pointers, using NULL checking to fix this.i Fixes: 5468e82f7034 ("net: phy: fixed-phy: Drop GPIO from fixed_phy_add()") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211224021500.10362-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c65fb5f5d2dc..a0c256bd5441 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -239,8 +239,8 @@ static struct phy_device *__fixed_phy_register(unsigned int irq, /* Check if we have a GPIO associated with this fixed phy */ if (!gpiod) { gpiod = fixed_phy_get_gpiod(np); - if (IS_ERR(gpiod)) - return ERR_CAST(gpiod); + if (!gpiod) + return ERR_PTR(-EINVAL); } /* Get the next available PHY address, up to PHY_MAX_ADDR */ From 5ec7d18d1813a5bead0b495045606c93873aecbb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 23 Dec 2021 13:04:30 -0500 Subject: [PATCH 730/868] sctp: use call_rcu to free endpoint This patch is to delay the endpoint free by calling call_rcu() to fix another use-after-free issue in sctp_sock_dump(): BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 Call Trace: __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 spin_lock_bh include/linux/spinlock.h:334 [inline] __lock_sock+0x203/0x350 net/core/sock.c:2253 lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 lock_sock include/net/sock.h:1492 [inline] sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 This issue occurs when asoc is peeled off and the old sk is freed after getting it by asoc->base.sk and before calling lock_sock(sk). To prevent the sk free, as a holder of the sk, ep should be alive when calling lock_sock(). This patch uses call_rcu() and moves sock_put and ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to hold the ep under rcu_read_lock in sctp_transport_traverse_process(). If sctp_endpoint_hold() returns true, it means this ep is still alive and we have held it and can continue to dump it; If it returns false, it means this ep is dead and can be freed after rcu_read_unlock, and we should skip it. In sctp_sock_dump(), after locking the sk, if this ep is different from tsp->asoc->ep, it means during this dumping, this asoc was peeled off before calling lock_sock(), and the sk should be skipped; If this ep is the same with tsp->asoc->ep, it means no peeloff happens on this asoc, and due to lock_sock, no peeloff will happen either until release_sock. Note that delaying endpoint free won't delay the port release, as the port release happens in sctp_endpoint_destroy() before calling call_rcu(). Also, freeing endpoint by call_rcu() makes it safe to access the sk by asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). Thanks Jones to bring this issue up. v1->v2: - improve the changelog. - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com Reported-by: Lee Jones Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 +++--- include/net/sctp/structs.h | 3 ++- net/sctp/diag.c | 12 ++++++------ net/sctp/endpointola.c | 23 +++++++++++++++-------- net/sctp/socket.c | 23 +++++++++++++++-------- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 189fdb9db162..d314a180ab93 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 899c29c326ba..8dabd8800006 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1355,6 +1355,7 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1370,7 +1371,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 760b367644c1..a7d623171501 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -290,9 +290,8 @@ out: return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -302,6 +301,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -344,9 +345,8 @@ release: return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -505,8 +505,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 48c9c2c7602f..efffde7f2328 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 33391254fa82..ad5028a07b18 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5338,11 +5338,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5351,26 +5352,32 @@ again: tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) From e6007b85dfa284c4726c249e3c2fc4181ca8e179 Mon Sep 17 00:00:00 2001 From: Ma Xinjian Date: Fri, 24 Dec 2021 17:59:28 +0800 Subject: [PATCH 731/868] selftests: mptcp: Remove the deprecated config NFT_COUNTER NFT_COUNTER was removed since 390ad4295aa ("netfilter: nf_tables: make counter support built-in") LKP/0Day will check if all configs listing under selftests are able to be enabled properly. For the missing configs, it will report something like: LKP WARN miss config CONFIG_NFT_COUNTER= of net/mptcp/config - it's not reasonable to keep the deprecated configs. - configs under kselftests are recommended by corresponding tests. So if some configs are missing, it will impact the testing results Reported-by: kernel test robot Signed-off-by: Ma Xinjian Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 0faaccd21447..2b82628decb1 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -9,7 +9,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m From 0129ab1f268b6cf88825eae819b9b84aa0a85634 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 24 Dec 2021 21:12:32 -0800 Subject: [PATCH 732/868] kfence: fix memory leak when cat kfence objects Hulk robot reported a kmemleak problem: unreferenced object 0xffff93d1d8cc02e8 (size 248): comm "cat", pid 23327, jiffies 4624670141 (age 495992.217s) hex dump (first 32 bytes): 00 40 85 19 d4 93 ff ff 00 10 00 00 00 00 00 00 .@.............. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: seq_open+0x2a/0x80 full_proxy_open+0x167/0x1e0 do_dentry_open+0x1e1/0x3a0 path_openat+0x961/0xa20 do_filp_open+0xae/0x120 do_sys_openat2+0x216/0x2f0 do_sys_open+0x57/0x80 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 unreferenced object 0xffff93d419854000 (size 4096): comm "cat", pid 23327, jiffies 4624670141 (age 495992.217s) hex dump (first 32 bytes): 6b 66 65 6e 63 65 2d 23 32 35 30 3a 20 30 78 30 kfence-#250: 0x0 30 30 30 30 30 30 30 37 35 34 62 64 61 31 32 2d 0000000754bda12- backtrace: seq_read_iter+0x313/0x440 seq_read+0x14b/0x1a0 full_proxy_read+0x56/0x80 vfs_read+0xa5/0x1b0 ksys_read+0xa0/0xf0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 I find that we can easily reproduce this problem with the following commands: cat /sys/kernel/debug/kfence/objects echo scan > /sys/kernel/debug/kmemleak cat /sys/kernel/debug/kmemleak The leaked memory is allocated in the stack below: do_syscall_64 do_sys_open do_dentry_open full_proxy_open seq_open ---> alloc seq_file vfs_read full_proxy_read seq_read seq_read_iter traverse ---> alloc seq_buf And it should have been released in the following process: do_syscall_64 syscall_exit_to_user_mode exit_to_user_mode_prepare task_work_run ____fput __fput full_proxy_release ---> free here However, the release function corresponding to file_operations is not implemented in kfence. As a result, a memory leak occurs. Therefore, the solution to this problem is to implement the corresponding release function. Link: https://lkml.kernel.org/r/20211206133628.2822545-1-libaokun1@huawei.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Baokun Li Reported-by: Hulk Robot Acked-by: Marco Elver Reviewed-by: Kefeng Wang Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Yu Kuai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kfence/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 09945784df9e..a19154a8d196 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -683,6 +683,7 @@ static const struct file_operations objects_fops = { .open = open_objects, .read = seq_read, .llseek = seq_lseek, + .release = seq_release, }; static int __init kfence_debugfs_init(void) From 338635340669d5b317c7e8dcf4fff4a0f3651d87 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 24 Dec 2021 21:12:35 -0800 Subject: [PATCH 733/868] mm: mempolicy: fix THP allocations escaping mempolicy restrictions alloc_pages_vma() may try to allocate THP page on the local NUMA node first: page = __alloc_pages_node(hpage_node, gfp | __GFP_THISNODE | __GFP_NORETRY, order); And if the allocation fails it retries allowing remote memory: if (!page && (gfp & __GFP_DIRECT_RECLAIM)) page = __alloc_pages_node(hpage_node, gfp, order); However, this retry allocation completely ignores memory policy nodemask allowing allocation to escape restrictions. The first appearance of this bug seems to be the commit ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings"). The bug disappeared later in the commit 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask") and reappeared again in slightly different form in the commit 76e654cc91bb ("mm, page_alloc: allow hugepage fallback to remote nodes when madvised") Fix this by passing correct nodemask to the __alloc_pages() call. The demonstration/reproducer of the problem: $ mount -oremount,size=4G,huge=always /dev/shm/ $ echo always > /sys/kernel/mm/transparent_hugepage/defrag $ cat mbind_thp.c #include #include #include #include #include #include #include #include #define SIZE 2ULL << 30 int main(int argc, char **argv) { int fd; unsigned long long i; char *addr; pid_t pid; char buf[100]; unsigned long nodemask = 1; fd = open("/dev/shm/test", O_RDWR|O_CREAT); assert(fd > 0); assert(ftruncate(fd, SIZE) == 0); addr = mmap(NULL, SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); assert(mbind(addr, SIZE, MPOL_BIND, &nodemask, 2, MPOL_MF_STRICT|MPOL_MF_MOVE)==0); for (i = 0; i < SIZE; i+=4096) { addr[i] = 1; } pid = getpid(); snprintf(buf, sizeof(buf), "grep shm /proc/%d/numa_maps", pid); system(buf); sleep(10000); return 0; } $ gcc mbind_thp.c -o mbind_thp -lnuma $ numactl -H available: 2 nodes (0-1) node 0 cpus: 0 2 node 0 size: 1918 MB node 0 free: 1595 MB node 1 cpus: 1 3 node 1 size: 2014 MB node 1 free: 1731 MB node distances: node 0 1 0: 10 20 1: 20 10 $ rm -f /dev/shm/test; taskset -c 0 ./mbind_thp 7fd970a00000 bind:0 file=/dev/shm/test dirty=524288 active=0 N0=396800 N1=127488 kernelpagesize_kB=4 Link: https://lkml.kernel.org/r/20211208165343.22349-1-arbn@yandex-team.com Fixes: ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings") Signed-off-by: Andrey Ryabinin Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: David Rientjes Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 10e9c87260ed..f6248affaf38 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2140,8 +2140,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * memory with both reclaim and compact as well. */ if (!page && (gfp & __GFP_DIRECT_RECLAIM)) - page = __alloc_pages_node(hpage_node, - gfp, order); + page = __alloc_pages(gfp, order, hpage_node, nmask); goto out; } From 71d2bcec2d4d69ff109c497e6611d6c53c8926d4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 24 Dec 2021 21:12:39 -0800 Subject: [PATCH 734/868] kernel/crash_core: suppress unknown crashkernel parameter warning When booting with crashkernel= on the kernel command line a warning similar to Kernel command line: ro console=ttyS0 crashkernel=256M Unknown kernel command line parameters "crashkernel=256M", will be passed to user space. is printed. This comes from crashkernel= being parsed independent from the kernel parameter handling mechanism. So the code in init/main.c doesn't know that crashkernel= is a valid kernel parameter and prints this incorrect warning. Suppress the warning by adding a dummy early_param handler for crashkernel=. Link: https://lkml.kernel.org/r/20211208133443.6867-1-prudo@redhat.com Fixes: 86d1919a4fb0 ("init: print out unknown kernel parameters") Signed-off-by: Philipp Rudo Acked-by: Baoquan He Cc: Andrew Halaney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/crash_core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index eb53f5ec62c9..256cf6db573c 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -295,6 +296,16 @@ int __init parse_crashkernel_low(char *cmdline, "crashkernel=", suffix_tbl[SUFFIX_LOW]); } +/* + * Add a dummy early_param handler to mark crashkernel= as a known command line + * parameter and suppress incorrect warnings in init/main.c. + */ +static int __init parse_crashkernel_dummy(char *arg) +{ + return 0; +} +early_param("crashkernel", parse_crashkernel_dummy); + Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, void *data, size_t data_len) { From 7e5b901e4609441fc6bb94701c4743b39b6c277e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 24 Dec 2021 21:12:42 -0800 Subject: [PATCH 735/868] MAINTAINERS: mark more list instances as moderated Some lists that are moderated are not marked as moderated consistently, so mark them all as moderated. Link: https://lkml.kernel.org/r/20211209001330.18558-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Miquel Raynal Cc: Conor Culhane Cc: Ryder Lee Cc: Jianjun Wang Cc: Alexandre Belloni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8912b2c1260c..fb18ce7168aa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14845,7 +14845,7 @@ PCIE DRIVER FOR MEDIATEK M: Ryder Lee M: Jianjun Wang L: linux-pci@vger.kernel.org -L: linux-mediatek@lists.infradead.org +L: linux-mediatek@lists.infradead.org (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/pci/mediatek* F: drivers/pci/controller/*mediatek* @@ -17423,7 +17423,7 @@ F: drivers/video/fbdev/sm712* SILVACO I3C DUAL-ROLE MASTER M: Miquel Raynal M: Conor Culhane -L: linux-i3c@lists.infradead.org +L: linux-i3c@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml F: drivers/i3c/master/svc-i3c-master.c From e37e7b0b3bd52ec4f8ab71b027bcec08f57f1b3b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 24 Dec 2021 21:12:45 -0800 Subject: [PATCH 736/868] mm, hwpoison: fix condition in free hugetlb page path When a memory error hits a tail page of a free hugepage, __page_handle_poison() is expected to be called to isolate the error in 4kB unit, but it's not called due to the outdated if-condition in memory_failure_hugetlb(). This loses the chance to isolate the error in the finer unit, so it's not optimal. Drop the condition. This "(p != head && TestSetPageHWPoison(head)" condition is based on the old semantics of PageHWPoison on hugepage (where PG_hwpoison flag was set on the subpage), so it's not necessray any more. By getting to set PG_hwpoison on head page for hugepages, concurrent error events on different subpages in a single hugepage can be prevented by TestSetPageHWPoison(head) at the beginning of memory_failure_hugetlb(). So dropping the condition should not reopen the race window originally mentioned in commit b985194c8c0a ("hwpoison, hugetlb: lock_page/unlock_page does not match for handling a free hugepage") [naoya.horiguchi@linux.dev: fix "HardwareCorrupted" counter] Link: https://lkml.kernel.org/r/20211220084851.GA1460264@u2004 Link: https://lkml.kernel.org/r/20211210110208.879740-1-naoya.horiguchi@linux.dev Signed-off-by: Naoya Horiguchi Reported-by: Fei Luo Reviewed-by: Mike Kravetz Cc: [5.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 07c875fdeaf0..682828b94ab6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1470,17 +1470,12 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (!(flags & MF_COUNT_INCREASED)) { res = get_hwpoison_page(p, flags); if (!res) { - /* - * Check "filter hit" and "race with other subpage." - */ lock_page(head); - if (PageHWPoison(head)) { - if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != head && TestSetPageHWPoison(head))) { + if (hwpoison_filter(p)) { + if (TestClearPageHWPoison(head)) num_poisoned_pages_dec(); - unlock_page(head); - return 0; - } + unlock_page(head); + return 0; } unlock_page(head); res = MF_FAILED; From 94ab10dd42a70acc5208a41325617e3d9cf81a70 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 24 Dec 2021 21:12:48 -0800 Subject: [PATCH 737/868] mm: delete unsafe BUG from page_cache_add_speculative() It is not easily reproducible, but on 5.16-rc I have several times hit the VM_BUG_ON_PAGE(PageTail(page), page) in page_cache_add_speculative(): usually from filemap_get_read_batch() for an ext4 read, yesterday from next_uptodate_page() from filemap_map_pages() for a shmem fault. That BUG used to be placed where page_ref_add_unless() had succeeded, but now it is placed before folio_ref_add_unless() is attempted: that is not safe, since it is only the acquired reference which makes the page safe from racing THP collapse or split. We could keep the BUG, checking PageTail only when folio_ref_try_add_rcu() has succeeded; but I don't think it adds much value - just delete it. Link: https://lkml.kernel.org/r/8b98fc6f-3439-8614-c3f3-945c659a1aba@google.com Fixes: 020853b6f5ea ("mm: Add folio_try_get_rcu()") Signed-off-by: Hugh Dickins Acked-by: Kirill A. Shutemov Reviewed-by: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: William Kucharski Cc: Christoph Hellwig Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 605246452305..d150a9082b31 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -285,7 +285,6 @@ static inline struct inode *folio_inode(struct folio *folio) static inline bool page_cache_add_speculative(struct page *page, int count) { - VM_BUG_ON_PAGE(PageTail(page), page); return folio_ref_try_add_rcu((struct folio *)page, count); } From 595ec1973c276f6c0c1de8aca5eef8dfd81f9b49 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 24 Dec 2021 21:12:51 -0800 Subject: [PATCH 738/868] mm/page_alloc: fix __alloc_size attribute for alloc_pages_exact_nid The second parameter of alloc_pages_exact_nid is the one indicating the size of memory pointed by the returned pointer. Link: https://lkml.kernel.org/r/YbjEgwhn4bGblp//@coeus Fixes: abd58f38dfb4 ("mm/page_alloc: add __alloc_size attributes for better bounds checking") Signed-off-by: Thibaut Sautereau Acked-by: Kees Cook Cc: Daniel Micay Cc: Levente Polyak Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b976c4177299..8fcc38467af6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -624,7 +624,7 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1); +__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) From 34796417964b8d0aef45a99cf6c2d20cebe33733 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 24 Dec 2021 21:12:54 -0800 Subject: [PATCH 739/868] mm/damon/dbgfs: protect targets destructions with kdamond_lock DAMON debugfs interface iterates current monitoring targets in 'dbgfs_target_ids_read()' while holding the corresponding 'kdamond_lock'. However, it also destructs the monitoring targets in 'dbgfs_before_terminate()' without holding the lock. This can result in a use_after_free bug. This commit avoids the race by protecting the destruction with the corresponding 'kdamond_lock'. Link: https://lkml.kernel.org/r/20211221094447.2241-1-sj@kernel.org Reported-by: Sangwoo Bae Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface") Signed-off-by: SeongJae Park Cc: [5.15.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/dbgfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 1efac0022e9a..4fbd729edc9e 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -650,10 +650,12 @@ static void dbgfs_before_terminate(struct damon_ctx *ctx) if (!targetid_is_pid(ctx)) return; + mutex_lock(&ctx->kdamond_lock); damon_for_each_target_safe(t, next, ctx) { put_pid((struct pid *)t->id); damon_destroy_target(t); } + mutex_unlock(&ctx->kdamond_lock); } static struct damon_ctx *dbgfs_new_ctx(void) From 2a57d83c78f889bf3f54eede908d0643c40d5418 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 24 Dec 2021 21:12:58 -0800 Subject: [PATCH 740/868] mm/hwpoison: clear MF_COUNT_INCREASED before retrying get_any_page() Hulk Robot reported a panic in put_page_testzero() when testing madvise() with MADV_SOFT_OFFLINE. The BUG() is triggered when retrying get_any_page(). This is because we keep MF_COUNT_INCREASED flag in second try but the refcnt is not increased. page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0) ------------[ cut here ]------------ kernel BUG at include/linux/mm.h:737! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 5 PID: 2135 Comm: sshd Tainted: G B 5.16.0-rc6-dirty #373 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: release_pages+0x53f/0x840 Call Trace: free_pages_and_swap_cache+0x64/0x80 tlb_flush_mmu+0x6f/0x220 unmap_page_range+0xe6c/0x12c0 unmap_single_vma+0x90/0x170 unmap_vmas+0xc4/0x180 exit_mmap+0xde/0x3a0 mmput+0xa3/0x250 do_exit+0x564/0x1470 do_group_exit+0x3b/0x100 __do_sys_exit_group+0x13/0x20 __x64_sys_exit_group+0x16/0x20 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Modules linked in: ---[ end trace e99579b570fe0649 ]--- RIP: 0010:release_pages+0x53f/0x840 Link: https://lkml.kernel.org/r/20211221074908.3910286-1-liushixin2@huawei.com Fixes: b94e02822deb ("mm,hwpoison: try to narrow window race for free pages") Signed-off-by: Liu Shixin Reported-by: Hulk Robot Reviewed-by: Oscar Salvador Acked-by: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 682828b94ab6..3a274468f193 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2234,6 +2234,7 @@ retry: } else if (ret == 0) { if (soft_offline_free_page(page) && try_again) { try_again = false; + flags &= ~MF_COUNT_INCREASED; goto retry; } } From fc74e0a40e4f9fd0468e34045b0c45bba11dcbb2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Dec 2021 13:17:17 -0800 Subject: [PATCH 741/868] Linux 5.16-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d85f1ff79f5c..17b4319ad2ff 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Gobble Gobble # *DOCUMENTATION* From 0f9d36af8f211d296ffd23bdce61a72cdfbb1a3c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sat, 18 Dec 2021 16:19:09 -0800 Subject: [PATCH 742/868] drm/i915: Fix possible uninitialized variable in parallel extension 'prev_engine' was declared inside the output loop and checked in the inner after at least 1 pass of either loop. The variable should be declared outside both loops as it needs to be persistent across the entire loop structure. Fixes: e5e32171a2cf ("drm/i915/guc: Connect UAPI to GuC multi-lrc interface") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211219001909.24348-1-matthew.brost@intel.com (cherry picked from commit cbffbac9c14220b8716b0a9c29d72243f6b14ef3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..c37c9f0d8167 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -564,6 +564,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, container_of_user(base, typeof(*ext), base); const struct set_proto_ctx_engines *set = data; struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance prev_engine; u64 flags; int err = 0, n, i, j; u16 slot, width, num_siblings; @@ -629,7 +630,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, /* Create contexts / engines */ for (i = 0; i < width; ++i) { intel_engine_mask_t current_mask = 0; - struct i915_engine_class_instance prev_engine; for (j = 0; j < num_siblings; ++j) { struct i915_engine_class_instance ci; From d46f329a3f6048e04736e86cb13c880645048792 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 11:59:13 -0800 Subject: [PATCH 743/868] drm/i915: Increment composite fence seqno Increment composite fence seqno on each fence creation. Fixes: 544460c33821 ("drm/i915: Multi-BB execbuf") Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214195913.35735-1-matthew.brost@intel.com (cherry picked from commit 62eeb9ae1364cd96991ccc6e3c5c69d66b8c64df) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9b24d9b5ade1..cb0bf6ffd0e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3017,7 +3017,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno, + eb->context->parallel.seqno++, false); if (!fence_array) { kfree(fences); From f34e8875ae244462711e31fcc4a82db13a16d36f Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 22 Nov 2021 09:54:00 -0600 Subject: [PATCH 744/868] dt-bindings: spi: cadence-quadspi: document "intel,socfpga-qspi" The QSPI controller on Intel's SoCFPGA platform does not implement the CQSPI_REG_WR_COMPLETION_CTRL register, thus a write to this register results in a crash. Introduce the dts compatible "intel,socfpga-qspi" to differentiate the hardware. Acked-by: Pratyush Yadav Reviewed-by: Rob Herring Signed-off-by: Dinh Nguyen --- v3: revert to "intel,socfpga-qspi" v2: change binding to "cdns,qspi-nor-0010" to be more generic for other platforms --- Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml index ca155abbda7a..037f41f58503 100644 --- a/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml +++ b/Documentation/devicetree/bindings/spi/cdns,qspi-nor.yaml @@ -29,6 +29,7 @@ properties: - ti,am654-ospi - intel,lgm-qspi - xlnx,versal-ospi-1.0 + - intel,socfpga-qspi - const: cdns,qspi-nor - const: cdns,qspi-nor From 36de991e93908f7ad5c2a0eac9c4ecf8b723fa4a Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 22 Nov 2021 09:10:03 -0600 Subject: [PATCH 745/868] ARM: dts: socfpga: change qspi to "intel,socfpga-qspi" Because of commit 9cb2ff111712 ("spi: cadence-quadspi: Disable Auto-HW polling"), which does a write to the CQSPI_REG_WR_COMPLETION_CTRL register regardless of any condition. Well, the Cadence QuadSPI controller on Intel's SoCFPGA platforms does not implement the CQSPI_REG_WR_COMPLETION_CTRL register, thus a write to this register results in a crash! So starting with v5.16, I introduced the patch 98d948eb833 ("spi: cadence-quadspi: fix write completion support"), which adds the dts compatible "intel,socfpga-qspi" that is specific for versions that doesn't have the CQSPI_REG_WR_COMPLETION_CTRL register implemented. Signed-off-by: Dinh Nguyen --- v3: revert back to "intel,socfpga-qspi" v2: use both "cdns,qspi-nor" and "cdns,qspi-nor-0010" --- arch/arm/boot/dts/socfpga.dtsi | 2 +- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 +- arch/arm64/boot/dts/intel/socfpga_agilex.dtsi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 0b021eef0b53..7c1d6423d7f8 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -782,7 +782,7 @@ }; qspi: spi@ff705000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff705000 0x1000>, diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index a574ea91d9d3..3ba431dfa8c9 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -756,7 +756,7 @@ }; qspi: spi@ff809000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff809000 0x100>, diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index d301ac0d406b..3ec301bd08a9 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -594,7 +594,7 @@ }; qspi: spi@ff8d2000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff8d2000 0x100>, diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 163f33b46e4f..0dd2d2ee765a 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -628,7 +628,7 @@ }; qspi: spi@ff8d2000 { - compatible = "cdns,qspi-nor"; + compatible = "intel,socfpga-qspi", "cdns,qspi-nor"; #address-cells = <1>; #size-cells = <0>; reg = <0xff8d2000 0x100>, From c1833c3964d5bd8c163bd4e01736a38bc473cb8a Mon Sep 17 00:00:00 2001 From: William Zhao Date: Thu, 23 Dec 2021 12:33:16 -0500 Subject: [PATCH 746/868] ip6_vti: initialize __ip6_tnl_parm struct in vti6_siocdevprivate The "__ip6_tnl_parm" struct was left uninitialized causing an invalid load of random data when the "__ip6_tnl_parm" struct was used elsewhere. As an example, in the function "ip6_tnl_xmit_ctl()", it tries to access the "collect_md" member. With "__ip6_tnl_parm" being uninitialized and containing random data, the UBSAN detected that "collect_md" held a non-boolean value. The UBSAN issue is as follows: =============================================================== UBSAN: invalid-load in net/ipv6/ip6_tunnel.c:1025:14 load of value 30 is not a valid value for type '_Bool' CPU: 1 PID: 228 Comm: kworker/1:3 Not tainted 5.16.0-rc4+ #8 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work Call Trace: dump_stack_lvl+0x44/0x57 ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value+0x66/0x70 ? __cpuhp_setup_state+0x1d3/0x210 ip6_tnl_xmit_ctl.cold.52+0x2c/0x6f [ip6_tunnel] vti6_tnl_xmit+0x79c/0x1e96 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? vti6_rcv+0x100/0x100 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? rcu_read_lock_bh_held+0xc0/0xc0 ? lock_acquired+0x262/0xb10 dev_hard_start_xmit+0x1e6/0x820 __dev_queue_xmit+0x2079/0x3340 ? mark_lock.part.52+0xf7/0x1050 ? netdev_core_pick_tx+0x290/0x290 ? kvm_clock_read+0x14/0x30 ? kvm_sched_clock_read+0x5/0x10 ? sched_clock_cpu+0x15/0x200 ? find_held_lock+0x3a/0x1c0 ? lock_release+0x42f/0xc90 ? lock_downgrade+0x6b0/0x6b0 ? mark_held_locks+0xb7/0x120 ? neigh_connected_output+0x31f/0x470 ? lockdep_hardirqs_on+0x79/0x100 ? neigh_connected_output+0x31f/0x470 ? ip6_finish_output2+0x9b0/0x1d90 ? rcu_read_lock_bh_held+0x62/0xc0 ? ip6_finish_output2+0x9b0/0x1d90 ip6_finish_output2+0x9b0/0x1d90 ? ip6_append_data+0x330/0x330 ? ip6_mtu+0x166/0x370 ? __ip6_finish_output+0x1ad/0xfb0 ? nf_hook_slow+0xa6/0x170 ip6_output+0x1fb/0x710 ? nf_hook.constprop.32+0x317/0x430 ? ip6_finish_output+0x180/0x180 ? __ip6_finish_output+0xfb0/0xfb0 ? lock_is_held_type+0xd9/0x130 ndisc_send_skb+0xb33/0x1590 ? __sk_mem_raise_allocated+0x11cf/0x1560 ? dst_output+0x4a0/0x4a0 ? ndisc_send_rs+0x432/0x610 addrconf_dad_completed+0x30c/0xbb0 ? addrconf_rs_timer+0x650/0x650 ? addrconf_dad_work+0x73c/0x10e0 addrconf_dad_work+0x73c/0x10e0 ? addrconf_dad_completed+0xbb0/0xbb0 ? rcu_read_lock_sched_held+0xaf/0xe0 ? rcu_read_lock_bh_held+0xc0/0xc0 process_one_work+0x97b/0x1740 ? pwq_dec_nr_in_flight+0x270/0x270 worker_thread+0x87/0xbf0 ? process_one_work+0x1740/0x1740 kthread+0x3ac/0x490 ? set_kthread_struct+0x100/0x100 ret_from_fork+0x22/0x30 =============================================================== The solution is to initialize "__ip6_tnl_parm" struct to zeros in the "vti6_siocdevprivate()" function. Signed-off-by: William Zhao Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 527e9ead7449..5e9474bc54fc 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -808,6 +808,8 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { From 6d7373dabfd3933ee30c40fc8c09d2a788f6ece1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 27 Dec 2021 14:35:30 +0100 Subject: [PATCH 747/868] net/smc: fix using of uninitialized completions In smc_wr_tx_send_wait() the completion on index specified by pend->idx is initialized and after smc_wr_tx_send() was called the wait for completion starts. pend->idx is used to get the correct index for the wait, but the pend structure could already be cleared in smc_wr_tx_process_cqe(). Introduce pnd_idx to hold and use a local copy of the correct index. Fixes: 09c61d24f96d ("net/smc: wait for departure of an IB message") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 600ab5889227..79a7431f534e 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -358,18 +358,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout) { struct smc_wr_tx_pend *pend; + u32 pnd_idx; int rc; pend = container_of(priv, struct smc_wr_tx_pend, priv); pend->compl_requested = 1; - init_completion(&link->wr_tx_compl[pend->idx]); + pnd_idx = pend->idx; + init_completion(&link->wr_tx_compl[pnd_idx]); rc = smc_wr_tx_send(link, priv); if (rc) return rc; /* wait for completion by smc_wr_tx_process_cqe() */ rc = wait_for_completion_interruptible_timeout( - &link->wr_tx_compl[pend->idx], timeout); + &link->wr_tx_compl[pnd_idx], timeout); if (rc <= 0) rc = -ENODATA; if (rc > 0) From 6c25449e1a32c594d743df8e8258e8ef870b6a77 Mon Sep 17 00:00:00 2001 From: yangxingwu Date: Mon, 27 Dec 2021 16:29:51 +0800 Subject: [PATCH 748/868] net: udp: fix alignment problem in udp4_seq_show() $ cat /pro/net/udp before: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 after: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 Signed-off-by: yangxingwu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 15c6b450b8db..0cd6b857e7ec 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3075,7 +3075,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_puts(seq, " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { From 5f50153288452e10b6edd69ec9112c49442b054a Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Sun, 26 Dec 2021 21:32:45 -0500 Subject: [PATCH 749/868] atlantic: Fix buff_ring OOB in aq_ring_rx_clean The function obtain the next buffer without boundary check. We should return with I/O error code. The bug is found by fuzzing and the crash report is attached. It is an OOB bug although reported as use-after-free. [ 4.804724] BUG: KASAN: use-after-free in aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.805661] Read of size 4 at addr ffff888034fe93a8 by task ksoftirqd/0/9 [ 4.806505] [ 4.806703] CPU: 0 PID: 9 Comm: ksoftirqd/0 Tainted: G W 5.6.0 #34 [ 4.809030] Call Trace: [ 4.809343] dump_stack+0x76/0xa0 [ 4.809755] print_address_description.constprop.0+0x16/0x200 [ 4.810455] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.811234] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.813183] __kasan_report.cold+0x37/0x7c [ 4.813715] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.814393] kasan_report+0xe/0x20 [ 4.814837] aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.815499] ? hw_atl_b0_hw_ring_rx_receive+0x9a5/0xb90 [atlantic] [ 4.816290] aq_vec_poll+0x179/0x5d0 [atlantic] [ 4.816870] ? _GLOBAL__sub_I_65535_1_aq_pci_func_init+0x20/0x20 [atlantic] [ 4.817746] ? __next_timer_interrupt+0xba/0xf0 [ 4.818322] net_rx_action+0x363/0xbd0 [ 4.818803] ? call_timer_fn+0x240/0x240 [ 4.819302] ? __switch_to_asm+0x40/0x70 [ 4.819809] ? napi_busy_loop+0x520/0x520 [ 4.820324] __do_softirq+0x18c/0x634 [ 4.820797] ? takeover_tasklets+0x5f0/0x5f0 [ 4.821343] run_ksoftirqd+0x15/0x20 [ 4.821804] smpboot_thread_fn+0x2f1/0x6b0 [ 4.822331] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.823041] ? __kthread_parkme+0x80/0x100 [ 4.823571] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.824301] kthread+0x2b5/0x3b0 [ 4.824723] ? kthread_create_on_node+0xd0/0xd0 [ 4.825304] ret_from_fork+0x35/0x40 Signed-off-by: Zekun Shen Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 81b3756417ec..77e76c9efd32 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -366,6 +366,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -389,6 +393,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; From ca506fca461b260ab32952b610c3d4aadc6c11fd Mon Sep 17 00:00:00 2001 From: Matthias-Christian Ott Date: Sun, 26 Dec 2021 23:12:08 +0100 Subject: [PATCH 750/868] net: usb: pegasus: Do not drop long Ethernet frames The D-Link DSB-650TX (2001:4002) is unable to receive Ethernet frames that are longer than 1518 octets, for example, Ethernet frames that contain 802.1Q VLAN tags. The frames are sent to the pegasus driver via USB but the driver discards them because they have the Long_pkt field set to 1 in the received status report. The function read_bulk_callback of the pegasus driver treats such received "packets" (in the terminology of the hardware) as errors but the field simply does just indicate that the Ethernet frame (MAC destination to FCS) is longer than 1518 octets. It seems that in the 1990s there was a distinction between "giant" (> 1518) and "runt" (< 64) frames and the hardware includes flags to indicate this distinction. It seems that the purpose of the distinction "giant" frames was to not allow infinitely long frames due to transmission errors and to allow hardware to have an upper limit of the frame size. However, the hardware already has such limit with its 2048 octet receive buffer and, therefore, Long_pkt is merely a convention and should not be treated as a receive error. Actually, the hardware is even able to receive Ethernet frames with 2048 octets which exceeds the claimed limit frame size limit of the driver of 1536 octets (PEGASUS_MTU). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Matthias-Christian Ott Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index c4cd40b090fd..feb247e355f7 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb) goto goon; rx_status = buf[count - 2]; - if (rx_status & 0x1e) { + if (rx_status & 0x1c) { netif_dbg(pegasus, rx_err, net, "RX packet error %x\n", rx_status); net->stats.rx_errors++; - if (rx_status & 0x06) /* long or runt */ + if (rx_status & 0x04) /* runt */ net->stats.rx_length_errors++; if (rx_status & 0x08) net->stats.rx_crc_errors++; From 7175f02c4e5f5a9430113ab9ca0fd0ce98b28a51 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 26 Dec 2021 16:01:27 +0300 Subject: [PATCH 751/868] uapi: fix linux/nfc.h userspace compilation errors Replace sa_family_t with __kernel_sa_family_t to fix the following linux/nfc.h userspace compilation errors: /usr/include/linux/nfc.h:266:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; /usr/include/linux/nfc.h:274:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; Fixes: 23b7869c0fd0 ("NFC: add the NFC socket raw protocol") Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Dmitry V. Levin Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c9c744..aadad43d943a 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,7 +271,7 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; From 79b69a83705e621b258ac6d8ae6d3bfdb4b930aa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Dec 2021 13:03:47 +0100 Subject: [PATCH 752/868] nfc: uapi: use kernel size_t to fix user-space builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix user-space builds if it includes /usr/include/linux/nfc.h before some of other headers: /usr/include/linux/nfc.h:281:9: error: unknown type name ‘size_t’ 281 | size_t service_name_len; | ^~~~~~ Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index aadad43d943a..4fa4e979e948 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -278,7 +278,7 @@ struct sockaddr_nfc_llcp { __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ From 732bc2ff080c447f8524f40c970c481f5da6eed3 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 24 Dec 2021 07:07:39 -0800 Subject: [PATCH 753/868] selinux: initialize proto variable in selinux_ip_postroute_compat() Clang static analysis reports this warning hooks.c:5765:6: warning: 4th function call argument is an uninitialized value if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ selinux_parse_skb() can return ok without setting proto. The later call to selinux_xfrm_postroute_last() does an early check of proto and can return ok if the garbage proto value matches. So initialize proto. Cc: stable@vger.kernel.org Fixes: eef9b41622f2 ("selinux: cleanup selinux_xfrm_sock_rcv_skb() and selinux_xfrm_postroute_last()") Signed-off-by: Tom Rix [PM: typo/spelling and checkpatch.pl description fixes] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1afc06ffd969..dde4ecc0cd18 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5785,7 +5785,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, struct sk_security_struct *sksec; struct common_audit_data ad; struct lsm_network_audit net = {0,}; - u8 proto; + u8 proto = 0; sk = skb_to_full_sk(skb); if (sk == NULL) From 8c45096c60d6ce6341c374636100ed1b2c1c33a1 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 13 Dec 2021 16:17:02 +0800 Subject: [PATCH 754/868] drm/amd/pm: skip setting gfx cgpg in the s0ix suspend-resume In the s0ix entry need retain gfx in the gfxoff state,so here need't set gfx cgpg in the S0ix suspend-resume process. Moreover move the S0ix check into SMU12 can simplify the code condition check. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1712 Signed-off-by: Prike Liang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 7 ++----- drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8a3244585d80..8a817932acdf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1568,9 +1568,7 @@ static int smu_suspend(void *handle) smu->watermarks_bitmap &= ~(WATERMARKS_LOADED); - /* skip CGPG when in S0ix */ - if (smu->is_apu && !adev->in_s0ix) - smu_set_gfx_cgpg(&adev->smu, false); + smu_set_gfx_cgpg(&adev->smu, false); return 0; } @@ -1601,8 +1599,7 @@ static int smu_resume(void *handle) return ret; } - if (smu->is_apu) - smu_set_gfx_cgpg(&adev->smu, true); + smu_set_gfx_cgpg(&adev->smu, true); smu->disable_uclk_switch = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 43028f2cd28b..9c91e79c955f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -120,7 +120,8 @@ int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate) int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) { - if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + /* Until now the SMU12 only implemented for Renoir series so here neen't do APU check. */ + if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) || smu->adev->in_s0ix) return 0; return smu_cmn_send_smc_msg_with_param(smu, From daf8de0874ab5b74b38a38726fdd3d07ef98a7ee Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Nov 2021 11:25:30 -0500 Subject: [PATCH 755/868] drm/amdgpu: always reset the asic in suspend (v2) If the platform suspend happens to fail and the power rail is not turned off, the GPU will be in an unknown state on resume, so reset the asic so that it will be in a known good state on resume even if the platform suspend failed. v2: handle s0ix Acked-by: Luben Tuikov Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 73ac02372827..1a97b8b237d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2154,7 +2154,10 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s3 = true; r = amdgpu_device_suspend(drm_dev, true); adev->in_s3 = false; - + if (r) + return r; + if (!adev->in_s0ix) + r = amdgpu_asic_reset(adev); return r; } From 7be3be2b027c12e84833b3dc9597d3bb7e4c5464 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 17 Dec 2021 19:05:06 +0800 Subject: [PATCH 756/868] drm/amdgpu: put SMU into proper state on runpm suspending for BOCO capable platform By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some proper cleanups and put itself into a state ready for PNP. That can workaround some random resuming failure observed on BOCO capable platforms. Signed-off-by: Evan Quan Acked-by: Alex Deucher Reviewed-by: Guchun Chen Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1a97b8b237d5..86ca80da9eea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2238,12 +2238,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* + * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some + * proper cleanups and put itself into a state ready for PNP. That + * can address some random resuming failure observed on BOCO capable + * platforms. + * TODO: this may be also needed for PX capable platform. + */ + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; return ret; } + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; + if (amdgpu_device_supports_px(drm_dev)) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. From 8b5fdfc57cc2471179d1c51081424ded833c16c8 Mon Sep 17 00:00:00 2001 From: wolfgang huang Date: Tue, 28 Dec 2021 16:01:20 +0800 Subject: [PATCH 757/868] mISDN: change function names to avoid conflicts As we build for mips, we meet following error. l1_init error with multiple definition. Some architecture devices usually marked with l1, l2, lxx as the start-up phase. so we change the mISDN function names, align with Isdnl2_xxx. mips-linux-gnu-ld: drivers/isdn/mISDN/layer1.o: in function `l1_init': (.text+0x890): multiple definition of `l1_init'; \ arch/mips/kernel/bmips_5xxx_init.o:(.text+0xf0): first defined here make[1]: *** [home/mips/kernel-build/linux/Makefile:1161: vmlinux] Error 1 Signed-off-by: wolfgang huang Reported-by: k2ci Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 6 +++--- drivers/isdn/mISDN/core.h | 4 ++-- drivers/isdn/mISDN/layer1.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 55891e420446..a41b4b264594 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -381,7 +381,7 @@ mISDNInit(void) err = mISDN_inittimer(&debug); if (err) goto error2; - err = l1_init(&debug); + err = Isdnl1_Init(&debug); if (err) goto error3; err = Isdnl2_Init(&debug); @@ -395,7 +395,7 @@ mISDNInit(void) error5: Isdnl2_cleanup(); error4: - l1_cleanup(); + Isdnl1_cleanup(); error3: mISDN_timer_cleanup(); error2: @@ -408,7 +408,7 @@ static void mISDN_cleanup(void) { misdn_sock_cleanup(); Isdnl2_cleanup(); - l1_cleanup(); + Isdnl1_cleanup(); mISDN_timer_cleanup(); class_unregister(&mISDN_class); diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 23b44d303327..42599f49c189 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -60,8 +60,8 @@ struct Bprotocol *get_Bprotocol4id(u_int); extern int mISDN_inittimer(u_int *); extern void mISDN_timer_cleanup(void); -extern int l1_init(u_int *); -extern void l1_cleanup(void); +extern int Isdnl1_Init(u_int *); +extern void Isdnl1_cleanup(void); extern int Isdnl2_Init(u_int *); extern void Isdnl2_cleanup(void); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index 98a3bc6c1700..7b31c25a550e 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -398,7 +398,7 @@ create_l1(struct dchannel *dch, dchannel_l1callback *dcb) { EXPORT_SYMBOL(create_l1); int -l1_init(u_int *deb) +Isdnl1_Init(u_int *deb) { debug = deb; l1fsm_s.state_count = L1S_STATE_COUNT; @@ -409,7 +409,7 @@ l1_init(u_int *deb) } void -l1_cleanup(void) +Isdnl1_cleanup(void) { mISDN_FsmFree(&l1fsm_s); } From 1cd5384c88af5b59bf9f3b6c1a151bc14b88c2cd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 18:51:44 +0100 Subject: [PATCH 758/868] net: ag71xx: Fix a potential double free in error handling paths 'ndev' is a managed resource allocated with devm_alloc_etherdev(), so there is no need to call free_netdev() explicitly or there will be a double free(). Simplify all error handling paths accordingly. Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/ag71xx.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 88d2ab748399..4579ddf9c427 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1913,15 +1913,12 @@ static int ag71xx_probe(struct platform_device *pdev) ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); if (IS_ERR(ag->mac_reset)) { netif_err(ag, probe, ndev, "missing mac reset\n"); - err = PTR_ERR(ag->mac_reset); - goto err_free; + return PTR_ERR(ag->mac_reset); } ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!ag->mac_base) { - err = -ENOMEM; - goto err_free; - } + if (!ag->mac_base) + return -ENOMEM; ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, @@ -1929,7 +1926,7 @@ static int ag71xx_probe(struct platform_device *pdev) if (err) { netif_err(ag, probe, ndev, "unable to request IRQ %d\n", ndev->irq); - goto err_free; + return err; } ndev->netdev_ops = &ag71xx_netdev_ops; @@ -1957,10 +1954,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc = dmam_alloc_coherent(&pdev->dev, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); - if (!ag->stop_desc) { - err = -ENOMEM; - goto err_free; - } + if (!ag->stop_desc) + return -ENOMEM; ag->stop_desc->data = 0; ag->stop_desc->ctrl = 0; @@ -1975,7 +1970,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = of_get_phy_mode(np, &ag->phy_if_mode); if (err) { netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); - goto err_free; + return err; } netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); @@ -1983,7 +1978,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = clk_prepare_enable(ag->clk_eth); if (err) { netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); - goto err_free; + return err; } ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); @@ -2019,8 +2014,6 @@ err_mdio_remove: ag71xx_mdio_remove(ag); err_put_clk: clk_disable_unprepare(ag->clk_eth); -err_free: - free_netdev(ndev); return err; } From 5be60a945329d82f06fc755a43eeefbfc5f77d72 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 27 Dec 2021 17:22:03 +0100 Subject: [PATCH 759/868] net: lantiq_xrx200: fix statistics of received bytes Received frames have FCS truncated. There is no need to subtract FCS length from the statistics. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 96bd6f2b21ed..80bfaf2fec92 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -224,7 +224,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) skb->protocol = eth_type_trans(skb, net_dev); netif_receive_skb(skb); net_dev->stats.rx_packets++; - net_dev->stats.rx_bytes += len - ETH_FCS_LEN; + net_dev->stats.rx_bytes += len; return 0; } From 1b9dadba502234eea7244879b8d5d126bfaf9f0c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 28 Dec 2021 12:48:11 +0000 Subject: [PATCH 760/868] NFC: st21nfca: Fix memory leak in device probe and remove 'phy->pending_skb' is alloced when device probe, but forgot to free in the error handling path and remove path, this cause memory leak as follows: unreferenced object 0xffff88800bc06800 (size 512): comm "8", pid 11775, jiffies 4295159829 (age 9.032s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d66c09ce>] __kmalloc_node_track_caller+0x1ed/0x450 [<00000000c93382b3>] kmalloc_reserve+0x37/0xd0 [<000000005fea522c>] __alloc_skb+0x124/0x380 [<0000000019f29f9a>] st21nfca_hci_i2c_probe+0x170/0x8f2 Fix it by freeing 'pending_skb' in error and remove. Fixes: 68957303f44a ("NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/nfc/st21nfca/i2c.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index f126ce96a7df..35b32fb90906 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -524,7 +524,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(phy->gpiod_ena)) { nfc_err(dev, "Unable to get ENABLE GPIO\n"); - return PTR_ERR(phy->gpiod_ena); + r = PTR_ERR(phy->gpiod_ena); + goto out_free; } phy->se_status.is_ese_present = @@ -535,7 +536,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, r = st21nfca_hci_platform_init(phy); if (r < 0) { nfc_err(&client->dev, "Unable to reboot st21nfca\n"); - return r; + goto out_free; } r = devm_request_threaded_irq(&client->dev, client->irq, NULL, @@ -544,15 +545,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, ST21NFCA_HCI_DRIVER_NAME, phy); if (r < 0) { nfc_err(&client->dev, "Unable to register IRQ handler\n"); - return r; + goto out_free; } - return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, - ST21NFCA_FRAME_HEADROOM, - ST21NFCA_FRAME_TAILROOM, - ST21NFCA_HCI_LLC_MAX_PAYLOAD, - &phy->hdev, - &phy->se_status); + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, + ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, + &phy->hdev, + &phy->se_status); + if (r) + goto out_free; + + return 0; + +out_free: + kfree_skb(phy->pending_skb); + return r; } static int st21nfca_hci_i2c_remove(struct i2c_client *client) @@ -563,6 +572,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) if (phy->powered) st21nfca_hci_i2c_disable(phy); + if (phy->pending_skb) + kfree_skb(phy->pending_skb); return 0; } From 90cee52f2e780345d3629e278291aea5ac74f40f Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:24 +0800 Subject: [PATCH 761/868] net/smc: don't send CDC/LLC message if link not ready We found smc_llc_send_link_delete_all() sometimes wait for 2s timeout when testing with RDMA link up/down. It is possible when a smc_link is in ACTIVATING state, the underlaying QP is still in RESET or RTR state, which cannot send any messages out. smc_llc_send_link_delete_all() use smc_link_usable() to checks whether the link is usable, if the QP is still in RESET or RTR state, but the smc_link is in ACTIVATING, this LLC message will always fail without any CQE entering the CQ, and we will always wait 2s before timeout. Since we cannot send any messages through the QP before the QP enter RTS. I add a wrapper smc_link_sendable() which checks the state of QP along with the link state. And replace smc_link_usable() with smc_link_sendable() in all LLC & CDC message sending routine. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Signed-off-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- net/smc/smc_core.h | 6 ++++++ net/smc/smc_llc.c | 2 +- net/smc/smc_wr.c | 4 ++-- net/smc/smc_wr.h | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 387d28b2f8dd..55ca175e8d57 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -647,7 +647,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (smc_link_usable(lnk)) + if (smc_link_sendable(lnk)) lnk->state = SMC_LNK_INACTIVE; } wake_up_all(&lgr->llc_msg_waiter); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 59cef3b830d8..d63b08274197 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -415,6 +415,12 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_sendable(struct smc_link *lnk) +{ + return smc_link_usable(lnk) && + lnk->qp_attr.cur_qp_state == IB_QPS_RTS; +} + static inline bool smc_link_active(struct smc_link *lnk) { return lnk->state == SMC_LNK_ACTIVE; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b102680296b8..3e9fd8a3124c 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1630,7 +1630,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) delllc.reason = htonl(rsn); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_sendable(&lgr->lnk[i])) continue; if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) break; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 79a7431f534e..df1dc225cbab 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -188,7 +188,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) @@ -231,7 +231,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { rc = wait_event_interruptible_timeout( link->wr_tx_wait, - !smc_link_usable(link) || + !smc_link_sendable(link) || lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f353311e6f84..48ed9b08ac7a 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -62,7 +62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) static inline bool smc_wr_tx_link_hold(struct smc_link *link) { - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return false; atomic_inc(&link->wr_tx_refcnt); return true; From 349d43127dac00c15231e8ffbcaabd70f7b0e544 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:25 +0800 Subject: [PATCH 762/868] net/smc: fix kernel panic caused by race of smc_sock A crash occurs when smc_cdc_tx_handler() tries to access smc_sock but smc_release() has already freed it. [ 4570.695099] BUG: unable to handle page fault for address: 000000002eae9e88 [ 4570.696048] #PF: supervisor write access in kernel mode [ 4570.696728] #PF: error_code(0x0002) - not-present page [ 4570.697401] PGD 0 P4D 0 [ 4570.697716] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 4570.698228] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-rc4+ #111 [ 4570.699013] Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8c24b4c 04/0 [ 4570.699933] RIP: 0010:_raw_spin_lock+0x1a/0x30 <...> [ 4570.711446] Call Trace: [ 4570.711746] [ 4570.711992] smc_cdc_tx_handler+0x41/0xc0 [ 4570.712470] smc_wr_tx_tasklet_fn+0x213/0x560 [ 4570.712981] ? smc_cdc_tx_dismisser+0x10/0x10 [ 4570.713489] tasklet_action_common.isra.17+0x66/0x140 [ 4570.714083] __do_softirq+0x123/0x2f4 [ 4570.714521] irq_exit_rcu+0xc4/0xf0 [ 4570.714934] common_interrupt+0xba/0xe0 Though smc_cdc_tx_handler() checked the existence of smc connection, smc_release() may have already dismissed and released the smc socket before smc_cdc_tx_handler() further visits it. smc_cdc_tx_handler() |smc_release() if (!conn) | | |smc_cdc_tx_dismiss_slots() | smc_cdc_tx_dismisser() | |sock_put(&smc->sk) <- last sock_put, | smc_sock freed bh_lock_sock(&smc->sk) (panic) | To make sure we won't receive any CDC messages after we free the smc_sock, add a refcount on the smc_connection for inflight CDC message(posted to the QP but haven't received related CQE), and don't release the smc_connection until all the inflight CDC messages haven been done, for both success or failed ones. Using refcount on CDC messages brings another problem: when the link is going to be destroyed, smcr_link_clear() will reset the QP, which then remove all the pending CQEs related to the QP in the CQ. To make sure all the CQEs will always come back so the refcount on the smc_connection can always reach 0, smc_ib_modify_qp_reset() was replaced by smc_ib_modify_qp_error(). And remove the timeout in smc_wr_tx_wait_no_pending_sends() since we need to wait for all pending WQEs done, or we may encounter use-after- free when handling CQEs. For IB device removal routine, we need to wait for all the QPs on that device been destroyed before we can destroy CQs on the device, or the refcount on smc_connection won't reach 0 and smc_sock cannot be released. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Reported-by: Wen Gu Signed-off-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc.h | 5 +++++ net/smc/smc_cdc.c | 52 +++++++++++++++++++++------------------------- net/smc/smc_cdc.h | 2 +- net/smc/smc_core.c | 25 +++++++++++++++++----- net/smc/smc_ib.c | 4 ++-- net/smc/smc_ib.h | 1 + net/smc/smc_wr.c | 41 +++--------------------------------- net/smc/smc_wr.h | 3 +-- 8 files changed, 57 insertions(+), 76 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index f4286ca1f228..1a4fc1c6c4ab 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -180,6 +180,11 @@ struct smc_connection { u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ + atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe + * - inc when post wqe, + * - dec on polled tx cqe + */ + wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 99acd337ba90..84c8a4374fdd 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_sock *smc; int diff; - if (!conn) - /* already dismissed */ - return; - smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { @@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, conn); conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } + + if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && + unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) + wake_up(&conn->cdc_pend_tx_wq); + WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); + smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); } @@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); @@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, } else { conn->tx_cdc_seq--; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; + atomic_dec(&conn->cdc_pend_tx_wr); } return rc; @@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, peer->token = htonl(local->token); peer->prod_flags.failover_validation = 1; + /* We need to set pend->conn here to make sure smc_cdc_tx_handler() + * can handle properly + */ + smc_cdc_add_pending_send(conn, pend); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + if (unlikely(rc)) + atomic_dec(&conn->cdc_pend_tx_wr); + return rc; } @@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return rc; } -static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, - unsigned long data) +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) { - struct smc_connection *conn = (struct smc_connection *)data; - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - return cdc_pend->conn == conn; -} - -static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) -{ - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - cdc_pend->conn = NULL; -} - -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - - smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, smc_cdc_tx_dismisser, - (unsigned long)conn); + wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); } /* Send a SMC-D CDC header. diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 0a0a89abd38b..696cc11f2303 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 55ca175e8d57..a6849362f4dd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1127,7 +1127,7 @@ void smc_conn_free(struct smc_connection *conn) smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); if (current_work() != &conn->abort_work) cancel_work_sync(&conn->abort_work); } @@ -1204,7 +1204,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_llc_link_clear(lnk, log); smcr_buf_unmap_lgr(lnk); smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_reset(lnk); + smc_ib_modify_qp_error(lnk); smc_wr_free_link(lnk); smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); @@ -1336,7 +1336,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) else tasklet_unlock_wait(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); @@ -1459,11 +1459,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) /* Called when an SMCR device is removed or the smc module is unloaded. * If smcibdev is given, all SMCR link groups using this device are terminated. * If smcibdev is NULL, all SMCR link groups are terminated. + * + * We must wait here for QPs been destroyed before we destroy the CQs, + * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus + * smc_sock cannot be released. */ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); + LIST_HEAD(lgr_linkdown_list); int i; spin_lock_bh(&smc_lgr_list.lock); @@ -1475,7 +1480,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].smcibdev == smcibdev) - smcr_link_down_cond_sched(&lgr->lnk[i]); + list_move_tail(&lgr->list, &lgr_linkdown_list); } } } @@ -1487,6 +1492,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) __smc_lgr_terminate(lgr, false); } + list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].smcibdev == smcibdev) { + mutex_lock(&lgr->llc_conf_mutex); + smcr_link_down_cond(&lgr->lnk[i]); + mutex_unlock(&lgr->llc_conf_mutex); + } + } + } + if (smcibdev) { if (atomic_read(&smcibdev->lnk_cnt)) wait_event(smcibdev->lnks_deleted, @@ -1586,7 +1601,6 @@ static void smcr_link_down(struct smc_link *lnk) if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) return; - smc_ib_modify_qp_reset(lnk); to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ smcr_link_clear(lnk, true); @@ -1824,6 +1838,7 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + init_waitqueue_head(&conn->cdc_pend_tx_wq); INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d93055ec17ae..fe5d5399c4e8 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -109,12 +109,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk) IB_QP_MAX_QP_RD_ATOMIC); } -int smc_ib_modify_qp_reset(struct smc_link *lnk) +int smc_ib_modify_qp_error(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.qp_state = IB_QPS_RESET; + qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 07585937370e..bfa1c6bf6313 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -90,6 +90,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); +int smc_ib_modify_qp_error(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index df1dc225cbab..c6cfdea8b71b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link) } /* wait till all pending tx work requests on the given link are completed */ -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) { - if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), - SMC_WR_TX_WAIT_PENDING_TIME)) - return 0; - else /* timeout */ - return -EPIPE; + wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); } static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) @@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) struct smc_wr_tx_pend pnd_snd; struct smc_link *link; u32 pnd_snd_idx; - int i; link = wc->qp->qp_context; @@ -128,14 +123,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } if (wc->status) { - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - /* clear full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[i], 0, - sizeof(link->wr_tx_pends[i])); - memset(&link->wr_tx_bufs[i], 0, - sizeof(link->wr_tx_bufs[i])); - clear_bit(i, link->wr_tx_mask); - } if (link->lgr->smc_version == SMC_V2) { memset(link->wr_tx_v2_pend, 0, sizeof(*link->wr_tx_v2_pend)); @@ -421,25 +408,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_tx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; - if (wr_tx->type != wr_tx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - dismisser(tx_pend); - } -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) @@ -675,10 +643,7 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_reg_wait(lnk); smc_wr_wakeup_tx_wait(lnk); - if (smc_wr_tx_wait_no_pending_sends(lnk)) - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * - sizeof(*lnk->wr_tx_mask)); + smc_wr_tx_wait_no_pending_sends(lnk); wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 48ed9b08ac7a..47512ccce5ef 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -22,7 +22,6 @@ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) -#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ @@ -130,7 +129,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data); -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link); +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); From ebae8973884ee9ac703b3bfe34cabbb118b18538 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 23 Dec 2021 14:13:02 -0500 Subject: [PATCH 763/868] drm/amdgpu: no DC support for headless chips Chips with no display hardware should return false for DC support. v2: drop Arcturus and Aldebaran Fixes: f7f12b25823c0d ("drm/amdgpu: default to true in amdgpu_device_asic_has_dc_support") Reviewed-by: Evan Quan Reviewed-by: Guchun Chen Reported-by: Tareque Md.Hanif Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9dc86c5a1cad..694c3726e0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3166,6 +3166,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) { switch (asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + case CHIP_TOPAZ: + /* chips with no display hardware */ + return false; #if defined(CONFIG_DRM_AMD_DC) case CHIP_TAHITI: case CHIP_PITCAIRN: From 1e81dcc1ab7de7a789e60042ce82d5a612632599 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 13 Dec 2021 16:39:49 -0800 Subject: [PATCH 764/868] igc: Do not enable crosstimestamping for i225-V models It was reported that when PCIe PTM is enabled, some lockups could be observed with some integrated i225-V models. While the issue is investigated, we can disable crosstimestamp for those models and see no loss of functionality, because those models don't have any support for time synchronization. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Link: https://lore.kernel.org/all/924175a188159f4e03bd69908a91e606b574139b.camel@gmx.de/ Reported-by: Stefan Dietrich Signed-off-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 30568e3544cd..4f9245aa79a1 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -768,7 +768,20 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) */ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) { - return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false; + if (!IS_ENABLED(CONFIG_X86_TSC)) + return false; + + /* FIXME: it was noticed that enabling support for PCIe PTM in + * some i225-V models could cause lockups when bringing the + * interface up/down. There should be no downsides to + * disabling crosstimestamping support for i225-V, as it + * doesn't have any PTP support. That way we gain some time + * while root causing the issue. + */ + if (adapter->pdev->device == IGC_DEV_ID_I225_V) + return false; + + return pcie_ptm_enabled(adapter->pdev); } static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) From f85846bbf43de38fb2c89fe7d2a085608c4eb25a Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Fri, 17 Dec 2021 16:49:33 -0700 Subject: [PATCH 765/868] igc: Fix TX timestamp support for non-MSI-X platforms Time synchronization was not properly enabled on non-MSI-X platforms. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: James McLaughlin Reviewed-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 8e448288ee26..d28a80a00953 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5467,6 +5467,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5510,6 +5513,9 @@ static irqreturn_t igc_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; From 9f3c16a430e8ac6b8211da106f4e4841d896ec99 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 14 Dec 2021 01:10:27 +0000 Subject: [PATCH 766/868] perf expr: Fix return value of ids__new() callers of ids__new() function only do NULL checking for the return value. ids__new() calles hashmap__new(), which may return ERR_PTR(-ENOMEM). Instead of changing the checking one-by-one return NULL instead of ERR_PTR(-ENOMEM) to keep it consistent. Signed-off-by: Miaoqian Lin Reviewed-by: German Gomez Tested-by: German Gomez Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211214011030.20200-1-linmq006@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 254601060b39..666b59baeb70 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -66,7 +66,12 @@ static bool key_equal(const void *key1, const void *key2, struct hashmap *ids__new(void) { - return hashmap__new(key_hash, key_equal, NULL); + struct hashmap *hash; + + hash = hashmap__new(key_hash, key_equal, NULL); + if (IS_ERR(hash)) + return NULL; + return hash; } void ids__free(struct hashmap *ids) From a78abde220243d6f44a265fe36c49957f6fa9851 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 15 Dec 2021 10:06:34 +0200 Subject: [PATCH 767/868] perf intel-pt: Fix parsing of VM time correlation arguments Parser did not take ':' into account. Example: Before: $ perf record -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.026 MB perf.data ] $ perf inject -i perf.data --vm-time-correlation="dry-run 123" $ perf inject -i perf.data --vm-time-correlation="dry-run 123:456" Failed to parse VM Time Correlation options 0x620 [0x98]: failed to process type: 70 [Invalid argument] $ After: $ perf inject -i perf.data --vm-time-correlation="dry-run 123:456" $ Fixes: e3ff42bdebcfeb5f ("perf intel-pt: Parse VM Time Correlation options and set up decoding") Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211215080636.149562-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 10c3187e4c5a..e8613cbda331 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3625,6 +3625,7 @@ static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) *args = p; return 0; } + p += 1; while (1) { vmcs = strtoull(p, &p, 0); if (errno) From 5e0c325cdb714409a5b242c9e73a1b61157abb36 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 15 Dec 2021 10:06:35 +0200 Subject: [PATCH 768/868] perf script: Fix CPU filtering of a script's switch events CPU filtering was not being applied to a script's switch events. Fixes: 5bf83c29a0ad2e78 ("perf script: Add scripting operation process_switch()") Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211215080636.149562-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9434367af166..c82b033e8942 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2473,7 +2473,7 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; - if (scripting_ops && scripting_ops->process_switch) + if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample)) scripting_ops->process_switch(event, sample, machine); if (!script->show_switch_events) From 0f80bfbf4919e32f52fe1312c3900ff4fbb7eeb9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 15 Dec 2021 10:06:36 +0200 Subject: [PATCH 769/868] perf scripts python: intel-pt-events.py: Fix printing of switch events The intel-pt-events.py script displays only the last of consecutive switch statements but that may not be the last switch event for the CPU. Fix by keeping a dictionary of last context switch keyed by CPU, and make it possible to see all switch events by adding option --all-switch-events. Fixes: a92bf335fd82eeee ("perf scripts python: intel-pt-events.py: Add branches to script") Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Riccardo Mancini Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211215080636.149562-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/intel-pt-events.py | 23 +++++++++++--------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py index 1d3a189a9a54..66452a8ec358 100644 --- a/tools/perf/scripts/python/intel-pt-events.py +++ b/tools/perf/scripts/python/intel-pt-events.py @@ -32,8 +32,7 @@ try: except: broken_pipe_exception = IOError -glb_switch_str = None -glb_switch_printed = True +glb_switch_str = {} glb_insn = False glb_disassembler = None glb_src = False @@ -70,6 +69,7 @@ def trace_begin(): ap = argparse.ArgumentParser(usage = "", add_help = False) ap.add_argument("--insn-trace", action='store_true') ap.add_argument("--src-trace", action='store_true') + ap.add_argument("--all-switch-events", action='store_true') global glb_args global glb_insn global glb_src @@ -256,10 +256,6 @@ def print_srccode(comm, param_dict, sample, symbol, dso, with_insn): print(start_str, src_str) def do_process_event(param_dict): - global glb_switch_printed - if not glb_switch_printed: - print(glb_switch_str) - glb_switch_printed = True event_attr = param_dict["attr"] sample = param_dict["sample"] raw_buf = param_dict["raw_buf"] @@ -274,6 +270,11 @@ def do_process_event(param_dict): dso = get_optional(param_dict, "dso") symbol = get_optional(param_dict, "symbol") + cpu = sample["cpu"] + if cpu in glb_switch_str: + print(glb_switch_str[cpu]) + del glb_switch_str[cpu] + if name[0:12] == "instructions": if glb_src: print_srccode(comm, param_dict, sample, symbol, dso, True) @@ -336,8 +337,6 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): sys.exit(1) def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x): - global glb_switch_printed - global glb_switch_str if out: out_str = "Switch out " else: @@ -350,6 +349,10 @@ def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_pree machine_str = "" else: machine_str = "machine PID %d" % machine_pid - glb_switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \ + switch_str = "%16s %5d/%-5d [%03u] %9u.%09u %5d/%-5d %s %s" % \ (out_str, pid, tid, cpu, ts / 1000000000, ts %1000000000, np_pid, np_tid, machine_str, preempt_str) - glb_switch_printed = False + if glb_args.all_switch_events: + print(switch_str); + else: + global glb_switch_str + glb_switch_str[cpu] = switch_str From 2eb82577a16d4c8eb31e4ed520649850bb95b223 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Sun, 5 Dec 2021 21:19:30 -0500 Subject: [PATCH 770/868] drm/amd/display: fix B0 TMDS deepcolor no dislay issue [why] B0 PHY C map to F, D map to G driver use logic instance, dmub does the remap. Driver still need use the right PHY instance to access right HW. [how] use phyical instance when program PHY register. [note] could move resync_control programming to dmub next. Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn31/dcn31_resource.c | 25 +++++++++++++-- .../drm/amd/display/dc/dcn31/dcn31_resource.h | 31 +++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 18896294ae12..2cc55ef97ec4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -355,6 +355,14 @@ static const struct dce110_clk_src_regs clk_src_regs[] = { clk_src_regs(3, D), clk_src_regs(4, E) }; +/*pll_id being rempped in dmub, in driver it is logical instance*/ +static const struct dce110_clk_src_regs clk_src_regs_b0[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, F), + clk_src_regs(3, G), + clk_src_regs(4, E) +}; static const struct dce110_clk_src_shift cs_shift = { CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) @@ -2276,14 +2284,27 @@ static bool dcn31_resource_construct( dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + /*move phypllx_pixclk_resync to dmub next*/ + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs_b0[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs_b0[3], false); + } else { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL3, &clk_src_regs[3], false); + } + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL4, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 416fe7a721d8..a513363b3326 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -49,4 +49,35 @@ struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); +/*temp: B0 specific before switch to dcn313 headers*/ +#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL +#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e +#define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1 +#define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f +#define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1 + +//PHYPLLF_PIXCLK_RESYNC_CNTL +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L + +//PHYPLLG_PIXCLK_RESYNC_CNTL +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L +#endif #endif /* _DCN31_RESOURCE_H_ */ From d97e631af2db84c8c9d63abf68d487d0bb559e4c Mon Sep 17 00:00:00 2001 From: "Lai, Derek" Date: Mon, 6 Dec 2021 17:10:59 +0800 Subject: [PATCH 771/868] drm/amd/display: Added power down for DCN10 [Why] The change of setting a timer callback on boot for 10 seconds is still working, just lacked power down for DCN10. [How] Added power down for DCN10. Tested-by: Daniel Wheeler Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Signed-off-by: Derek Lai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index 34001a30d449..10e613ec7d24 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -78,6 +78,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, From a07f8b9983543d465b50870ab4f845d4d710ed3f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 9 Dec 2021 13:53:36 -0500 Subject: [PATCH 772/868] drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization [Why] Otherwise SMU won't mark Display as idle when trying to perform s2idle. [How] Mark the bit in the dcn31 codepath, doesn't apply to older ASIC. It needed to be split from phy refclk off to prevent entering s2idle when PSR was engaged but driver was not ready. Fixes: 118a33151658 ("drm/amd/display: Add DCN3.1 clock manager support") Tested-by: Daniel Wheeler Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index f4c9a458ace8..9df38e2ee4f4 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -158,6 +158,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, union display_idle_optimization_u idle_info = { 0 }; idle_info.idle_info.df_request_disabled = 1; idle_info.idle_info.phy_ref_clk_off = 1; + idle_info.idle_info.s0i2_rdy = 1; dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); /* update power state */ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; From 33735c1c8d0223170d79dbe166976d9cd7339c7a Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 9 Dec 2021 16:05:36 -0500 Subject: [PATCH 773/868] drm/amd/display: Set optimize_pwr_state for DCN31 [Why] We'll exit optimized power state to do link detection but we won't enter back into the optimized power state. This could potentially block s2idle entry depending on the sequencing, but it also means we're losing some power during the transition period. [How] Hook up the handler like DCN21. It was also missed like the exit_optimized_pwr_state callback. Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ") Tested-by: Daniel Wheeler Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 4f6e639e9353..17e2f2bb29ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -101,6 +101,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; From 33bb63915fee190102cae7d6576bc51a0bc342b2 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 17 Dec 2021 14:18:59 -0500 Subject: [PATCH 774/868] drm/amd/display: Fix USB4 null pointer dereference in update_psp_stream_config [Why] A porting error on a previous patch left the block of code that causes the crash from a NULL pointer dereference. More specifically, we try to access link_enc before it's assigned in the USB4 case in the following assignment: config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; [How] That assignment occurs later depending on the ASIC version. It's only needed on DCN31 and only after link_enc is already assigned. Fixes: 986430446c917b ("drm/amd/display: fix a crash on USB4 over C20 PHY") Reviewed-by: Harry Wentland Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index c8457babfdea..c0bdc23702c8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3945,12 +3945,9 @@ static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst; #if defined(CONFIG_DRM_AMD_DC_DCN) config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA; - + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY || pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - link_enc = pipe_ctx->stream->link->link_enc; - config.dio_output_type = pipe_ctx->stream->link->ep_type; - config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY) link_enc = pipe_ctx->stream->link->link_enc; else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) From ee2698cf79cc759a397c61086c758d4cc85938bf Mon Sep 17 00:00:00 2001 From: Angus Wang Date: Thu, 9 Dec 2021 17:27:01 -0500 Subject: [PATCH 775/868] drm/amd/display: Changed pipe split policy to allow for multi-display pipe split [WHY] Current implementation of pipe split policy prevents pipe split with multiple displays connected, which caused the MCLK speed to be stuck at max [HOW] Changed the pipe split policies so that pipe split is allowed for multi-display configurations Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1522 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1709 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1655 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1403 Note this is a backport of this commit from amdgpu drm-next for 5.16. Tested-by: Daniel Wheeler Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Angus Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 3883f918b3bb..83f5d9aaffcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1069,7 +1069,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0fa381088d1d..faec0297ec0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -603,7 +603,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d452a0d1777e..79313d1ab5d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -874,7 +874,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 79a66e0c4303..98852b586295 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -840,7 +840,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index fbaa03f26d8b..e472b729d869 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index fcf96cf08c76..16e7059393fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -211,7 +211,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 4a9b64023675..87cec14b7870 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 2cc55ef97ec4..27afbe6ec0fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1002,7 +1002,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, From 140c7bc7d1195750342ea0e6ab76179499ae7cd7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 15:06:17 +0100 Subject: [PATCH 776/868] ionic: Initialize the 'lif->dbid_inuse' bitmap When allocated, this bitmap is not initialized. Only the first bit is set a few lines below. Use bitmap_zalloc() to make sure that it is cleared before being used. Fixes: 6461b446f2a0 ("ionic: Add interrupts and doorbells") Signed-off-by: Christophe JAILLET Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/6a478eae0b5e6c63774e1f0ddb1a3f8c38fa8ade.1640527506.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 63f8a8163b5f..2ff7be17e5af 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3135,7 +3135,7 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); + lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); if (!lif->dbid_inuse) { dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); return -ENOMEM; From 077cdda764c7f147e03e6065ba0cd1dbc1bf00d1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 22 Dec 2021 09:20:58 +0200 Subject: [PATCH 777/868] net/mlx5e: TC, Fix memory leak with rules with internal port Fix a memory leak with decap rule with internal port as destination device. The driver allocates a modify hdr action but doesn't set the flow attr modify hdr action which results in skipping releasing the modify hdr action when releasing the flow. backtrace: [<000000005f8c651c>] krealloc+0x83/0xd0 [<000000009f59b143>] alloc_mod_hdr_actions+0x156/0x310 [mlx5_core] [<000000002257f342>] mlx5e_tc_match_to_reg_set_and_get_id+0x12a/0x360 [mlx5_core] [<00000000b44ea75a>] mlx5e_tc_add_fdb_flow+0x962/0x1470 [mlx5_core] [<0000000003e384a0>] __mlx5e_add_fdb_flow+0x54c/0xb90 [mlx5_core] [<00000000ed8b22b6>] mlx5e_configure_flower+0xe45/0x4af0 [mlx5_core] [<00000000024f4ab5>] mlx5e_rep_indr_offload.isra.0+0xfe/0x1b0 [mlx5_core] [<000000006c3bb494>] mlx5e_rep_indr_setup_tc_cb+0x90/0x130 [mlx5_core] [<00000000d3dac2ea>] tc_setup_cb_add+0x1d2/0x420 Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a60c7680fd2b..5e454a14428f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1441,6 +1441,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, metadata); if (err) goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } } From 992d8a4e38f0527f24e273ce3a9cd6dea1a6a436 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 29 Nov 2021 11:08:41 +0200 Subject: [PATCH 778/868] net/mlx5e: Fix wrong features assignment in case of error In case of an error in mlx5e_set_features(), 'netdev->features' must be updated with the correct state of the device to indicate which features were updated successfully. To do that we maintain a copy of 'netdev->features' and update it after successful feature changes, so we can assign it to back to 'netdev->features' if needed. However, since not all netdev features are handled by the driver (e.g. GRO/TSO/etc), some features may not be updated correctly in case of an error updating another feature. For example, while requesting to disable TSO (feature which is not handled by the driver) and enable HW-GRO, if an error occurs during HW-GRO enable, 'oper_features' will be assigned with 'netdev->features' and HW-GRO turned off. TSO will remain enabled in such case, which is a bug. To solve that, instead of using 'netdev->features' as the baseline of 'oper_features' and changing it on set feature success, use 'features' instead and update it in case of errors. Fixes: 75b81ce719b7 ("net/mlx5e: Don't override netdev features field unless in error flow") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3b0f3a831216..41379844eee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3741,12 +3741,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3754,22 +3753,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); From 5bec7ca2be6955ca1aa0d7bae2b981de9b1c9844 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Mon, 20 Dec 2021 15:52:50 +0000 Subject: [PATCH 779/868] xsk: Initialise xskb free_list_node This commit initialises the xskb's free_list_node when the xskb is allocated. This prevents a potential false negative returned from a call to list_empty for that node, such as the one introduced in commit 199d983bc015 ("xsk: Fix crash on double free in buffer pool") In my environment this issue caused packets to not be received by the xdpsock application if the traffic was running prior to application launch. This happened when the first batch of packets failed the xskmap lookup and XDP_PASS was returned from the bpf program. This action is handled in the i40e zc driver (and others) by allocating an skbuff, freeing the xdp_buff and adding the associated xskb to the xsk_buff_pool's free_list if it hadn't been added already. Without this fix, the xskb is not added to the free_list because the check to determine if it was added already returns an invalid positive result. Later, this caused allocation errors in the driver and the failure to receive packets. Fixes: 199d983bc015 ("xsk: Fix crash on double free in buffer pool") Fixes: 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API") Signed-off-by: Ciara Loftus Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20211220155250.2746-1-ciara.loftus@intel.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk_buff_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index bc4ad48ea4f0..fd39bb660ebc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -83,6 +83,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; + INIT_LIST_HEAD(&xskb->free_list_node); if (pool->unaligned) pool->free_heads[i] = xskb; else From fb7bc9204095090731430c8921f9e629740c110a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 29 Dec 2021 15:09:47 -0500 Subject: [PATCH 780/868] ipv6: raw: check passed optlen before reading Add a check that the user-provided option is at least as long as the number of bytes we intend to read. Before this patch we would blindly read sizeof(int) bytes even in cases where the user passed optlen Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211229200947.2862255-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 60f1e4f5be5a..c51d5ce3711c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1020,6 +1020,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; From 99b40610956a8a8755653a67392e2a8b772453be Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:15 +0200 Subject: [PATCH 781/868] net: bridge: mcast: add and enforce query interval minimum As reported[1] if query interval is set too low and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the administrator know that the interval has been set to the minimum. The issue has been present since these intervals could be user-controlled. [1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f3d751105343..998da4a2d209 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4522,6 +4522,22 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, } #endif +void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; + } + + brmctx->multicast_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0c8b5f1a15bc..701dd8b8455e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); - br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c0efd697865a..4ed7f11042e8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -28,6 +28,7 @@ #define BR_MAX_PORTS (1<multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); return 0; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index 8ffd4ed2563c..bf1ac0874279 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]); - v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) { From f83a112bd91a494cdee671aec74e777470fb4a07 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:16 +0200 Subject: [PATCH 782/868] net: bridge: mcast: add and enforce startup query interval minimum As reported[1] if startup query interval is set too low in combination with large number of startup queries and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the admin know that the startup interval has been set to the minimum. It doesn't make sense to make the startup interval lower than the normal query interval so use the same value of 1 second. The issue has been present since these intervals could be user-controlled. [1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 998da4a2d209..de2409889489 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4538,6 +4538,22 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, brmctx->multicast_query_interval = intvl_jiffies; } +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; + } + + brmctx->multicast_startup_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 701dd8b8455e..2ff83d84230d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_STATS_ENABLED]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 4ed7f11042e8..2187a0c3fd22 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,7 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) +#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN #define BR_HWDOM_MAX BITS_PER_LONG @@ -966,6 +967,8 @@ size_t br_multicast_querier_state_size(void); size_t br_rports_size(const struct net_bridge_mcast *brmctx); void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val); +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val); static inline bool br_group_is_l2(const struct br_ip *group) { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index f5bd1114a434..7b0c19772111 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show( static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); return 0; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index bf1ac0874279..a6382973b3e7 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]); - v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) { From d6f12f83989bb356ac6880a954f62c7667e35066 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 29 Dec 2021 12:15:53 +0100 Subject: [PATCH 783/868] x86/build: Use the proper name CONFIG_FW_LOADER Commit in Fixes intends to add the expression regex only when FW_LOADER is enabled - not FW_LOADER_BUILTIN. Latter is a leftover from a previous patchset and not a valid config item. So, adjust the condition to the actual name of the config. [ bp: Cleanup commit message. ] Fixes: c8dcf655ec81 ("x86/build: Tuck away built-in firmware under FW_LOADER") Signed-off-by: Lukas Bulwahn Signed-off-by: Borislav Petkov Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20211229111553.5846-1-lukas.bulwahn@gmail.com --- arch/x86/tools/relocs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index c736cf2ac76b..e2c5b296120d 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -68,7 +68,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "(__parainstructions|__alt_instructions)(_end)?|" "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|" "__(start|end)_pci_.*|" -#if CONFIG_FW_LOADER_BUILTIN +#if CONFIG_FW_LOADER "__(start|end)_builtin_fw|" #endif "__(start|stop)___ksymtab(_gpl)?|" From 9c1952aeaa98b3cfc49e2a79cb2c7d6a674213e9 Mon Sep 17 00:00:00 2001 From: wujianguo Date: Wed, 29 Dec 2021 18:58:10 +0800 Subject: [PATCH 784/868] selftests/net: udpgso_bench_tx: fix dst ip argument udpgso_bench_tx call setup_sockaddr() for dest address before parsing all arguments, if we specify "-p ${dst_port}" after "-D ${dst_ip}", then ${dst_port} will be ignored, and using default cfg_port 8000. This will cause test case "multiple GRO socks" failed in udpgro.sh. Setup sockaddr after parsing all arguments. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Signed-off-by: Jianguo Wu Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/ff620d9f-5b52-06ab-5286-44b945453002@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench_tx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 17512a43885e..f1fdaa270291 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -419,6 +419,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int max_len, hdrlen; int c; @@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; @@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); + if (optind != argc) usage(argv[0]); From add25d6d6c85f7b6d00a055ee0a4169acf845681 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 29 Dec 2021 15:27:30 +0800 Subject: [PATCH 785/868] selftests: net: Fix a typo in udpgro_fwd.sh $rvs -> $rcv Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Link: https://lore.kernel.org/r/d247d7c8-a03a-0abf-3c71-4006a051d133@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 7f26591f236b..6a3985b8cd7f 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -132,7 +132,7 @@ run_test() { local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \ sed -e 's/\[//' -e 's/:.*//'` if [ $rcv != $pkts ]; then - echo " fail - received $rvs packets, expected $pkts" + echo " fail - received $rcv packets, expected $pkts" ret=1 return fi From e22e45fc9e41bf9fcc1e92cfb78eb92786728ef0 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Dec 2021 18:41:45 +0800 Subject: [PATCH 786/868] net: fix use-after-free in tw_timer_handler A real world panic issue was found as follow in Linux 5.4. BUG: unable to handle page fault for address: ffffde49a863de28 PGD 7e6fe62067 P4D 7e6fe62067 PUD 7e6fe63067 PMD f51e064067 PTE 0 RIP: 0010:tw_timer_handler+0x20/0x40 Call Trace: call_timer_fn+0x2b/0x120 run_timer_softirq+0x1ef/0x450 __do_softirq+0x10d/0x2b8 irq_exit+0xc7/0xd0 smp_apic_timer_interrupt+0x68/0x120 apic_timer_interrupt+0xf/0x20 This issue was also reported since 2017 in the thread [1], unfortunately, the issue was still can be reproduced after fixing DCCP. The ipv4_mib_exit_net is called before tcp_sk_exit_batch when a net namespace is destroyed since tcp_sk_ops is registered befrore ipv4_mib_ops, which means tcp_sk_ops is in the front of ipv4_mib_ops in the list of pernet_list. There will be a use-after-free on net->mib.net_statistics in tw_timer_handler after ipv4_mib_exit_net if there are some inflight time-wait timers. This bug is not introduced by commit f2bf415cfed7 ("mib: add net to NET_ADD_STATS_BH") since the net_statistics is a global variable instead of dynamic allocation and freeing. Actually, commit 61a7e26028b9 ("mib: put net statistics on struct net") introduces the bug since it put net statistics on struct net and free it when net namespace is destroyed. Moving init_ipv4_mibs() to the front of tcp_init() to fix this bug and replace pr_crit() with panic() since continuing is meaningless when init_ipv4_mibs() fails. [1] https://groups.google.com/g/syzkaller/c/p1tn-_Kc6l4/m/smuL_FMAAgAJ?pli=1 Fixes: 61a7e26028b9 ("mib: put net statistics on struct net") Signed-off-by: Muchun Song Cc: Cong Wang Cc: Fam Zheng Cc: Link: https://lore.kernel.org/r/20211228104145.9426-1-songmuchun@bytedance.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b5956500436..5f70ffdae1b5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1994,6 +1994,10 @@ static int __init inet_init(void) ip_init(); + /* Initialise per-cpu ipv4 mibs */ + if (init_ipv4_mibs()) + panic("%s: Cannot init ipv4 mibs\n", __func__); + /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -2024,12 +2028,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); From 168fed986b3a7ec7b98cab1fe84e2f282b9e6a8f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Dec 2021 17:31:42 +0200 Subject: [PATCH 787/868] net: bridge: mcast: fix br_multicast_ctx_vlan_global_disabled helper We need to first check if the context is a vlan one, then we need to check the global bridge multicast vlan snooping flag, and finally the vlan's multicast flag, otherwise we will unnecessarily enable vlan mcast processing (e.g. querier timers). Fixes: 7b54aaaf53cb ("net: bridge: multicast: add vlan state initialization and control") Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20211228153142.536969-1-nikolay@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_private.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2187a0c3fd22..e8c6ee322c71 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1153,9 +1153,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) static inline bool br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) { - return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && - br_multicast_ctx_is_vlan(brmctx) && - !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED); + return br_multicast_ctx_is_vlan(brmctx) && + (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) || + !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)); } static inline bool From 92a34ab169f9eefe29cd420ce96b0a0a2a1da853 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 29 Dec 2021 11:21:18 +0800 Subject: [PATCH 788/868] net/ncsi: check for error return from call to nla_put_u32 As we can see from the comment of the nla_put() that it could return -EMSGSIZE if the tailroom of the skb is insufficient. Therefore, it should be better to check the return value of the nla_put_u32 and return the error code if error accurs. Also, there are many other functions have the same problem, and if this patch is correct, I will commit a new version to fix all. Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20211229032118.1706294-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bb5f1650f11c..c189b4c8a182 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; - nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (rc) { + nla_nest_cancel(skb, pnest); + return rc; + } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); From be1c5b53227ba8280f1ebb01c6f5da3c9eebdaad Mon Sep 17 00:00:00 2001 From: xu xin Date: Thu, 30 Dec 2021 03:28:56 +0000 Subject: [PATCH 789/868] Documentation: fix outdated interpretation of ip_no_pmtu_disc The updating way of pmtu has changed, but documentation is still in the old way. So this patch updates the interpretation of ip_no_pmtu_disc and min_pmtu. See commit 28d35bcdd3925 ("net: ipv4: don't let PMTU updates increase route MTU") Reported-by: Zeal Robot Signed-off-by: xu xin Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c04431144f7a..2572eecc3e86 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -25,7 +25,8 @@ ip_default_ttl - INTEGER ip_no_pmtu_disc - INTEGER Disable Path MTU Discovery. If enabled in mode 1 and a fragmentation-required ICMP is received, the PMTU to this - destination will be set to min_pmtu (see below). You will need + destination will be set to the smallest of the old MTU to + this destination and min_pmtu (see below). You will need to raise min_pmtu to the smallest interface MTU on your system manually if you want to avoid locally generated fragments. @@ -49,7 +50,8 @@ ip_no_pmtu_disc - INTEGER Default: FALSE min_pmtu - INTEGER - default 552 - minimum discovered Path MTU + default 552 - minimum Path MTU. Unless this is changed mannually, + each cached pmtu will never be lower than this setting. ip_forward_use_pmtu - BOOLEAN By default we don't trust protocol path MTUs while forwarding From 8b3170e07539855ee91bc5e2fa7780a4c9b5c7aa Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 30 Dec 2021 18:40:29 +0800 Subject: [PATCH 790/868] selftests: net: using ping6 for IPv6 in udpgro_fwd.sh udpgro_fwd.sh output following message: ping: 2001:db8:1::100: Address family for hostname not supported Using ping6 when pinging IPv6 addresses. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro_fwd.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 6a3985b8cd7f..3ea73013d956 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -185,6 +185,7 @@ for family in 4 6; do IPT=iptables SUFFIX=24 VXDEV=vxlan + PING=ping if [ $family = 6 ]; then BM_NET=$BM_NET_V6 @@ -192,6 +193,7 @@ for family in 4 6; do SUFFIX="64 nodad" VXDEV=vxlan6 IPT=ip6tables + PING="ping6" fi echo "IPv$family" @@ -237,7 +239,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel - ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null + ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST cleanup From bf2b09fedc17248b315f80fb249087b7d28a69a6 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 30 Dec 2021 12:26:27 +0000 Subject: [PATCH 791/868] fsl/fman: Fix missing put_device() call in fman_port_probe The reference taken by 'of_find_device_by_node()' must be released when not needed anymore. Add the corresponding 'put_device()' in the and error handling paths. Fixes: 18a6c85fcc78 ("fsl/fman: Add FMan Port Support") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_port.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index d9baac0dbc7d..4c9d05c45c03 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev) fman = dev_get_drvdata(&fm_pdev->dev); if (!fman) { err = -EINVAL; - goto return_err; + goto put_device; } err = of_property_read_u32(port_node, "cell-index", &val); @@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", __func__, port_node); err = -EINVAL; - goto return_err; + goto put_device; } port_id = (u8)val; port->dts_params.id = port_id; @@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else { dev_err(port->dev, "%s: Illegal port type\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.type = port_type; @@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: incorrect qman-channel-id\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.qman_channel_id = qman_channel_id; } @@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: of_address_to_resource() failed\n", __func__); err = -ENOMEM; - goto return_err; + goto put_device; } port->dts_params.fman = fman; @@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev) return 0; +put_device: + put_device(&fm_pdev->dev); return_err: of_node_put(port_node); free_port: From 012e332286e2bb9f6ac77d195f17e74b2963d663 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 30 Dec 2021 20:23:09 +0100 Subject: [PATCH 792/868] fs/mount_setattr: always cleanup mount_kattr Make sure that finish_mount_kattr() is called after mount_kattr was succesfully built in both the success and failure case to prevent leaking any references we took when we built it. We returned early if path lookup failed thereby risking to leak an additional reference we took when building mount_kattr when an idmapped mount was requested. Cc: linux-fsdevel@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 9caccd41541a ("fs: introduce MOUNT_ATTR_IDMAP") Signed-off-by: Christian Brauner Signed-off-by: Linus Torvalds --- fs/namespace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 659a8f39c61a..b696543adab8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4263,12 +4263,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, return err; err = user_path_at(dfd, path, kattr.lookup_flags, &target); - if (err) - return err; - - err = do_mount_setattr(&target, &kattr); + if (!err) { + err = do_mount_setattr(&target, &kattr); + path_put(&target); + } finish_mount_kattr(&kattr); - path_put(&target); return err; } From 9f3ccdc3f6ef10084ceb3a47df0961bec6196fd0 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 30 Dec 2021 20:57:46 -0800 Subject: [PATCH 793/868] Input: appletouch - initialize work before device registration Syzbot has reported warning in __flush_work(). This warning is caused by work->func == NULL, which means missing work initialization. This may happen, since input_dev->close() calls cancel_work_sync(&dev->work), but dev->work initalization happens _after_ input_register_device() call. So this patch moves dev->work initialization before registering input device Fixes: 5a6eb676d3bc ("Input: appletouch - improve powersaving for Geyser3 devices") Reported-and-tested-by: syzbot+b88c5eae27386b252bbd@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/20211230141151.17300-1-paskripkin@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/appletouch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index bfa26651c0be..627048bc6a12 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface, set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); set_bit(BTN_LEFT, input_dev->keybit); + INIT_WORK(&dev->work, atp_reinit); + error = input_register_device(dev->input); if (error) goto err_free_buffer; @@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface, /* save our data pointer in this interface device */ usb_set_intfdata(iface, dev); - INIT_WORK(&dev->work, atp_reinit); - return 0; err_free_buffer: From bc7ec91718c49d938849697cfad98fcd9877cc26 Mon Sep 17 00:00:00 2001 From: "Leo L. Schwab" Date: Thu, 30 Dec 2021 21:05:00 -0800 Subject: [PATCH 794/868] Input: spaceball - fix parsing of movement data packets The spaceball.c module was not properly parsing the movement reports coming from the device. The code read axis data as signed 16-bit little-endian values starting at offset 2. In fact, axis data in Spaceball movement reports are signed 16-bit big-endian values starting at offset 3. This was determined first by visually inspecting the data packets, and later verified by consulting: http://spacemice.org/pdf/SpaceBall_2003-3003_Protocol.pdf If this ever worked properly, it was in the time before Git... Signed-off-by: Leo L. Schwab Link: https://lore.kernel.org/r/20211221101630.1146385-1-ewhac@ewhac.org Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/spaceball.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c index 429411c6c0a8..a85a4f33aea8 100644 --- a/drivers/input/joystick/spaceball.c +++ b/drivers/input/joystick/spaceball.c @@ -19,6 +19,7 @@ #include #include #include +#include #define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver" @@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball) case 'D': /* Ball data */ if (spaceball->idx != 15) return; - for (i = 0; i < 6; i++) + /* + * Skip first three bytes; read six axes worth of data. + * Axis values are signed 16-bit big-endian. + */ + data += 3; + for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) { input_report_abs(dev, spaceball_axes[i], - (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2])); + (__s16)get_unaligned_be16(&data[i * 2])); + } break; case 'K': /* Button data */ From bb436283e25aaf1533ce061605d23a9564447bdf Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 31 Dec 2021 01:47:50 +0300 Subject: [PATCH 795/868] i2c: validate user data in compat ioctl Wrong user data may cause warning in i2c_transfer(), ex: zero msgs. Userspace should not be able to trigger warnings, so this patch adds validation checks for user data in compact ioctl to prevent reported warnings Reported-and-tested-by: syzbot+e417648b303855b91d8a@syzkaller.appspotmail.com Fixes: 7d5cb45655f2 ("i2c compat ioctls: move to ->compat_ioctl()") Signed-off-by: Pavel Skripkin Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index bce0e8bb7852..cf5d049342ea 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -535,6 +535,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo sizeof(rdwr_arg))) return -EFAULT; + if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0) + return -EINVAL; + if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; From c116fe1e1883ad3eda0a1938a9e3275a98aa51a5 Mon Sep 17 00:00:00 2001 From: Deep Majumder Date: Fri, 19 Nov 2021 11:44:01 +0530 Subject: [PATCH 796/868] Docs: Fixes link to I2C specification The link to the I2C specification is broken. Although "https://www.nxp.com" hosts Rev 7 (2021) of this specification, it is behind a login-wall. Thus, an additional link has been added (which doesn't require a login) and the NXP official docs link has been updated. Signed-off-by: Deep Majumder [wsa: minor updates to text and commit message] Signed-off-by: Wolfram Sang --- Documentation/i2c/summary.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst index 136c4e333be7..786c618ba3be 100644 --- a/Documentation/i2c/summary.rst +++ b/Documentation/i2c/summary.rst @@ -11,9 +11,11 @@ systems. Some systems use variants that don't meet branding requirements, and so are not advertised as being I2C but come under different names, e.g. TWI (Two Wire Interface), IIC. -The official I2C specification is the `"I2C-bus specification and user -manual" (UM10204) `_ -published by NXP Semiconductors. +The latest official I2C specification is the `"I2C-bus specification and user +manual" (UM10204) `_ +published by NXP Semiconductors. However, you need to log-in to the site to +access the PDF. An older version of the specification (revision 6) is archived +`here `_. SMBus (System Management Bus) is based on the I2C protocol, and is mostly a subset of I2C protocols and signaling. Many I2C devices will work on an From eaa090538e8d21801c6d5f94590c3799e6a528b5 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 30 Dec 2021 17:53:54 +0800 Subject: [PATCH 797/868] drm/amd/pm: keep the BACO feature enabled for suspend To pair with the workaround which always reset the ASIC in suspend. Otherwise, the reset which relies on BACO will fail. Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)") Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8a817932acdf..9d7d64fdf410 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1400,8 +1400,14 @@ static int smu_disable_dpms(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; int ret = 0; + /* + * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair + * the workaround which always reset the asic in suspend. + * It's likely that workaround will be dropped in the future. + * Then the change here should be dropped together. + */ bool use_baco = !smu->is_apu && - ((amdgpu_in_reset(adev) && + (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev))); From 9a45ac2320d0a6ae01880a30d4b86025fce4061b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Dec 2021 22:41:18 -0500 Subject: [PATCH 798/868] fbdev: fbmem: add a helper to determine if an aperture is used by a fw fb Add a function for drivers to check if the a firmware initialized fb is corresponds to their aperture. This allows drivers to check if the device corresponds to what the firmware set up as the display device. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=215203 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1840 Signed-off-by: Alex Deucher --- drivers/video/fbdev/core/fbmem.c | 47 ++++++++++++++++++++++++++++++++ include/linux/fb.h | 1 + 2 files changed, 48 insertions(+) diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 826175ad88a2..0fa7ede94fa6 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1762,6 +1762,53 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, } EXPORT_SYMBOL(remove_conflicting_framebuffers); +/** + * is_firmware_framebuffer - detect if firmware-configured framebuffer matches + * @a: memory range, users of which are to be checked + * + * This function checks framebuffer devices (initialized by firmware/bootloader) + * which use memory range described by @a. If @a matchesm the function returns + * true, otherwise false. + */ +bool is_firmware_framebuffer(struct apertures_struct *a) +{ + bool do_free = false; + bool found = false; + int i; + + if (!a) { + a = alloc_apertures(1); + if (!a) + return false; + + a->ranges[0].base = 0; + a->ranges[0].size = ~0; + do_free = true; + } + + mutex_lock(®istration_lock); + /* check all firmware fbs and kick off if the base addr overlaps */ + for_each_registered_fb(i) { + struct apertures_struct *gen_aper; + + if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE)) + continue; + + gen_aper = registered_fb[i]->apertures; + if (fb_do_apertures_overlap(gen_aper, a)) { + found = true; + break; + } + } + mutex_unlock(®istration_lock); + + if (do_free) + kfree(a); + + return found; +} +EXPORT_SYMBOL(is_firmware_framebuffer); + /** * remove_conflicting_pci_framebuffers - remove firmware-configured framebuffers for PCI devices * @pdev: PCI device diff --git a/include/linux/fb.h b/include/linux/fb.h index 6f3db99ab990..3da95842b207 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -610,6 +610,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name); extern int remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary); +extern bool is_firmware_framebuffer(struct apertures_struct *a); extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); From b95dc06af3e683d6b7ddbbae178b2b2a21ee8b2b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Dec 2021 22:57:16 -0500 Subject: [PATCH 799/868] drm/amdgpu: disable runpm if we are the primary adapter If we are the primary adapter (i.e., the one used by the firwmare framebuffer), disable runtime pm. This fixes a regression caused by commit 55285e21f045 which results in the displays waking up shortly after they go to sleep due to the device coming out of runtime suspend and sending a hotplug uevent. v2: squash in reworked fix from Evan Fixes: 55285e21f045 ("fbdev/efifb: Release PCI device's runtime PM ref during FB destroy") Bug: https://bugzilla.kernel.org/show_bug.cgi?id=215203 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1840 Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 28 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++++++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b85b67a88a3d..7d67aec6f4a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1077,6 +1077,7 @@ struct amdgpu_device { bool runpm; bool in_runpm; bool has_pr3; + bool is_fw_fb; bool pm_sysfs_en; bool ucode_sysfs_en; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 86ca80da9eea..99370bdd8c5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_irq.h" @@ -1890,6 +1891,26 @@ MODULE_DEVICE_TABLE(pci, pciidlist); static const struct drm_driver amdgpu_kms_driver; +static bool amdgpu_is_fw_framebuffer(resource_size_t base, + resource_size_t size) +{ + bool found = false; +#if IS_REACHABLE(CONFIG_FB) + struct apertures_struct *a; + + a = alloc_apertures(1); + if (!a) + return false; + + a->ranges[0].base = base; + a->ranges[0].size = size; + + found = is_firmware_framebuffer(a); + kfree(a); +#endif + return found; +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1898,6 +1919,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, unsigned long flags = ent->driver_data; int ret, retry = 0, i; bool supports_atomic = false; + bool is_fw_fb; + resource_size_t base, size; /* skip devices which are owned by radeon */ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { @@ -1966,6 +1989,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + is_fw_fb = amdgpu_is_fw_framebuffer(base, size); + /* Get rid of things like offb */ ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); if (ret) @@ -1978,6 +2005,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, adev->dev = &pdev->dev; adev->pdev = pdev; ddev = adev_to_drm(adev); + adev->is_fw_fb = is_fw_fb; if (!supports_atomic) ddev->driver_features &= ~DRIVER_ATOMIC; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 651c7abfde03..09ad17944eb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -206,6 +206,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) adev->runpm = true; break; } + /* XXX: disable runtime pm if we are the primary adapter + * to avoid displays being re-enabled after DPMS. + * This needs to be sorted out and fixed properly. + */ + if (adev->is_fw_fb) + adev->runpm = false; if (adev->runpm) dev_info(adev->dev, "Using BACO for runtime pm\n"); } From 7a3429bace0e08d94c39245631ea6bc109dafa49 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Dec 2021 17:36:31 -0700 Subject: [PATCH 800/868] ipv4: Check attribute length for RTA_GATEWAY in multipath route syzbot reported uninit-value: ============================================================ BUG: KMSAN: uninit-value in fib_get_nhs+0xac4/0x1f80 net/ipv4/fib_semantics.c:708 fib_get_nhs+0xac4/0x1f80 net/ipv4/fib_semantics.c:708 fib_create_info+0x2411/0x4870 net/ipv4/fib_semantics.c:1453 fib_table_insert+0x45c/0x3a10 net/ipv4/fib_trie.c:1224 inet_rtm_newroute+0x289/0x420 net/ipv4/fib_frontend.c:886 Add helper to validate RTA_GATEWAY length before using the attribute. Fixes: 4e902c57417c ("[IPv4]: FIB configuration using struct fib_config") Reported-by: syzbot+d4b9a2851cc3ce998741@syzkaller.appspotmail.com Signed-off-by: David Ahern Cc: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index fde7797b5806..f1caa2c1c041 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -662,6 +662,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, return nhs; } +static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in_addr(nla); + + return 0; +} + /* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, @@ -704,7 +717,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return -EINVAL; } if (nla) { - fib_cfg.fc_gw4 = nla_get_in_addr(nla); + ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla, + extack); + if (ret) + goto errout; + if (fib_cfg.fc_gw4) fib_cfg.fc_gw_family = AF_INET; } else if (nlav) { @@ -902,6 +919,7 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); + int err; nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); @@ -912,12 +930,17 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, } if (nla) { + __be32 gw; + + err = fib_gw_from_attr(&gw, nla, extack); + if (err) + return err; + if (nh->fib_nh_gw_family != AF_INET || - nla_get_in_addr(nla) != nh->fib_nh_gw4) + gw != nh->fib_nh_gw4) return 1; } else if (nlav) { struct fib_config cfg2; - int err; err = fib_gw_from_via(&cfg2, nlav, extack); if (err) From 664b9c4b7392ce723b013201843264bf95481ce5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Dec 2021 17:36:32 -0700 Subject: [PATCH 801/868] ipv4: Check attribute length for RTA_FLOW in multipath route Make sure RTA_FLOW is at least 4B before using. Fixes: 4e902c57417c ("[IPv4]: FIB configuration using struct fib_config") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f1caa2c1c041..36bc429f1635 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -731,8 +731,13 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, } nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla) + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } fib_cfg.fc_flow = nla_get_u32(nla); + } fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); @@ -963,8 +968,14 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla && nla_get_u32(nla) != nh->nh_tclassid) - return 1; + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } + if (nla_get_u32(nla) != nh->nh_tclassid) + return 1; + } #endif } From 4619bcf91399f00a40885100fb61d594d8454033 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Dec 2021 17:36:33 -0700 Subject: [PATCH 802/868] ipv6: Check attribute length for RTA_GATEWAY in multipath route Commit referenced in the Fixes tag used nla_memcpy for RTA_GATEWAY as does the current nla_get_in6_addr. nla_memcpy protects against accessing memory greater than what is in the attribute, but there is no check requiring the attribute to have an IPv6 address. Add it. Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)") Signed-off-by: David Ahern Cc: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/route.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 42d60c76d30a..d16599c225b8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5224,6 +5224,19 @@ out: return should_notify; } +static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in6_addr(nla); + + return 0; +} + static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -5264,7 +5277,13 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - r_cfg.fc_gateway = nla_get_in6_addr(nla); + int ret; + + ret = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (ret) + return ret; + r_cfg.fc_flags |= RTF_GATEWAY; } r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); From 1ff15a710a862db1101b97810af14aedc835a86a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Dec 2021 17:36:34 -0700 Subject: [PATCH 803/868] ipv6: Check attribute length for RTA_GATEWAY when deleting multipath route Make sure RTA_GATEWAY for IPv6 multipath route has enough bytes to hold an IPv6 address. Fixes: 6b9ea5a64ed5 ("ipv6: fix multipath route replace error recovery") Signed-off-by: David Ahern Cc: Roopa Prabhu Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d16599c225b8..b311c0bc9983 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5453,7 +5453,11 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) + return err; + r_cfg.fc_flags |= RTF_GATEWAY; } } From 8bda81a4d400cf8a72e554012f0d8c45e07a3904 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Dec 2021 17:36:35 -0700 Subject: [PATCH 804/868] lwtunnel: Validate RTA_ENCAP_TYPE attribute length lwtunnel_valid_encap_type_attr is used to validate encap attributes within a multipath route. Add length validation checking to the type. lwtunnel_valid_encap_type_attr is called converting attributes to fib{6,}_config struct which means it is used before fib_get_nhs, ip6_route_multipath_add, and ip6_route_multipath_del - other locations that use rtnh_ok and then nla_get_u16 on RTA_ENCAP_TYPE attribute. Fixes: 9ed59592e3e3 ("lwtunnel: fix autoload of lwt modules") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/lwtunnel.c | 4 ++++ net/ipv4/fib_semantics.c | 3 +++ net/ipv6/route.c | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 2820aca2173a..9ccd64e8a666 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -197,6 +197,10 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla_entype) { + if (nla_len(nla_entype) < sizeof(u16)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); + return -EINVAL; + } encap_type = nla_get_u16(nla_entype); if (lwtunnel_valid_encap_type(encap_type, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 36bc429f1635..92c29ab3d042 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -740,6 +740,9 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, } fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) fib_cfg.fc_encap_type = nla_get_u16(nla); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b311c0bc9983..d2ff8a7e1709 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5287,6 +5287,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, r_cfg.fc_flags |= RTF_GATEWAY; } r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) r_cfg.fc_encap_type = nla_get_u16(nla); From f5c73297181c6b3ad76537bad98eaad6d29b9333 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 30 Dec 2021 20:12:31 -0800 Subject: [PATCH 805/868] userfaultfd/selftests: fix hugetlb area allocations Currently, userfaultfd selftest for hugetlb as run from run_vmtests.sh or any environment where there are 'just enough' hugetlb pages will always fail with: testing events (fork, remap, remove): ERROR: UFFDIO_COPY error: -12 (errno=12, line=616) The ENOMEM error code implies there are not enough hugetlb pages. However, there are free hugetlb pages but they are all reserved. There is a basic problem with the way the test allocates hugetlb pages which has existed since the test was originally written. Due to the way 'cleanup' was done between different phases of the test, this issue was masked until recently. The issue was uncovered by commit 8ba6e8640844 ("userfaultfd/selftests: reinitialize test context in each test"). For the hugetlb test, src and dst areas are allocated as PRIVATE mappings of a hugetlb file. This means that at mmap time, pages are reserved for the src and dst areas. At the start of event testing (and other tests) the src area is populated which results in allocation of huge pages to fill the area and consumption of reserves associated with the area. Then, a child is forked to fault in the dst area. Note that the dst area was allocated in the parent and hence the parent owns the reserves associated with the mapping. The child has normal access to the dst area, but can not use the reserves created/owned by the parent. Thus, if there are no other huge pages available allocation of a page for the dst by the child will fail. Fix by not creating reserves for the dst area. In this way the child can use free (non-reserved) pages. Also, MAP_PRIVATE of a file only makes sense if you are interested in the contents of the file before making a COW copy. The test does not do this. So, just use MAP_ANONYMOUS | MAP_HUGETLB to create an anonymous hugetlb mapping. There is no need to create a hugetlb file in the non-shared case. Link: https://lkml.kernel.org/r/20211217172919.7861-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: Axel Rasmussen Cc: Peter Xu Cc: Andrea Arcangeli Cc: Mina Almasry Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 8a09057d2f22..9354a5e0321c 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -87,7 +87,7 @@ static bool test_uffdio_minor = false; static bool map_shared; static int shm_fd; -static int huge_fd; +static int huge_fd = -1; /* only used for hugetlb_shared test */ static char *huge_fd_off0; static unsigned long long *count_verify; static int uffd = -1; @@ -223,6 +223,9 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) static void hugetlb_release_pages(char *rel_area) { + if (huge_fd == -1) + return; + if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, rel_area == huge_fd_off0 ? 0 : nr_pages * page_size, nr_pages * page_size)) @@ -235,16 +238,17 @@ static void hugetlb_allocate_area(void **alloc_area) char **alloc_area_alias; *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | - MAP_HUGETLB, - huge_fd, *alloc_area == area_src ? 0 : - nr_pages * page_size); + map_shared ? MAP_SHARED : + MAP_PRIVATE | MAP_HUGETLB | + (*alloc_area == area_src ? 0 : MAP_NORESERVE), + huge_fd, + *alloc_area == area_src ? 0 : nr_pages * page_size); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); if (map_shared) { area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_HUGETLB, + MAP_SHARED, huge_fd, *alloc_area == area_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) From ebb3f994dd92f8fb4d70c7541091216c1e10cb71 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 30 Dec 2021 20:12:34 -0800 Subject: [PATCH 806/868] mm/damon/dbgfs: fix 'struct pid' leaks in 'dbgfs_target_ids_write()' DAMON debugfs interface increases the reference counts of 'struct pid's for targets from the 'target_ids' file write callback ('dbgfs_target_ids_write()'), but decreases the counts only in DAMON monitoring termination callback ('dbgfs_before_terminate()'). Therefore, when 'target_ids' file is repeatedly written without DAMON monitoring start/termination, the reference count is not decreased and therefore memory for the 'struct pid' cannot be freed. This commit fixes this issue by decreasing the reference counts when 'target_ids' is written. Link: https://lkml.kernel.org/r/20211229124029.23348-1-sj@kernel.org Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface") Signed-off-by: SeongJae Park Cc: [5.15+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/damon/dbgfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 4fbd729edc9e..ad65436756af 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -353,6 +353,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct damon_ctx *ctx = file->private_data; + struct damon_target *t, *next_t; bool id_is_pid = true; char *kbuf, *nrs; unsigned long *targets; @@ -397,8 +398,12 @@ static ssize_t dbgfs_target_ids_write(struct file *file, goto unlock_out; } - /* remove targets with previously-set primitive */ - damon_set_targets(ctx, NULL, 0); + /* remove previously set targets */ + damon_for_each_target_safe(t, next_t, ctx) { + if (targetid_is_pid(ctx)) + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } /* Configure the context for the address space type */ if (id_is_pid) From 1b4e3f26f9f7553b260b8aed43967500961448a6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 2 Dec 2021 15:06:14 +0000 Subject: [PATCH 807/868] mm: vmscan: Reduce throttling due to a failure to make progress Mike Galbraith, Alexey Avramov and Darrick Wong all reported similar problems due to reclaim throttling for excessive lengths of time. In Alexey's case, a memory hog that should go OOM quickly stalls for several minutes before stalling. In Mike and Darrick's cases, a small memcg environment stalled excessively even though the system had enough memory overall. Commit 69392a403f49 ("mm/vmscan: throttle reclaim when no progress is being made") introduced the problem although commit a19594ca4a8b ("mm/vmscan: increase the timeout if page reclaim is not making progress") made it worse. Systems at or near an OOM state that cannot be recovered must reach OOM quickly and memcg should kill tasks if a memcg is near OOM. To address this, only stall for the first zone in the zonelist, reduce the timeout to 1 tick for VMSCAN_THROTTLE_NOPROGRESS and only stall if the scan control nr_reclaimed is 0, kswapd is still active and there were excessive pages pending for writeback. If kswapd has stopped reclaiming due to excessive failures, do not stall at all so that OOM triggers relatively quickly. Similarly, if an LRU is simply congested, only lightly throttle similar to NOPROGRESS. Alexey's original case was the most straight forward for i in {1..3}; do tail /dev/zero; done On vanilla 5.16-rc1, this test stalled heavily, after the patch the test completes in a few seconds similar to 5.15. Alexey's second test case added watching a youtube video while tail runs 10 times. On 5.15, playback only jitters slightly, 5.16-rc1 stalls a lot with lots of frames missing and numerous audio glitches. With this patch applies, the video plays similarly to 5.15. [lkp@intel.com: Fix W=1 build warning] Link: https://lore.kernel.org/r/99e779783d6c7fce96448a3402061b9dc1b3b602.camel@gmx.de Link: https://lore.kernel.org/r/20211124011954.7cab9bb4@mail.inbox.lv Link: https://lore.kernel.org/r/20211022144651.19914-1-mgorman@techsingularity.net Link: https://lore.kernel.org/r/20211202150614.22440-1-mgorman@techsingularity.net Link: https://linux-regtracking.leemhuis.info/regzbot/regression/20211124011954.7cab9bb4@mail.inbox.lv/ Reported-and-tested-by: Alexey Avramov Reported-and-tested-by: Mike Galbraith Reported-and-tested-by: Darrick J. Wong Reported-by: kernel test robot Acked-by: Hugh Dickins Tracked-by: Thorsten Leemhuis Fixes: 69392a403f49 ("mm/vmscan: throttle reclaim when no progress is being made") Signed-off-by: Mel Gorman Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + include/trace/events/vmscan.h | 4 ++- mm/vmscan.c | 64 ++++++++++++++++++++++++++++++----- 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 58e744b78c2c..936dc0b6c226 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -277,6 +277,7 @@ enum vmscan_throttle_state { VMSCAN_THROTTLE_WRITEBACK, VMSCAN_THROTTLE_ISOLATED, VMSCAN_THROTTLE_NOPROGRESS, + VMSCAN_THROTTLE_CONGESTED, NR_VMSCAN_THROTTLE, }; diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index f25a6149d3ba..ca2e9009a651 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -30,12 +30,14 @@ #define _VMSCAN_THROTTLE_WRITEBACK (1 << VMSCAN_THROTTLE_WRITEBACK) #define _VMSCAN_THROTTLE_ISOLATED (1 << VMSCAN_THROTTLE_ISOLATED) #define _VMSCAN_THROTTLE_NOPROGRESS (1 << VMSCAN_THROTTLE_NOPROGRESS) +#define _VMSCAN_THROTTLE_CONGESTED (1 << VMSCAN_THROTTLE_CONGESTED) #define show_throttle_flags(flags) \ (flags) ? __print_flags(flags, "|", \ {_VMSCAN_THROTTLE_WRITEBACK, "VMSCAN_THROTTLE_WRITEBACK"}, \ {_VMSCAN_THROTTLE_ISOLATED, "VMSCAN_THROTTLE_ISOLATED"}, \ - {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"} \ + {_VMSCAN_THROTTLE_NOPROGRESS, "VMSCAN_THROTTLE_NOPROGRESS"}, \ + {_VMSCAN_THROTTLE_CONGESTED, "VMSCAN_THROTTLE_CONGESTED"} \ ) : "VMSCAN_THROTTLE_NONE" diff --git a/mm/vmscan.c b/mm/vmscan.c index fb9584641ac7..4c4d5f6cd8a3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1021,6 +1021,39 @@ static void handle_write_error(struct address_space *mapping, unlock_page(page); } +static bool skip_throttle_noprogress(pg_data_t *pgdat) +{ + int reclaimable = 0, write_pending = 0; + int i; + + /* + * If kswapd is disabled, reschedule if necessary but do not + * throttle as the system is likely near OOM. + */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + + /* + * If there are a lot of dirty/writeback pages then do not + * throttle as throttling will occur when the pages cycle + * towards the end of the LRU if still under writeback. + */ + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zone *zone = pgdat->node_zones + i; + + if (!populated_zone(zone)) + continue; + + reclaimable += zone_reclaimable_pages(zone); + write_pending += zone_page_state_snapshot(zone, + NR_ZONE_WRITE_PENDING); + } + if (2 * write_pending <= reclaimable) + return true; + + return false; +} + void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) { wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason]; @@ -1056,8 +1089,16 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) } break; + case VMSCAN_THROTTLE_CONGESTED: + fallthrough; case VMSCAN_THROTTLE_NOPROGRESS: - timeout = HZ/2; + if (skip_throttle_noprogress(pgdat)) { + cond_resched(); + return; + } + + timeout = 1; + break; case VMSCAN_THROTTLE_ISOLATED: timeout = HZ/50; @@ -3321,7 +3362,7 @@ again: if (!current_is_kswapd() && current_may_throttle() && !sc->hibernation_mode && test_bit(LRUVEC_CONGESTED, &target_lruvec->flags)) - reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED); if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc)) @@ -3386,16 +3427,16 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc) } /* - * Do not throttle kswapd on NOPROGRESS as it will throttle on - * VMSCAN_THROTTLE_WRITEBACK if there are too many pages under - * writeback and marked for immediate reclaim at the tail of - * the LRU. + * Do not throttle kswapd or cgroup reclaim on NOPROGRESS as it will + * throttle on VMSCAN_THROTTLE_WRITEBACK if there are too many pages + * under writeback and marked for immediate reclaim at the tail of the + * LRU. */ - if (current_is_kswapd()) + if (current_is_kswapd() || cgroup_reclaim(sc)) return; /* Throttle if making no progress at high prioities. */ - if (sc->priority < DEF_PRIORITY - 2) + if (sc->priority == 1 && !sc->nr_reclaimed) reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS); } @@ -3415,6 +3456,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) unsigned long nr_soft_scanned; gfp_t orig_mask; pg_data_t *last_pgdat = NULL; + pg_data_t *first_pgdat = NULL; /* * If the number of buffer_heads in the machine exceeds the maximum @@ -3478,14 +3520,18 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) /* need some check for avoid more shrink_zone() */ } + if (!first_pgdat) + first_pgdat = zone->zone_pgdat; + /* See comment about same check for global reclaim above */ if (zone->zone_pgdat == last_pgdat) continue; last_pgdat = zone->zone_pgdat; shrink_node(zone->zone_pgdat, sc); - consider_reclaim_throttle(zone->zone_pgdat, sc); } + consider_reclaim_throttle(first_pgdat, sc); + /* * Restore to original mask to avoid the impact on the caller if we * promoted it to __GFP_HIGHMEM. From 8008293888188c3923f5bd8a69370dae25ed14e5 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 31 Dec 2021 13:10:09 -0800 Subject: [PATCH 808/868] mm: vmscan: reduce throttling due to a failure to make progress -fix Hugh Dickins reported the following My tmpfs swapping load (tweaked to use huge pages more heavily than in real life) is far from being a realistic load: but it was notably slowed down by your throttling mods in 5.16-rc, and this patch makes it well again - thanks. But: it very quickly hit NULL pointer until I changed that last line to if (first_pgdat) consider_reclaim_throttle(first_pgdat, sc); The likely issue is that huge pages are a major component of the test workload. When this is the case, first_pgdat may never get set if compaction is ready to continue due to this check if (IS_ENABLED(CONFIG_COMPACTION) && sc->order > PAGE_ALLOC_COSTLY_ORDER && compaction_ready(zone, sc)) { sc->compaction_ready = true; continue; } If this was true for every zone in the zonelist, first_pgdat would never get set resulting in a NULL pointer exception. Link: https://lkml.kernel.org/r/20211209095453.GM3366@techsingularity.net Fixes: 1b4e3f26f9f75 ("mm: vmscan: Reduce throttling due to a failure to make progress") Signed-off-by: Mel Gorman Reported-by: Hugh Dickins Cc: Michal Hocko Cc: Vlastimil Babka Cc: Rik van Riel Cc: Mike Galbraith Cc: Darrick J. Wong Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 4c4d5f6cd8a3..700434db5735 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3530,7 +3530,8 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) shrink_node(zone->zone_pgdat, sc); } - consider_reclaim_throttle(first_pgdat, sc); + if (first_pgdat) + consider_reclaim_throttle(first_pgdat, sc); /* * Restore to original mask to avoid the impact on the caller if we From 5e75d0b215b868337e7a193f28a543ec00e858b1 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Fri, 31 Dec 2021 10:01:08 +0800 Subject: [PATCH 809/868] selftests: net: udpgro_fwd.sh: explicitly checking the available ping feature As Paolo pointed out, the result of ping IPv6 address depends on the running distro. So explicitly checking the available ping feature, as e.g. do the bareudp.sh self-tests. Fixes: 8b3170e07539 ("selftests: net: using ping6 for IPv6 in udpgro_fwd.sh") Signed-off-by: Jianguo Wu Link: https://lore.kernel.org/r/825ee22b-4245-dbf7-d2f7-a230770d6e21@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 3ea73013d956..6f05e06f6761 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -193,7 +193,8 @@ for family in 4 6; do SUFFIX="64 nodad" VXDEV=vxlan6 IPT=ip6tables - PING="ping6" + # Use ping6 on systems where ping doesn't handle IPv6 + ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING="ping6" fi echo "IPv$family" From d6d86830705f173fca6087a3e67ceaf68db80523 Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Fri, 31 Dec 2021 10:35:23 +0800 Subject: [PATCH 810/868] net ticp:fix a kernel-infoleak in __tipc_sendmsg() struct tipc_socket_addr.ref has a 4-byte hole,and __tipc_getname() currently copying it to user space,causing kernel-infoleak. BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] lib/usercopy.c:33 BUG: KMSAN: kernel-infoleak in _copy_to_user+0x1c9/0x270 lib/usercopy.c:33 lib/usercopy.c:33 instrument_copy_to_user include/linux/instrumented.h:121 [inline] instrument_copy_to_user include/linux/instrumented.h:121 [inline] lib/usercopy.c:33 _copy_to_user+0x1c9/0x270 lib/usercopy.c:33 lib/usercopy.c:33 copy_to_user include/linux/uaccess.h:209 [inline] copy_to_user include/linux/uaccess.h:209 [inline] net/socket.c:287 move_addr_to_user+0x3f6/0x600 net/socket.c:287 net/socket.c:287 __sys_getpeername+0x470/0x6b0 net/socket.c:1987 net/socket.c:1987 __do_sys_getpeername net/socket.c:1997 [inline] __se_sys_getpeername net/socket.c:1994 [inline] __do_sys_getpeername net/socket.c:1997 [inline] net/socket.c:1994 __se_sys_getpeername net/socket.c:1994 [inline] net/socket.c:1994 __x64_sys_getpeername+0xda/0x120 net/socket.c:1994 net/socket.c:1994 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_x64 arch/x86/entry/common.c:51 [inline] arch/x86/entry/common.c:82 do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was stored to memory at: tipc_getname+0x575/0x5e0 net/tipc/socket.c:757 net/tipc/socket.c:757 __sys_getpeername+0x3b3/0x6b0 net/socket.c:1984 net/socket.c:1984 __do_sys_getpeername net/socket.c:1997 [inline] __se_sys_getpeername net/socket.c:1994 [inline] __do_sys_getpeername net/socket.c:1997 [inline] net/socket.c:1994 __se_sys_getpeername net/socket.c:1994 [inline] net/socket.c:1994 __x64_sys_getpeername+0xda/0x120 net/socket.c:1994 net/socket.c:1994 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_x64 arch/x86/entry/common.c:51 [inline] arch/x86/entry/common.c:82 do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Uninit was stored to memory at: msg_set_word net/tipc/msg.h:212 [inline] msg_set_destport net/tipc/msg.h:619 [inline] msg_set_word net/tipc/msg.h:212 [inline] net/tipc/socket.c:1486 msg_set_destport net/tipc/msg.h:619 [inline] net/tipc/socket.c:1486 __tipc_sendmsg+0x44fa/0x5890 net/tipc/socket.c:1486 net/tipc/socket.c:1486 tipc_sendmsg+0xeb/0x140 net/tipc/socket.c:1402 net/tipc/socket.c:1402 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] sock_sendmsg_nosec net/socket.c:704 [inline] net/socket.c:2409 sock_sendmsg net/socket.c:724 [inline] net/socket.c:2409 ____sys_sendmsg+0xe11/0x12c0 net/socket.c:2409 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] ___sys_sendmsg net/socket.c:2463 [inline] net/socket.c:2492 __sys_sendmsg+0x704/0x840 net/socket.c:2492 net/socket.c:2492 __do_sys_sendmsg net/socket.c:2501 [inline] __se_sys_sendmsg net/socket.c:2499 [inline] __do_sys_sendmsg net/socket.c:2501 [inline] net/socket.c:2499 __se_sys_sendmsg net/socket.c:2499 [inline] net/socket.c:2499 __x64_sys_sendmsg+0xe2/0x120 net/socket.c:2499 net/socket.c:2499 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_x64 arch/x86/entry/common.c:51 [inline] arch/x86/entry/common.c:82 do_syscall_64+0x54/0xd0 arch/x86/entry/common.c:82 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x44/0xae Local variable skaddr created at: __tipc_sendmsg+0x2d0/0x5890 net/tipc/socket.c:1419 net/tipc/socket.c:1419 tipc_sendmsg+0xeb/0x140 net/tipc/socket.c:1402 net/tipc/socket.c:1402 Bytes 4-7 of 16 are uninitialized Memory access of size 16 starts at ffff888113753e00 Data copied to user address 0000000020000280 Reported-by: syzbot+cdbd40e0c3ca02cae3b7@syzkaller.appspotmail.com Signed-off-by: Haimin Zhang Acked-by: Jon Maloy Link: https://lore.kernel.org/r/1640918123-14547-1-git-send-email-tcs.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ad570c2450be..3e63c83e641c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1461,6 +1461,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) msg_set_syn(hdr, 1); } + memset(&skaddr, 0, sizeof(skaddr)); + /* Determine destination */ if (atype == TIPC_SERVICE_RANGE) { return tipc_sendmcast(sock, ua, m, dlen, timeout); From 938f2e0b57ffe8a6df71e1e177b2978b1b33fe5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sat, 1 Jan 2022 06:27:13 +0100 Subject: [PATCH 811/868] batman-adv: mcast: don't send link-local multicast to mcast routers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The addition of routable multicast TX handling introduced a bug/regression for packets with a link-local multicast destination: These packets would be sent to all batman-adv nodes with a multicast router and to all batman-adv nodes with an old version without multicast router detection. This even disregards the batman-adv multicast fanout setting, which can potentially lead to an unwanted, high number of unicast transmissions or even congestion. Fixing this by avoiding to send link-local multicast packets to nodes in the multicast router list. Fixes: 11d458c1cb9b ("batman-adv: mcast: apply optimizations for routable packets, too") Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/multicast.c | 15 ++++++++++----- net/batman-adv/multicast.h | 10 ++++++---- net/batman-adv/soft-interface.c | 7 +++++-- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 433901dcf0c3..f4004cf0ff6f 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1339,6 +1339,7 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to check * @orig: an originator to be set to forward the skb to + * @is_routable: stores whether the destination is routable * * Return: the forwarding mode as enum batadv_forw_mode and in case of * BATADV_FORW_SINGLE set the orig to the single originator the skb @@ -1346,17 +1347,16 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **orig) + struct batadv_orig_node **orig, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; unsigned int mcast_fanout; struct ethhdr *ethhdr; - int is_routable = 0; int rtr_count = 0; ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable, - &is_routable); + is_routable); if (ret == -ENOMEM) return BATADV_FORW_NONE; else if (ret < 0) @@ -1369,7 +1369,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr); unsnoop_count = !is_unsnoopable ? 0 : atomic_read(&bat_priv->mcast.num_want_all_unsnoopables); - rtr_count = batadv_mcast_forw_rtr_count(bat_priv, is_routable); + rtr_count = batadv_mcast_forw_rtr_count(bat_priv, *is_routable); total_count = tt_count + ip_count + unsnoop_count + rtr_count; @@ -1689,6 +1689,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier + * @is_routable: stores whether the destination is routable * * Sends copies of a frame with multicast destination to any node that signaled * interest in it, that is either via the translation table or the according @@ -1701,7 +1702,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { int ret; @@ -1717,12 +1718,16 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, return ret; } + if (!is_routable) + goto skip_mc_router; + ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } +skip_mc_router: consume_skb(skb); return ret; } diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 9fee5da08311..8aec818d0bf6 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -43,7 +43,8 @@ enum batadv_forw_mode { enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig); + struct batadv_orig_node **mcast_single_orig, + int *is_routable); int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -51,7 +52,7 @@ int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid); + unsigned short vid, int is_routable); void batadv_mcast_init(struct batadv_priv *bat_priv); @@ -68,7 +69,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig) + struct batadv_orig_node **mcast_single_orig, + int *is_routable) { return BATADV_FORW_ALL; } @@ -85,7 +87,7 @@ batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, static inline int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { kfree_skb(skb); return NET_XMIT_DROP; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7ee09337fc40..2dbbe6c19609 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -198,6 +198,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, int gw_mode; enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE; struct batadv_orig_node *mcast_single_orig = NULL; + int mcast_is_routable = 0; int network_offset = ETH_HLEN; __be16 proto; @@ -300,7 +301,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { forw_mode = batadv_mcast_forw_mode(bat_priv, skb, - &mcast_single_orig); + &mcast_single_orig, + &mcast_is_routable); if (forw_mode == BATADV_FORW_NONE) goto dropped; @@ -359,7 +361,8 @@ send: ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid, mcast_single_orig); } else if (forw_mode == BATADV_FORW_SOME) { - ret = batadv_mcast_forw_send(bat_priv, skb, vid); + ret = batadv_mcast_forw_send(bat_priv, skb, vid, + mcast_is_routable); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) From ae81de737885820616f9c67c2e7935998b523d58 Mon Sep 17 00:00:00 2001 From: Gagan Kumar Date: Sat, 1 Jan 2022 11:11:25 +0530 Subject: [PATCH 812/868] mctp: Remove only static neighbour on RTM_DELNEIGH Add neighbour source flag in mctp_neigh_remove(...) to allow removal of only static neighbours. This should be a no-op change and might be useful later when mctp can have MCTP_NEIGH_DISCOVER neighbours. Signed-off-by: Gagan Kumar Signed-off-by: David S. Miller --- net/mctp/neigh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c index 5cc042121493..6ad3e33bd4d4 100644 --- a/net/mctp/neigh.c +++ b/net/mctp/neigh.c @@ -85,8 +85,8 @@ void mctp_neigh_remove_dev(struct mctp_dev *mdev) mutex_unlock(&net->mctp.neigh_lock); } -// TODO: add a "source" flag so netlink can only delete static neighbours? -static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid) +static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid, + enum mctp_neigh_source source) { struct net *net = dev_net(mdev->dev); struct mctp_neigh *neigh, *tmp; @@ -94,7 +94,8 @@ static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid) mutex_lock(&net->mctp.neigh_lock); list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) { - if (neigh->dev == mdev && neigh->eid == eid) { + if (neigh->dev == mdev && neigh->eid == eid && + neigh->source == source) { list_del_rcu(&neigh->list); /* TODO: immediate RTM_DELNEIGH */ call_rcu(&neigh->rcu, __mctp_neigh_free); @@ -202,7 +203,7 @@ static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh, if (!mdev) return -ENODEV; - return mctp_neigh_remove(mdev, eid); + return mctp_neigh_remove(mdev, eid, MCTP_NEIGH_STATIC); } static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event, From c255a34e02efb1393d23ffb205ba1a11320aeffb Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Sun, 2 Jan 2022 07:37:26 +0000 Subject: [PATCH 813/868] net: ena: Fix undefined state when tx request id is out of bounds ena_com_tx_comp_req_id_get() checks the req_id of a received completion, and if it is out of bounds returns -EINVAL. This is a sign that something is wrong with the device and it needs to be reset. The current code does not reset the device in this case, which leaves the driver in an undefined state, where this completion is not properly handled. This commit adds a call to handle_invalid_req_id() in ena_clean_tx_irq() and ena_clean_xdp_irq() which resets the device to fix the issue. This commit also removes unnecessary request id checks from validate_tx_req_id() and validate_xdp_req_id(). This check is unneeded because it was already performed in ena_com_tx_comp_req_id_get(), which is called right before these functions. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 34 ++++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 7d5d885d85d5..2274063e34cc 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1288,26 +1288,22 @@ static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { - struct ena_tx_buffer *tx_info = NULL; + struct ena_tx_buffer *tx_info; - if (likely(req_id < tx_ring->ring_size)) { - tx_info = &tx_ring->tx_buffer_info[req_id]; - if (likely(tx_info->skb)) - return 0; - } + tx_info = &tx_ring->tx_buffer_info[req_id]; + if (likely(tx_info->skb)) + return 0; return handle_invalid_req_id(tx_ring, req_id, tx_info, false); } static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) { - struct ena_tx_buffer *tx_info = NULL; + struct ena_tx_buffer *tx_info; - if (likely(req_id < xdp_ring->ring_size)) { - tx_info = &xdp_ring->tx_buffer_info[req_id]; - if (likely(tx_info->xdpf)) - return 0; - } + tx_info = &xdp_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); } @@ -1332,9 +1328,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id); - if (rc) + if (rc) { + if (unlikely(rc == -EINVAL)) + handle_invalid_req_id(tx_ring, req_id, NULL, + false); break; + } + /* validate that the request id points to a valid skb */ rc = validate_tx_req_id(tx_ring, req_id); if (rc) break; @@ -1896,9 +1897,14 @@ static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, &req_id); - if (rc) + if (rc) { + if (unlikely(rc == -EINVAL)) + handle_invalid_req_id(xdp_ring, req_id, NULL, + true); break; + } + /* validate that the request id points to a valid xdp_frame */ rc = validate_xdp_req_id(xdp_ring, req_id); if (rc) break; From cb3d4f98f0b26eafa0b913ac3716e4714254a747 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Sun, 2 Jan 2022 07:37:27 +0000 Subject: [PATCH 814/868] net: ena: Fix wrong rx request id by resetting device A wrong request id received from the device is a sign that something is wrong with it, therefore trigger a device reset. Also add some debug info to the "Page is NULL" print to make it easier to debug. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 2274063e34cc..52a8c60b7e29 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1428,6 +1428,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, u16 *next_to_clean) { struct ena_rx_buffer *rx_info; + struct ena_adapter *adapter; u16 len, req_id, buf = 0; struct sk_buff *skb; void *page_addr; @@ -1440,8 +1441,14 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(!rx_info->page)) { - netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, - "Page is NULL\n"); + adapter = rx_ring->adapter; + netif_err(adapter, rx_err, rx_ring->netdev, + "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id); + ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp); + adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; + /* Make sure reset reason is set before triggering the reset */ + smp_mb__before_atomic(); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); return NULL; } From 5055dc0348b8b7c168e3296044bccd724e1ae6cd Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Sun, 2 Jan 2022 07:37:28 +0000 Subject: [PATCH 815/868] net: ena: Fix error handling when calculating max IO queues number The role of ena_calc_max_io_queue_num() is to return the number of queues supported by the device, which means the return value should be >=0. The function that calls ena_calc_max_io_queue_num(), checks the return value. If it is 0, it means the device reported it supports 0 IO queues. This case is considered an error and is handled by the calling function accordingly. However the current implementation of ena_calc_max_io_queue_num() is wrong, since when it detects the device supports 0 IO queues, it returns -EFAULT. In such a case the calling function doesn't detect the error, and therefore doesn't handle it. This commit changes ena_calc_max_io_queue_num() to return 0 in case the device reported it supports 0 queues, allowing the calling function to properly handle the error case. Fixes: 736ce3f414cc ("net: ena: make ethtool -l show correct max number of queues") Signed-off-by: Shay Agroskin Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 52a8c60b7e29..c72f0c7ff4aa 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -4026,10 +4026,6 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); /* 1 IRQ for mgmnt and 1 IRQs for each IO direction */ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); - if (unlikely(!max_num_io_queues)) { - dev_err(&pdev->dev, "The device doesn't have io queues\n"); - return -EFAULT; - } return max_num_io_queues; } From f9d31c4cf4c11ff10317f038b9c6f7c3bda6cdd4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 31 Dec 2021 18:37:37 -0500 Subject: [PATCH 816/868] sctp: hold endpoint before calling cb in sctp_transport_lookup_process The same fix in commit 5ec7d18d1813 ("sctp: use call_rcu to free endpoint") is also needed for dumping one asoc and sock after the lookup. Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 +-- net/sctp/diag.c | 52 +++++++++++++++++++---------------------- net/sctp/socket.c | 22 +++++++++++------ 3 files changed, 40 insertions(+), 37 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d314a180ab93..3ae61ce2eabd 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -112,8 +112,7 @@ struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); -int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), - struct net *net, +int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index a7d623171501..034e2c74497d 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -245,48 +245,44 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + 64; } -static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { struct sctp_association *assoc = tsp->asoc; - struct sock *sk = tsp->asoc->base.sk; struct sctp_comm_param *commp = p; - struct sk_buff *in_skb = commp->skb; + struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *req = commp->r; - const struct nlmsghdr *nlh = commp->nlh; - struct net *net = sock_net(in_skb->sk); + struct sk_buff *skb = commp->skb; struct sk_buff *rep; int err; err = sock_diag_check_cookie(sk, req->id.idiag_cookie); if (err) - goto out; + return err; - err = -ENOMEM; rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); if (!rep) - goto out; + return -ENOMEM; lock_sock(sk); - if (sk != assoc->base.sk) { - release_sock(sk); - sk = assoc->base.sk; - lock_sock(sk); - } - err = inet_sctp_diag_fill(sk, assoc, rep, req, - sk_user_ns(NETLINK_CB(in_skb).sk), - NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, - commp->net_admin); - release_sock(sk); - if (err < 0) { - WARN_ON(err == -EMSGSIZE); - kfree_skb(rep); + if (ep != assoc->ep) { + err = -EAGAIN; goto out; } - err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid); + err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(skb).sk), + NETLINK_CB(skb).portid, commp->nlh->nlmsg_seq, 0, + commp->nlh, commp->net_admin); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + goto out; + } + release_sock(sk); + + return nlmsg_unicast(sock_net(skb->sk)->diag_nlsk, rep, NETLINK_CB(skb).portid); out: + release_sock(sk); + kfree_skb(rep); return err; } @@ -429,15 +425,15 @@ static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, static int sctp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - struct sk_buff *in_skb = cb->skb; - struct net *net = sock_net(in_skb->sk); + struct sk_buff *skb = cb->skb; + struct net *net = sock_net(skb->sk); const struct nlmsghdr *nlh = cb->nlh; union sctp_addr laddr, paddr; struct sctp_comm_param commp = { - .skb = in_skb, + .skb = skb, .r = req, .nlh = nlh, - .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN), + .net_admin = netlink_net_capable(skb, CAP_NET_ADMIN), }; if (req->sdiag_family == AF_INET) { @@ -460,7 +456,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, paddr.v6.sin6_family = AF_INET6; } - return sctp_transport_lookup_process(sctp_tsp_dump_one, + return sctp_transport_lookup_process(sctp_sock_dump_one, net, &laddr, &paddr, &commp); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ad5028a07b18..da08671a3f80 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5317,23 +5317,31 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), } EXPORT_SYMBOL_GPL(sctp_for_each_endpoint); -int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), - struct net *net, +int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p) { struct sctp_transport *transport; - int err; + struct sctp_endpoint *ep; + int err = -ENOENT; rcu_read_lock(); transport = sctp_addrs_lookup_transport(net, laddr, paddr); + if (!transport) { + rcu_read_unlock(); + return err; + } + ep = transport->asoc->ep; + if (!sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + sctp_transport_put(transport); + rcu_read_unlock(); + return err; + } rcu_read_unlock(); - if (!transport) - return -ENOENT; - err = cb(transport, p); + err = cb(ep, transport, p); + sctp_endpoint_put(ep); sctp_transport_put(transport); - return err; } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); From e0257a01d6689c273a019756ed5e13911cc1bfed Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 22 Dec 2021 00:11:30 +0800 Subject: [PATCH 817/868] perf pmu: Fix alias events list Commit 0e0ae8742207c3b4 ("perf list: Display hybrid PMU events with cpu type") changes the event list for uncore PMUs or arm64 heterogeneous CPU systems, such that duplicate aliases are incorrectly listed per PMU (which they should not be), like: # perf list ... unc_cbo_cache_lookup.any_es [Unit: uncore_cbox L3 Lookup any request that access cache and found line in E or S-state] unc_cbo_cache_lookup.any_es [Unit: uncore_cbox L3 Lookup any request that access cache and found line in E or S-state] unc_cbo_cache_lookup.any_i [Unit: uncore_cbox L3 Lookup any request that access cache and found line in I-state] unc_cbo_cache_lookup.any_i [Unit: uncore_cbox L3 Lookup any request that access cache and found line in I-state] ... Notice how the events are listed twice. The named commit changed how we remove duplicate events, in that events for different PMUs are not treated as duplicates. I suppose this is to handle how "Each hybrid pmu event has been assigned with a pmu name". Fix PMU alias listing by restoring behaviour to remove duplicates for non-hybrid PMUs. Fixes: 0e0ae8742207c3b4 ("perf list: Display hybrid PMU events with cpu type") Signed-off-by: John Garry Tested-by: Zhengjun Xing Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/1640103090-140490-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6ae58406f4fc..8dfbba15aeb8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1659,6 +1659,21 @@ bool is_pmu_core(const char *name) return !strcmp(name, "cpu") || is_arm_pmu_core(name); } +static bool pmu_alias_is_duplicate(struct sevent *alias_a, + struct sevent *alias_b) +{ + /* Different names -> never duplicates */ + if (strcmp(alias_a->name, alias_b->name)) + return false; + + /* Don't remove duplicates for hybrid PMUs */ + if (perf_pmu__is_hybrid(alias_a->pmu) && + perf_pmu__is_hybrid(alias_b->pmu)) + return false; + + return true; +} + void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, bool long_desc, bool details_flag, bool deprecated, const char *pmu_name) @@ -1744,12 +1759,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) { - if (!aliases[j].pmu || !aliases[j - 1].pmu || - !strcmp(aliases[j].pmu, aliases[j - 1].pmu)) { - continue; - } - } + if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) + continue; if (name_only) { printf("%s ", aliases[j].name); From 64f18d2d043015b3f835ce4c9f3beb97cfd19b6e Mon Sep 17 00:00:00 2001 From: yaowenbin Date: Wed, 29 Dec 2021 16:55:19 +0800 Subject: [PATCH 818/868] perf top: Fix TUI exit screen refresh race condition When the following command is executed several times, a coredump file is generated. $ timeout -k 9 5 perf top -e task-clock ******* ******* ******* 0.01% [kernel] [k] __do_softirq 0.01% libpthread-2.28.so [.] __pthread_mutex_lock 0.01% [kernel] [k] __ll_sc_atomic64_sub_return double free or corruption (!prev) perf top --sort comm,dso timeout: the monitored command dumped core When we terminate "perf top" using sending signal method, SLsmg_reset_smg() called. SLsmg_reset_smg() resets the SLsmg screen management routines by freeing all memory allocated while it was active. However SLsmg_reinit_smg() maybe be called by another thread. SLsmg_reinit_smg() will free the same memory accessed by SLsmg_reset_smg(), thus it results in a double free. SLsmg_reinit_smg() is called already protected by ui__lock, so we fix the problem by adding pthread_mutex_trylock of ui__lock when calling SLsmg_reset_smg(). Signed-off-by: Wenyu Liu Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: wuxu.wu@huawei.com Link: http://lore.kernel.org/lkml/a91e3943-7ddc-f5c0-a7f5-360f073c20e6@huawei.com Signed-off-by: Hewenliang Signed-off-by: yaowenbin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/tui/setup.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index e9bfe856a5de..b1be59b4e2a4 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -170,9 +170,11 @@ void ui__exit(bool wait_for_ok) "Press any key...", 0); SLtt_set_cursor_visibility(1); - SLsmg_refresh(); - SLsmg_reset_smg(); + if (!pthread_mutex_trylock(&ui__lock)) { + SLsmg_refresh(); + SLsmg_reset_smg(); + pthread_mutex_unlock(&ui__lock); + } SLang_reset_tty(); - perf_error__unregister(&perf_tui_eops); } From 29262e1f773b4b6a43711120be564c57fca07cfb Mon Sep 17 00:00:00 2001 From: Thomas Toye Date: Sat, 1 Jan 2022 18:22:07 +0100 Subject: [PATCH 819/868] rndis_host: support Hytera digital radios Hytera makes a range of digital (DMR) radios. These radios can be programmed to a allow a computer to control them over Ethernet over USB, either using NCM or RNDIS. This commit adds support for RNDIS for Hytera radios. I tested with a Hytera PD785 and a Hytera MD785G. When these radios are programmed to set up a Radio to PC Network using RNDIS, an USB interface will be added with class 2 (Communications), subclass 2 (Abstract Modem Control) and an interface protocol of 255 ("vendor specific" - lsusb even hints "MSFT RNDIS?"). This patch is similar to the solution of this StackOverflow user, but that only works for the Hytera MD785: https://stackoverflow.com/a/53550858 To use the "Radio to PC Network" functionality of Hytera DMR radios, the radios need to be programmed correctly in CPS (Hytera's Customer Programming Software). "Forward to PC" should be checked in "Network" (under "General Setting" in "Conventional") and the "USB Network Communication Protocol" should be set to RNDIS. Signed-off-by: Thomas Toye Signed-off-by: David S. Miller --- drivers/net/usb/rndis_host.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 4a84f90e377c..247f58cb0f84 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -608,6 +608,11 @@ static const struct usb_device_id products [] = { USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_poll_status_info, +}, { + /* Hytera Communications DMR radios' "Radio to PC Network" */ + USB_VENDOR_AND_INTERFACE_INFO(0x238b, + USB_CLASS_COMM, 2 /* ACM */, 0x0ff), + .driver_info = (unsigned long)&rndis_info, }, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), From 1ef5e1d0dca5b4ffd49d7dec4a83660882f1fda4 Mon Sep 17 00:00:00 2001 From: Markus Koch Date: Sun, 2 Jan 2022 17:54:08 +0100 Subject: [PATCH 820/868] net/fsl: Remove leftover definition in xgmac_mdio commit 26eee0210ad7 ("net/fsl: fix a bug in xgmac_mdio") fixed a bug in the QorIQ mdio driver but left the (now unused) incorrect bit definition for MDIO_DATA_BSY in the code. This commit removes it. Signed-off-by: Markus Koch Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/xgmac_mdio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 0b68852379da..5b8b9bcf41a2 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -47,7 +47,6 @@ struct tgec_mdio_controller { #define MDIO_CTL_READ BIT(15) #define MDIO_DATA(x) (x & 0xffff) -#define MDIO_DATA_BSY BIT(31) struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; From c9e6606c7fe92b50a02ce51dda82586ebdf99b48 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Jan 2022 14:23:25 -0800 Subject: [PATCH 821/868] Linux 5.16-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 17b4319ad2ff..16d7f83ac368 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Gobble Gobble # *DOCUMENTATION* From e5a7431f5a2d6dcff7d516ee9d178a3254b17b87 Mon Sep 17 00:00:00 2001 From: Steven Lee Date: Tue, 14 Dec 2021 12:02:38 +0800 Subject: [PATCH 822/868] gpio: gpio-aspeed-sgpio: Fix wrong hwirq base in irq handler Each aspeed sgpio bank has 64 gpio pins(32 input pins and 32 output pins). The hwirq base for each sgpio bank should be multiples of 64 rather than multiples of 32. Signed-off-by: Steven Lee Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aspeed-sgpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index 3d6ef37a7702..b3a9b8488f11 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c +++ b/drivers/gpio/gpio-aspeed-sgpio.c @@ -395,7 +395,7 @@ static void aspeed_sgpio_irq_handler(struct irq_desc *desc) reg = ioread32(bank_reg(data, bank, reg_irq_status)); for_each_set_bit(p, ®, 32) - generic_handle_domain_irq(gc->irq.domain, i * 32 + p * 2); + generic_handle_domain_irq(gc->irq.domain, (i * 32 + p) * 2); } chained_irq_exit(ic, desc); From 32e246b02f53b2fdaa81ea9f2ca6ff068c017fcb Mon Sep 17 00:00:00 2001 From: Gregory Fong Date: Mon, 3 Jan 2022 10:53:02 +0100 Subject: [PATCH 823/868] MAINTAINERS: update gpio-brcmstb maintainers Add Doug and Florian as maintainers for gpio-brcmstb, and remove myself. Signed-off-by: Gregory Fong Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fb18ce7168aa..dd36acc87ce6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3769,7 +3769,8 @@ S: Supported F: drivers/net/wireless/broadcom/brcm80211/ BROADCOM BRCMSTB GPIO DRIVER -M: Gregory Fong +M: Doug Berger +M: Florian Fainelli L: bcm-kernel-feedback-list@broadcom.com S: Supported F: Documentation/devicetree/bindings/gpio/brcm,brcmstb-gpio.txt From e30a845b0376eb51c9c94f56bbd53b2e08ba822f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jan 2022 10:19:11 -0700 Subject: [PATCH 824/868] ipv6: Continue processing multipath route even if gateway attribute is invalid ip6_route_multipath_del loop continues processing the multipath attribute even if delete of a nexthop path fails. For consistency, do the same if the gateway attribute is invalid. Fixes: 1ff15a710a86 ("ipv6: Check attribute length for RTA_GATEWAY when deleting multipath route") Signed-off-by: David Ahern Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220103171911.94739-1-dsahern@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d2ff8a7e1709..087df86c15d1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5459,8 +5459,10 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, if (nla) { err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, extack); - if (err) - return err; + if (err) { + last_err = err; + goto next_rtnh; + } r_cfg.fc_flags |= RTF_GATEWAY; } @@ -5469,6 +5471,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, if (err) last_err = err; +next_rtnh: rtnh = rtnh_next(rtnh, &remaining); } From 95bdba23b5b4aa75fe3e6c84335e638641c707bb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 3 Jan 2022 10:05:55 -0700 Subject: [PATCH 825/868] ipv6: Do cleanup if attribute validation fails in multipath route As Nicolas noted, if gateway validation fails walking the multipath attribute the code should jump to the cleanup to free previously allocated memory. Fixes: 1ff15a710a86 ("ipv6: Check attribute length for RTA_GATEWAY when deleting multipath route") Signed-off-by: David Ahern Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220103170555.94638-1-dsahern@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 087df86c15d1..1deb6297aab6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5277,12 +5277,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - int ret; - - ret = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, extack); - if (ret) - return ret; + if (err) + goto cleanup; r_cfg.fc_flags |= RTF_GATEWAY; } From 46669e8616c649c71c4cfcd712fd3d107e771380 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 3 Jan 2022 13:49:36 -0800 Subject: [PATCH 826/868] md/raid1: fix missing bitmap update w/o WriteMostly devices commit [1] causes missing bitmap updates when there isn't any WriteMostly devices. Detailed steps to reproduce by Norbert (which somehow didn't make to lore): # setup md10 (raid1) with two drives (1 GByte sparse files) dd if=/dev/zero of=disk1 bs=1024k seek=1024 count=0 dd if=/dev/zero of=disk2 bs=1024k seek=1024 count=0 losetup /dev/loop11 disk1 losetup /dev/loop12 disk2 mdadm --create /dev/md10 --level=1 --raid-devices=2 /dev/loop11 /dev/loop12 # add bitmap (aka write-intent log) mdadm /dev/md10 --grow --bitmap=internal echo check > /sys/block/md10/md/sync_action root:# cat /sys/block/md10/md/mismatch_cnt 0 root:# # remove member drive disk2 (loop12) mdadm /dev/md10 -f loop12 ; mdadm /dev/md10 -r loop12 # modify degraded md device dd if=/dev/urandom of=/dev/md10 bs=512 count=1 # no blocks recorded as out of sync on the remaining member disk1/loop11 root:# mdadm -X /dev/loop11 | grep Bitmap Bitmap : 16 bits (chunks), 0 dirty (0.0%) root:# # re-add disk2, nothing synced because of empty bitmap mdadm /dev/md10 --re-add /dev/loop12 # check integrity again echo check > /sys/block/md10/md/sync_action # disk1 and disk2 are no longer in sync, reads return differend data root:# cat /sys/block/md10/md/mismatch_cnt 128 root:# # clean up mdadm -S /dev/md10 losetup -d /dev/loop11 losetup -d /dev/loop12 rm disk1 disk2 Fix this by moving the WriteMostly check to the if condition for alloc_behind_master_bio(). [1] commit fd3b6975e9c1 ("md/raid1: only allocate write behind bio for WriteMostly device") Fixes: fd3b6975e9c1 ("md/raid1: only allocate write behind bio for WriteMostly device") Cc: stable@vger.kernel.org # v5.12+ Cc: Guoqing Jiang Cc: Jens Axboe Reported-by: Norbert Warmuth Suggested-by: Linus Torvalds Signed-off-by: Song Liu --- drivers/md/raid1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7dc8026cf6ee..85505424f7a4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1496,12 +1496,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!r1_bio->bios[i]) continue; - if (first_clone && test_bit(WriteMostly, &rdev->flags)) { + if (first_clone) { /* do behind I/O ? * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly * is waiting for behind writes to flush */ if (bitmap && + test_bit(WriteMostly, &rdev->flags) && (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && !waitqueue_active(&bitmap->behind_wait)) { From 065e1ae02fbe5f56f4aa118414d45fc30647acd4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 3 Jan 2022 11:34:52 -0800 Subject: [PATCH 827/868] Revert "net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in __fixed_phy_register" This reverts commit b45396afa4177f2b1ddfeff7185da733fade1dc3 ("net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in __fixed_phy_register") since it prevents any system that uses a fixed PHY without a GPIO descriptor from properly working: [ 5.971952] brcm-systemport 9300000.ethernet: failed to register fixed PHY [ 5.978854] brcm-systemport: probe of 9300000.ethernet failed with error -22 [ 5.986047] brcm-systemport 9400000.ethernet: failed to register fixed PHY [ 5.992947] brcm-systemport: probe of 9400000.ethernet failed with error -22 Fixes: b45396afa417 ("net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in __fixed_phy_register") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220103193453.1214961-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index a0c256bd5441..c65fb5f5d2dc 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -239,8 +239,8 @@ static struct phy_device *__fixed_phy_register(unsigned int irq, /* Check if we have a GPIO associated with this fixed phy */ if (!gpiod) { gpiod = fixed_phy_get_gpiod(np); - if (!gpiod) - return ERR_PTR(-EINVAL); + if (IS_ERR(gpiod)) + return ERR_CAST(gpiod); } /* Get the next available PHY address, up to PHY_MAX_ADDR */ From fa55a7d745de2d10489295b0674a403e2a5d490d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 3 Jan 2022 18:11:30 +0100 Subject: [PATCH 828/868] seg6: export get_srh() for ICMP handling An ICMP error message can contain in its message body part of an IPv6 packet which invoked the error. Such a packet might contain a segment router header. Export get_srh() so the ICMP code can make use of it. Since his changes the scope of the function from local to global, add the seg6_ prefix to keep the namespace clean. And move it into seg6.c so it is always available, not just when IPV6_SEG6_LWTUNNEL is enabled. Signed-off-by: Andrew Lunn Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/seg6.h | 1 + net/ipv6/seg6.c | 29 +++++++++++++++++++++++++++++ net/ipv6/seg6_local.c | 33 ++------------------------------- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/include/net/seg6.h b/include/net/seg6.h index 9d19c15e8545..a6f25983670a 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -58,6 +58,7 @@ extern int seg6_local_init(void); extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); +extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index a8b5784afb1a..5bc9bf892199 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -75,6 +75,35 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced) return true; } +struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags) +{ + struct ipv6_sr_hdr *srh; + int len, srhoff = 0; + + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0) + return NULL; + + if (!pskb_may_pull(skb, srhoff + sizeof(*srh))) + return NULL; + + srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + + len = (srh->hdrlen + 1) << 3; + + if (!pskb_may_pull(skb, srhoff + len)) + return NULL; + + /* note that pskb_may_pull may change pointers in header; + * for this reason it is necessary to reload them when needed. + */ + srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + + if (!seg6_validate_srh(srh, len, true)) + return NULL; + + return srh; +} + static struct genl_family seg6_genl_family; static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 2dc40b3f373e..ef88489c71f5 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -150,40 +150,11 @@ static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) return (struct seg6_local_lwt *)lwt->data; } -static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb, int flags) -{ - struct ipv6_sr_hdr *srh; - int len, srhoff = 0; - - if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, &flags) < 0) - return NULL; - - if (!pskb_may_pull(skb, srhoff + sizeof(*srh))) - return NULL; - - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - - len = (srh->hdrlen + 1) << 3; - - if (!pskb_may_pull(skb, srhoff + len)) - return NULL; - - /* note that pskb_may_pull may change pointers in header; - * for this reason it is necessary to reload them when needed. - */ - srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - - if (!seg6_validate_srh(srh, len, true)) - return NULL; - - return srh; -} - static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb) { struct ipv6_sr_hdr *srh; - srh = get_srh(skb, IP6_FH_F_SKIP_RH); + srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH); if (!srh) return NULL; @@ -200,7 +171,7 @@ static bool decap_and_validate(struct sk_buff *skb, int proto) struct ipv6_sr_hdr *srh; unsigned int off = 0; - srh = get_srh(skb, 0); + srh = seg6_get_srh(skb, 0); if (srh && srh->segments_left > 0) return false; From e41294408c56c68ea0f269d757527bf33b39118a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 3 Jan 2022 18:11:31 +0100 Subject: [PATCH 829/868] icmp: ICMPV6: Examine invoking packet for Segment Route Headers. RFC8754 says: ICMP error packets generated within the SR domain are sent to source nodes within the SR domain. The invoking packet in the ICMP error message may contain an SRH. Since the destination address of a packet with an SRH changes as each segment is processed, it may not be the destination used by the socket or application that generated the invoking packet. For the source of an invoking packet to process the ICMP error message, the ultimate destination address of the IPv6 header may be required. The following logic is used to determine the destination address for use by protocol-error handlers. * Walk all extension headers of the invoking IPv6 packet to the routing extension header preceding the upper-layer header. - If routing header is type 4 Segment Routing Header (SRH) o The SID at Segment List[0] may be used as the destination address of the invoking packet. Mangle the skb so the network header points to the invoking packet inside the ICMP packet. The seg6 helpers can then be used on the skb to find any segment routing headers. If found, mark this fact in the IPv6 control block of the skb, and store the offset into the packet of the SRH. Then restore the skb back to its old state. Signed-off-by: Andrew Lunn Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 ++ include/net/seg6.h | 1 + net/ipv6/icmp.c | 6 +++++- net/ipv6/seg6.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 20c1f968da7c..a59d25f19385 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -133,6 +133,7 @@ struct inet6_skb_parm { __u16 dsthao; #endif __u16 frag_max_size; + __u16 srhoff; #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 @@ -142,6 +143,7 @@ struct inet6_skb_parm { #define IP6SKB_HOPBYHOP 32 #define IP6SKB_L3SLAVE 64 #define IP6SKB_JUMBOGRAM 128 +#define IP6SKB_SEG6 256 }; #if defined(CONFIG_NET_L3_MASTER_DEV) diff --git a/include/net/seg6.h b/include/net/seg6.h index a6f25983670a..02b0cd305787 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -59,6 +59,7 @@ extern void seg6_local_exit(void); extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); +extern void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a7c31ab67c5d..96c5cc0f30ce 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -820,6 +821,7 @@ out_bh_enable: void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { + struct inet6_skb_parm *opt = IP6CB(skb); const struct inet6_protocol *ipprot; int inner_offset; __be16 frag_off; @@ -829,6 +831,8 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; + seg6_icmp_srh(skb, opt); + nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; if (ipv6_ext_hdr(nexthdr)) { /* now skip over extension headers */ @@ -853,7 +857,7 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) - ipprot->err_handler(skb, NULL, type, code, inner_offset, info); + ipprot->err_handler(skb, opt, type, code, inner_offset, info); raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); return; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 5bc9bf892199..73aaabf0e966 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -104,6 +104,36 @@ struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags) return srh; } +/* Determine if an ICMP invoking packet contains a segment routing + * header. If it does, extract the offset to the true destination + * address, which is in the first segment address. + */ +void seg6_icmp_srh(struct sk_buff *skb, struct inet6_skb_parm *opt) +{ + __u16 network_header = skb->network_header; + struct ipv6_sr_hdr *srh; + + /* Update network header to point to the invoking packet + * inside the ICMP packet, so we can use the seg6_get_srh() + * helper. + */ + skb_reset_network_header(skb); + + srh = seg6_get_srh(skb, 0); + if (!srh) + goto out; + + if (srh->type != IPV6_SRCRT_TYPE_4) + goto out; + + opt->flags |= IP6SKB_SEG6; + opt->srhoff = (unsigned char *)srh - skb->data; + +out: + /* Restore the network header back to the ICMP packet */ + skb->network_header = network_header; +} + static struct genl_family seg6_genl_family; static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { From 222a011efc839ca1f51bf89fe7a2b3705fa55ccd Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 3 Jan 2022 18:11:32 +0100 Subject: [PATCH 830/868] udp6: Use Segment Routing Header for dest address if present When finding the socket to report an error on, if the invoking packet is using Segment Routing, the IPv6 destination address is that of an intermediate router, not the end destination. Extract the ultimate destination address from the segment address. This change allows traceroute to function in the presence of Segment Routing. Signed-off-by: Andrew Lunn Reviewed-by: David Ahern Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/seg6.h | 19 +++++++++++++++++++ net/ipv6/udp.c | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/include/net/seg6.h b/include/net/seg6.h index 02b0cd305787..af668f17b398 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -65,4 +65,23 @@ extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id); + +/* If the packet which invoked an ICMP error contains an SRH return + * the true destination address from within the SRH, otherwise use the + * destination address in the IP header. + */ +static inline const struct in6_addr *seg6_get_daddr(struct sk_buff *skb, + struct inet6_skb_parm *opt) +{ + struct ipv6_sr_hdr *srh; + + if (opt->flags & IP6SKB_SEG6) { + srh = (struct ipv6_sr_hdr *)(skb->data + opt->srhoff); + return &srh->segments[0]; + } + + return NULL; +} + + #endif diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8cde9efd7919..a0871c212741 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -561,7 +562,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct in6_addr *saddr = &hdr->saddr; - const struct in6_addr *daddr = &hdr->daddr; + const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr; struct udphdr *uh = (struct udphdr *)(skb->data+offset); bool tunnel = false; struct sock *sk; From 3087a6f36ee028ec095c04a8531d7d33899b7fed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Jan 2022 10:21:26 +0100 Subject: [PATCH 831/868] netrom: fix copying in user data in nr_setsockopt This code used to copy in an unsigned long worth of data before the sockptr_t conversion, so restore that. Fixes: a7b75c5a8c41 ("net: pass a sockptr_t into ->setsockopt") Reported-by: Dan Carpenter Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 775064cdd0ee..f1ba7dd3d253 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -306,7 +306,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(unsigned int)) return -EINVAL; - if (copy_from_sockptr(&opt, optval, sizeof(unsigned int))) + if (copy_from_sockptr(&opt, optval, sizeof(unsigned long))) return -EFAULT; switch (optname) { From 7d18a07897d07495ee140dd319b0e9265c0f68ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Jan 2022 01:45:08 -0800 Subject: [PATCH 832/868] sch_qfq: prevent shift-out-of-bounds in qfq_init_qdisc tx_queue_len can be set to ~0U, we need to be more careful about overflows. __fls(0) is undefined, as this report shows: UBSAN: shift-out-of-bounds in net/sched/sch_qfq.c:1430:24 shift exponent 51770272 is too large for 32-bit type 'int' CPU: 0 PID: 25574 Comm: syz-executor.0 Not tainted 5.16.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x201/0x2d8 lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:151 [inline] __ubsan_handle_shift_out_of_bounds+0x494/0x530 lib/ubsan.c:330 qfq_init_qdisc+0x43f/0x450 net/sched/sch_qfq.c:1430 qdisc_create+0x895/0x1430 net/sched/sch_api.c:1253 tc_modify_qdisc+0x9d9/0x1e20 net/sched/sch_api.c:1660 rtnetlink_rcv_msg+0x934/0xe60 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x200/0x470 net/netlink/af_netlink.c:2496 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x814/0x9f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0xaea/0xe60 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x5b9/0x910 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmsg+0x280/0x370 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 0b7f9ba28deb..d4ce58c90f9f 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1421,10 +1421,8 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; - if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES) - max_classes = QFQ_MAX_AGG_CLASSES; - else - max_classes = qdisc_dev(sch)->tx_queue_len + 1; + max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1, + QFQ_MAX_AGG_CLASSES); /* max_cl_shift = floor(log_2(max_classes)) */ max_cl_shift = __fls(max_classes); q->max_agg_classes = 1< Date: Thu, 23 Dec 2021 08:28:48 -0800 Subject: [PATCH 833/868] mac80211: initialize variable have_higher_than_11mbit Clang static analysis reports this warnings mlme.c:5332:7: warning: Branch condition evaluates to a garbage value have_higher_than_11mbit) ^~~~~~~~~~~~~~~~~~~~~~~ have_higher_than_11mbit is only set to true some of the time in ieee80211_get_rates() but is checked all of the time. So have_higher_than_11mbit needs to be initialized to false. Fixes: 5d6a1b069b7f ("mac80211: set basic rates earlier") Signed-off-by: Tom Rix Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20211223162848.3243702-1-trix@redhat.com Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 37f7d975f3da..3147ca89f608 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5265,7 +5265,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, */ if (new_sta) { u32 rates = 0, basic_rates = 0; - bool have_higher_than_11mbit; + bool have_higher_than_11mbit = false; int min_rate = INT_MAX, min_rate_index = -1; const struct cfg80211_bss_ies *ies; int shift = ieee80211_vif_get_shift(&sdata->vif); From 8b5cb7e41d9d77ffca036b0239177de123394a55 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 30 Dec 2021 22:55:47 +0300 Subject: [PATCH 834/868] mac80211: mesh: embedd mesh_paths and mpp_paths into ieee80211_if_mesh Syzbot hit NULL deref in rhashtable_free_and_destroy(). The problem was in mesh_paths and mpp_paths being NULL. mesh_pathtbl_init() could fail in case of memory allocation failure, but nobody cared, since ieee80211_mesh_init_sdata() returns void. It led to leaving 2 pointers as NULL. Syzbot has found null deref on exit path, but it could happen anywhere else, because code assumes these pointers are valid. Since all ieee80211_*_setup_sdata functions are void and do not fail, let's embedd mesh_paths and mpp_paths into parent struct to avoid adding error handling on higher levels and follow the pattern of others setup_sdata functions Fixes: 60854fd94573 ("mac80211: mesh: convert path table to rhashtable") Reported-and-tested-by: syzbot+860268315ba86ea6b96b@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/20211230195547.23977-1-paskripkin@gmail.com Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 24 +++++++++- net/mac80211/mesh.h | 22 +-------- net/mac80211/mesh_pathtbl.c | 89 +++++++++++++------------------------ 3 files changed, 54 insertions(+), 81 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5666bbb8860b..482c98ede19b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -647,6 +647,26 @@ struct mesh_csa_settings { struct cfg80211_csa_settings settings; }; +/** + * struct mesh_table + * + * @known_gates: list of known mesh gates and their mpaths by the station. The + * gate's mpath may or may not be resolved and active. + * @gates_lock: protects updates to known_gates + * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr + * @walk_head: linked list containing all mesh_path objects + * @walk_lock: lock protecting walk_head + * @entries: number of entries in the table + */ +struct mesh_table { + struct hlist_head known_gates; + spinlock_t gates_lock; + struct rhashtable rhead; + struct hlist_head walk_head; + spinlock_t walk_lock; + atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ +}; + struct ieee80211_if_mesh { struct timer_list housekeeping_timer; struct timer_list mesh_path_timer; @@ -721,8 +741,8 @@ struct ieee80211_if_mesh { /* offset from skb->data while building IE */ int meshconf_offset; - struct mesh_table *mesh_paths; - struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ + struct mesh_table mesh_paths; + struct mesh_table mpp_paths; /* Store paths for MPP&MAP */ int mesh_paths_generation; int mpp_paths_generation; }; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 77080b4f87b8..b2b717a78114 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -127,26 +127,6 @@ struct mesh_path { u32 path_change_count; }; -/** - * struct mesh_table - * - * @known_gates: list of known mesh gates and their mpaths by the station. The - * gate's mpath may or may not be resolved and active. - * @gates_lock: protects updates to known_gates - * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr - * @walk_head: linked list containing all mesh_path objects - * @walk_lock: lock protecting walk_head - * @entries: number of entries in the table - */ -struct mesh_table { - struct hlist_head known_gates; - spinlock_t gates_lock; - struct rhashtable rhead; - struct hlist_head walk_head; - spinlock_t walk_lock; - atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */ -}; - /* Recent multicast cache */ /* RMC_BUCKETS must be a power of 2, maximum 256 */ #define RMC_BUCKETS 256 @@ -308,7 +288,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); -int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); +void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata); void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata); int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr); void mesh_path_timer(struct timer_list *t); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 7cab1cf09bf1..acc1c299f1ae 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -47,32 +47,24 @@ static void mesh_path_rht_free(void *ptr, void *tblptr) mesh_path_free_rcu(tbl, mpath); } -static struct mesh_table *mesh_table_alloc(void) +static void mesh_table_init(struct mesh_table *tbl) { - struct mesh_table *newtbl; + INIT_HLIST_HEAD(&tbl->known_gates); + INIT_HLIST_HEAD(&tbl->walk_head); + atomic_set(&tbl->entries, 0); + spin_lock_init(&tbl->gates_lock); + spin_lock_init(&tbl->walk_lock); - newtbl = kmalloc(sizeof(struct mesh_table), GFP_ATOMIC); - if (!newtbl) - return NULL; - - INIT_HLIST_HEAD(&newtbl->known_gates); - INIT_HLIST_HEAD(&newtbl->walk_head); - atomic_set(&newtbl->entries, 0); - spin_lock_init(&newtbl->gates_lock); - spin_lock_init(&newtbl->walk_lock); - if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) { - kfree(newtbl); - return NULL; - } - - return newtbl; + /* rhashtable_init() may fail only in case of wrong + * mesh_rht_params + */ + WARN_ON(rhashtable_init(&tbl->rhead, &mesh_rht_params)); } static void mesh_table_free(struct mesh_table *tbl) { rhashtable_free_and_destroy(&tbl->rhead, mesh_path_rht_free, tbl); - kfree(tbl); } /** @@ -238,13 +230,13 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct mesh_path * mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(sdata->u.mesh.mesh_paths, dst, sdata); + return mpath_lookup(&sdata->u.mesh.mesh_paths, dst, sdata); } struct mesh_path * mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { - return mpath_lookup(sdata->u.mesh.mpp_paths, dst, sdata); + return mpath_lookup(&sdata->u.mesh.mpp_paths, dst, sdata); } static struct mesh_path * @@ -281,7 +273,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) struct mesh_path * mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - return __mesh_path_lookup_by_idx(sdata->u.mesh.mesh_paths, idx); + return __mesh_path_lookup_by_idx(&sdata->u.mesh.mesh_paths, idx); } /** @@ -296,7 +288,7 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) struct mesh_path * mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { - return __mesh_path_lookup_by_idx(sdata->u.mesh.mpp_paths, idx); + return __mesh_path_lookup_by_idx(&sdata->u.mesh.mpp_paths, idx); } /** @@ -309,7 +301,7 @@ int mesh_path_add_gate(struct mesh_path *mpath) int err; rcu_read_lock(); - tbl = mpath->sdata->u.mesh.mesh_paths; + tbl = &mpath->sdata->u.mesh.mesh_paths; spin_lock_bh(&mpath->state_lock); if (mpath->is_gate) { @@ -418,7 +410,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, if (!new_mpath) return ERR_PTR(-ENOMEM); - tbl = sdata->u.mesh.mesh_paths; + tbl = &sdata->u.mesh.mesh_paths; spin_lock_bh(&tbl->walk_lock); mpath = rhashtable_lookup_get_insert_fast(&tbl->rhead, &new_mpath->rhash, @@ -460,7 +452,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, return -ENOMEM; memcpy(new_mpath->mpp, mpp, ETH_ALEN); - tbl = sdata->u.mesh.mpp_paths; + tbl = &sdata->u.mesh.mpp_paths; spin_lock_bh(&tbl->walk_lock); ret = rhashtable_lookup_insert_fast(&tbl->rhead, @@ -489,7 +481,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, void mesh_plink_broken(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - struct mesh_table *tbl = sdata->u.mesh.mesh_paths; + struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; @@ -548,7 +540,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) void mesh_path_flush_by_nexthop(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - struct mesh_table *tbl = sdata->u.mesh.mesh_paths; + struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; struct mesh_path *mpath; struct hlist_node *n; @@ -563,7 +555,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, const u8 *proxy) { - struct mesh_table *tbl = sdata->u.mesh.mpp_paths; + struct mesh_table *tbl = &sdata->u.mesh.mpp_paths; struct mesh_path *mpath; struct hlist_node *n; @@ -597,8 +589,8 @@ static void table_flush_by_iface(struct mesh_table *tbl) */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { - table_flush_by_iface(sdata->u.mesh.mesh_paths); - table_flush_by_iface(sdata->u.mesh.mpp_paths); + table_flush_by_iface(&sdata->u.mesh.mesh_paths); + table_flush_by_iface(&sdata->u.mesh.mpp_paths); } /** @@ -644,7 +636,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) /* flush relevant mpp entries first */ mpp_flush_by_proxy(sdata, addr); - err = table_path_del(sdata->u.mesh.mesh_paths, sdata, addr); + err = table_path_del(&sdata->u.mesh.mesh_paths, sdata, addr); sdata->u.mesh.mesh_paths_generation++; return err; } @@ -682,7 +674,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) struct mesh_path *gate; bool copy = false; - tbl = sdata->u.mesh.mesh_paths; + tbl = &sdata->u.mesh.mesh_paths; rcu_read_lock(); hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { @@ -762,29 +754,10 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mesh_path_tx_pending(mpath); } -int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) +void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) { - struct mesh_table *tbl_path, *tbl_mpp; - int ret; - - tbl_path = mesh_table_alloc(); - if (!tbl_path) - return -ENOMEM; - - tbl_mpp = mesh_table_alloc(); - if (!tbl_mpp) { - ret = -ENOMEM; - goto free_path; - } - - sdata->u.mesh.mesh_paths = tbl_path; - sdata->u.mesh.mpp_paths = tbl_mpp; - - return 0; - -free_path: - mesh_table_free(tbl_path); - return ret; + mesh_table_init(&sdata->u.mesh.mesh_paths); + mesh_table_init(&sdata->u.mesh.mpp_paths); } static @@ -806,12 +779,12 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, void mesh_path_expire(struct ieee80211_sub_if_data *sdata) { - mesh_path_tbl_expire(sdata, sdata->u.mesh.mesh_paths); - mesh_path_tbl_expire(sdata, sdata->u.mesh.mpp_paths); + mesh_path_tbl_expire(sdata, &sdata->u.mesh.mesh_paths); + mesh_path_tbl_expire(sdata, &sdata->u.mesh.mpp_paths); } void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata) { - mesh_table_free(sdata->u.mesh.mesh_paths); - mesh_table_free(sdata->u.mesh.mpp_paths); + mesh_table_free(&sdata->u.mesh.mesh_paths); + mesh_table_free(&sdata->u.mesh.mpp_paths); } From 8ff5f5d9d8cfce6a1e368a6daa7123be133a6c35 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 28 Dec 2021 09:44:06 +0800 Subject: [PATCH 835/868] RDMA/rxe: Prevent double freeing rxe_map_set() The same rxe_map_set could be freed twice: rxe_reg_user_mr() -> rxe_mr_init_user() -> rxe_mr_free_map_set() # 1st -> rxe_drop_ref() ... -> rxe_mr_cleanup() -> rxe_mr_free_map_set() # 2nd Follow normal convection and put resource cleanup either in the error unwind of the allocator, or the overall free function. Leave the object unchanged with a NULL cur_map_set on failure and remove the unncessary free in rxe_mr_init_user(). Link: https://lore.kernel.org/r/20211228014406.1033444-1-lizhijian@cn.fujitsu.com Signed-off-by: Li Zhijian Acked-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 53271df10e47..bcf717bcf0b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -135,19 +135,19 @@ static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both) ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set); if (ret) - goto err_out; + return -ENOMEM; if (both) { ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set); - if (ret) { - rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); - goto err_out; - } + if (ret) + goto err_free; } return 0; -err_out: +err_free: + rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); + mr->cur_map_set = NULL; return -ENOMEM; } @@ -214,7 +214,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, pr_warn("%s: Unable to get virtual address\n", __func__); err = -ENOMEM; - goto err_cleanup_map; + goto err_release_umem; } buf->addr = (uintptr_t)vaddr; @@ -237,8 +237,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, return 0; -err_cleanup_map: - rxe_mr_free_map_set(mr->num_map, mr->cur_map_set); err_release_umem: ib_umem_release(umem); err_out: From c370baa328022cbd46c59c821d1b467a97f047be Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 24 Dec 2021 04:11:26 -0500 Subject: [PATCH 836/868] EDAC/i10nm: Release mdev/mbase when failing to detect HBM On systems without HBM (High Bandwidth Memory) mdev/mbase are not released/unmapped. Add the code to release mdev/mbase when failing to detect HBM. [Tony: re-word commit message] Cc: Fixes: c945088384d0 ("EDAC/i10nm: Add support for high bandwidth memory") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20211224091126.1246-1-qiuxu.zhuo@intel.com --- drivers/edac/i10nm_base.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 83345bfac246..6cf50ee0b77c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -358,6 +358,9 @@ static int i10nm_get_hbm_munits(void) mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); if (!mbase) { + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", base + off); return -ENOMEM; @@ -368,6 +371,12 @@ static int i10nm_get_hbm_munits(void) mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); if (!I10NM_IS_HBM_IMC(mcmtr)) { + iounmap(d->imc[lmc].mbase); + d->imc[lmc].mbase = NULL; + d->imc[lmc].hbm_mc = false; + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); return -ENODEV; } From 754e4382354f7908923a1949d8dc8d05f82f09cb Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 4 Jan 2022 21:28:06 +0300 Subject: [PATCH 837/868] ieee802154: atusb: fix uninit value in atusb_set_extended_addr Alexander reported a use of uninitialized value in atusb_set_extended_addr(), that is caused by reading 0 bytes via usb_control_msg(). Fix it by validating if the number of bytes transferred is actually correct, since usb_control_msg() may read less bytes, than was requested by caller. Fail log: BUG: KASAN: uninit-cmp in ieee802154_is_valid_extended_unicast_addr include/linux/ieee802154.h:310 [inline] BUG: KASAN: uninit-cmp in atusb_set_extended_addr drivers/net/ieee802154/atusb.c:1000 [inline] BUG: KASAN: uninit-cmp in atusb_probe.cold+0x29f/0x14db drivers/net/ieee802154/atusb.c:1056 Uninit value used in comparison: 311daa649a2003bd stack handle: 000000009a2003bd ieee802154_is_valid_extended_unicast_addr include/linux/ieee802154.h:310 [inline] atusb_set_extended_addr drivers/net/ieee802154/atusb.c:1000 [inline] atusb_probe.cold+0x29f/0x14db drivers/net/ieee802154/atusb.c:1056 usb_probe_interface+0x314/0x7f0 drivers/usb/core/driver.c:396 Fixes: 7490b008d123 ("ieee802154: add support for atusb transceiver") Reported-by: Alexander Potapenko Acked-by: Alexander Aring Signed-off-by: Pavel Skripkin Link: https://lore.kernel.org/r/20220104182806.7188-1-paskripkin@gmail.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 23ee0b14cbfa..2f5e7b31032a 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -93,7 +93,9 @@ static int atusb_control_msg(struct atusb *atusb, unsigned int pipe, ret = usb_control_msg(usb_dev, pipe, request, requesttype, value, index, data, size, timeout); - if (ret < 0) { + if (ret < size) { + ret = ret < 0 ? ret : -ENODATA; + atusb->err = ret; dev_err(&usb_dev->dev, "%s: req 0x%02x val 0x%x idx 0x%x, error %d\n", @@ -861,9 +863,9 @@ static int atusb_get_and_show_build(struct atusb *atusb) if (!build) return -ENOMEM; - ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), - ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0, - build, ATUSB_BUILD_SIZE, 1000); + /* We cannot call atusb_control_msg() here, since this request may read various length data */ + ret = usb_control_msg(atusb->usb_dev, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD, + ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000); if (ret >= 0) { build[ret] = 0; dev_info(&usb_dev->dev, "Firmware: build %s\n", build); From 01cbf50877e602e2376af89e4a51c30bc574c618 Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Wed, 3 Mar 2021 11:45:33 +0000 Subject: [PATCH 838/868] i40e: Fix to not show opcode msg on unsuccessful VF MAC change Hide i40e opcode information sent during response to VF in case when untrusted VF tried to change MAC on the VF interface. This is implemented by adding an additional parameter 'hide' to the response sent to VF function that hides the display of error information, but forwards the error code to VF. Previously it was not possible to send response with some error code to VF without displaying opcode information. Fixes: 5c3c48ac6bf5 ("i40e: implement virtual device interface") Signed-off-by: Grzegorz Szczurek Signed-off-by: Mateusz Palczewski Reviewed-by: Paul M Stillwell Jr Reviewed-by: Aleksandr Loktionov Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- .../ethernet/intel/i40e/i40e_virtchnl_pf.c | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 2ea4deb8fc44..048f1678ab8a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1877,17 +1877,19 @@ sriov_configure_out: /***********************virtual channel routines******************/ /** - * i40e_vc_send_msg_to_vf + * i40e_vc_send_msg_to_vf_ex * @vf: pointer to the VF info * @v_opcode: virtual channel opcode * @v_retval: virtual channel return value * @msg: pointer to the msg buffer * @msglen: msg length + * @is_quiet: true for not printing unsuccessful return values, false otherwise * * send msg to VF **/ -static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, - u32 v_retval, u8 *msg, u16 msglen) +static int i40e_vc_send_msg_to_vf_ex(struct i40e_vf *vf, u32 v_opcode, + u32 v_retval, u8 *msg, u16 msglen, + bool is_quiet) { struct i40e_pf *pf; struct i40e_hw *hw; @@ -1903,7 +1905,7 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; /* single place to detect unsuccessful return values */ - if (v_retval) { + if (v_retval && !is_quiet) { vf->num_invalid_msgs++; dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id, v_opcode, v_retval); @@ -1933,6 +1935,23 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, return 0; } +/** + * i40e_vc_send_msg_to_vf + * @vf: pointer to the VF info + * @v_opcode: virtual channel opcode + * @v_retval: virtual channel return value + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * send msg to VF + **/ +static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, + u32 v_retval, u8 *msg, u16 msglen) +{ + return i40e_vc_send_msg_to_vf_ex(vf, v_opcode, v_retval, + msg, msglen, false); +} + /** * i40e_vc_send_resp_to_vf * @vf: pointer to the VF info @@ -2695,6 +2714,7 @@ error_param: * i40e_check_vf_permission * @vf: pointer to the VF info * @al: MAC address list from virtchnl + * @is_quiet: set true for printing msg without opcode info, false otherwise * * Check that the given list of MAC addresses is allowed. Will return -EPERM * if any address in the list is not valid. Checks the following conditions: @@ -2709,13 +2729,15 @@ error_param: * addresses might not be accurate. **/ static inline int i40e_check_vf_permission(struct i40e_vf *vf, - struct virtchnl_ether_addr_list *al) + struct virtchnl_ether_addr_list *al, + bool *is_quiet) { struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx]; int mac2add_cnt = 0; int i; + *is_quiet = false; for (i = 0; i < al->num_elements; i++) { struct i40e_mac_filter *f; u8 *addr = al->list[i].addr; @@ -2739,6 +2761,7 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); + *is_quiet = true; return -EPERM; } @@ -2775,6 +2798,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_ether_addr_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; + bool is_quiet = false; i40e_status ret = 0; int i; @@ -2791,7 +2815,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) */ spin_lock_bh(&vsi->mac_filter_hash_lock); - ret = i40e_check_vf_permission(vf, al); + ret = i40e_check_vf_permission(vf, al, &is_quiet); if (ret) { spin_unlock_bh(&vsi->mac_filter_hash_lock); goto error_param; @@ -2829,8 +2853,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) error_param: /* send the response to the VF */ - return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR, - ret); + return i40e_vc_send_msg_to_vf_ex(vf, VIRTCHNL_OP_ADD_ETH_ADDR, + ret, NULL, 0, is_quiet); } /** From 3116f59c12bd24c513194cd3acb3ec1f7d468954 Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Mon, 29 Nov 2021 19:52:01 +0600 Subject: [PATCH 839/868] i40e: fix use-after-free in i40e_sync_filters_subtask() Using ifconfig command to delete the ipv6 address will cause the i40e network card driver to delete its internal mac_filter and i40e_service_task kernel thread will concurrently access the mac_filter. These two processes are not protected by lock so causing the following use-after-free problems. print_address_description+0x70/0x360 ? vprintk_func+0x5e/0xf0 kasan_report+0x1b2/0x330 i40e_sync_vsi_filters+0x4f0/0x1850 [i40e] i40e_sync_filters_subtask+0xe3/0x130 [i40e] i40e_service_task+0x195/0x24c0 [i40e] process_one_work+0x3f5/0x7d0 worker_thread+0x61/0x6c0 ? process_one_work+0x7d0/0x7d0 kthread+0x1c3/0x1f0 ? kthread_park+0xc0/0xc0 ret_from_fork+0x35/0x40 Allocated by task 2279810: kasan_kmalloc+0xa0/0xd0 kmem_cache_alloc_trace+0xf3/0x1e0 i40e_add_filter+0x127/0x2b0 [i40e] i40e_add_mac_filter+0x156/0x190 [i40e] i40e_addr_sync+0x2d/0x40 [i40e] __hw_addr_sync_dev+0x154/0x210 i40e_set_rx_mode+0x6d/0xf0 [i40e] __dev_set_rx_mode+0xfb/0x1f0 __dev_mc_add+0x6c/0x90 igmp6_group_added+0x214/0x230 __ipv6_dev_mc_inc+0x338/0x4f0 addrconf_join_solict.part.7+0xa2/0xd0 addrconf_dad_work+0x500/0x980 process_one_work+0x3f5/0x7d0 worker_thread+0x61/0x6c0 kthread+0x1c3/0x1f0 ret_from_fork+0x35/0x40 Freed by task 2547073: __kasan_slab_free+0x130/0x180 kfree+0x90/0x1b0 __i40e_del_filter+0xa3/0xf0 [i40e] i40e_del_mac_filter+0xf3/0x130 [i40e] i40e_addr_unsync+0x85/0xa0 [i40e] __hw_addr_sync_dev+0x9d/0x210 i40e_set_rx_mode+0x6d/0xf0 [i40e] __dev_set_rx_mode+0xfb/0x1f0 __dev_mc_del+0x69/0x80 igmp6_group_dropped+0x279/0x510 __ipv6_dev_mc_dec+0x174/0x220 addrconf_leave_solict.part.8+0xa2/0xd0 __ipv6_ifa_notify+0x4cd/0x570 ipv6_ifa_notify+0x58/0x80 ipv6_del_addr+0x259/0x4a0 inet6_addr_del+0x188/0x260 addrconf_del_ifaddr+0xcc/0x130 inet6_ioctl+0x152/0x190 sock_do_ioctl+0xd8/0x2b0 sock_ioctl+0x2e5/0x4c0 do_vfs_ioctl+0x14e/0xa80 ksys_ioctl+0x7c/0xa0 __x64_sys_ioctl+0x42/0x50 do_syscall_64+0x98/0x2c0 entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Di Zhu Signed-off-by: Rui Zhang Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 24 +++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e118cf9265c7..e0c4d6113c02 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -99,6 +99,24 @@ MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; +static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, + struct net_device *netdev, int delta) +{ + struct netdev_hw_addr *ha; + + if (!f || !netdev) + return; + + netdev_for_each_mc_addr(ha, netdev) { + if (ether_addr_equal(ha->addr, f->macaddr)) { + ha->refcount += delta; + if (ha->refcount <= 0) + ha->refcount = 1; + break; + } + } +} + /** * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -2036,6 +2054,7 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, hlist_for_each_entry_safe(new, h, from, hlist) { /* We can simply free the wrapper structure */ hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } } @@ -2383,6 +2402,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) &tmp_add_list, &tmp_del_list, vlan_filters); + + hlist_for_each_entry(new, &tmp_add_list, hlist) + netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); + if (retval) goto err_no_memory_locked; @@ -2515,6 +2538,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) if (new->f->state == I40E_FILTER_NEW) new->f->state = new->state; hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); From 40feded8a247f95957a0de9abd100085fb320a2f Mon Sep 17 00:00:00 2001 From: Mateusz Palczewski Date: Thu, 9 Dec 2021 11:04:35 +0100 Subject: [PATCH 840/868] i40e: Fix for displaying message regarding NVM version When loading the i40e driver, it prints a message like: 'The driver for the device detected a newer version of the NVM image v1.x than expected v1.y. Please install the most recent version of the network driver.' This is misleading as the driver is working as expected. Fix that by removing the second part of message and changing it from dev_info to dev_dbg. Fixes: 4fb29bddb57f ("i40e: The driver now prints the API version in error message") Signed-off-by: Mateusz Palczewski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e0c4d6113c02..17c3f6d69740 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -15475,8 +15475,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) - dev_info(&pdev->dev, - "The driver for the device detected a newer version of the NVM image v%u.%u than expected v%u.%u. Please install the most recent version of the network driver.\n", + dev_dbg(&pdev->dev, + "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, From e738451d78b2f8a9635d66c6a87f304b4d965f7a Mon Sep 17 00:00:00 2001 From: Jedrzej Jagielski Date: Fri, 17 Dec 2021 14:29:05 +0000 Subject: [PATCH 841/868] i40e: Fix incorrect netdev's real number of RX/TX queues There was a wrong queues representation in sysfs during driver's reinitialization in case of online cpus number is less than combined queues. It was caused by stopped NetworkManager, which is responsible for calling vsi_open function during driver's initialization. In specific situation (ex. 12 cpus online) there were 16 queues in /sys/class/net//queues. In case of modifying queues with value higher, than number of online cpus, then it caused write errors and other errors. Add updating of sysfs's queues representation during driver initialization. Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Lukasz Cieplicki Signed-off-by: Jedrzej Jagielski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 32 ++++++++++++++++----- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 17c3f6d69740..61afc220fc6c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8740,6 +8740,27 @@ int i40e_open(struct net_device *netdev) return 0; } +/** + * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues + * @vsi: vsi structure + * + * This updates netdev's number of tx/rx queues + * + * Returns status of setting tx/rx queues + **/ +static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi) +{ + int ret; + + ret = netif_set_real_num_rx_queues(vsi->netdev, + vsi->num_queue_pairs); + if (ret) + return ret; + + return netif_set_real_num_tx_queues(vsi->netdev, + vsi->num_queue_pairs); +} + /** * i40e_vsi_open - * @vsi: the VSI to open @@ -8776,13 +8797,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi) goto err_setup_rx; /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(vsi->netdev, - vsi->num_queue_pairs); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(vsi->netdev, - vsi->num_queue_pairs); + err = i40e_netif_set_realnum_tx_rx_queues(vsi); if (err) goto err_set_queues; @@ -14173,6 +14188,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, case I40E_VSI_MAIN: case I40E_VSI_VMDQ2: ret = i40e_config_netdev(vsi); + if (ret) + goto err_netdev; + ret = i40e_netif_set_realnum_tx_rx_queues(vsi); if (ret) goto err_netdev; ret = register_netdev(vsi->netdev); From b712941c8085e638bb92456e866ed3de4404e3d5 Mon Sep 17 00:00:00 2001 From: Karen Sornek Date: Wed, 1 Sep 2021 09:21:46 +0200 Subject: [PATCH 842/868] iavf: Fix limit of total number of queues to active queues of VF In the absence of this validation, if the user requests to configure queues more than the enabled queues, it results in sending the requested number of queues to the kernel stack (due to the asynchronous nature of VF response), in which case the stack might pick a queue to transmit that is not enabled and result in Tx hang. Fix this bug by limiting the total number of queues allocated for VF to active queues of VF. Fixes: d5b33d024496 ("i40evf: add ndo_setup_tc callback to i40evf") Signed-off-by: Ashwin Vijayavel Signed-off-by: Karen Sornek Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4e7c04047f91..e4439b095533 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2708,8 +2708,11 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, total_max_rate += tx_rate; num_qps += mqprio_qopt->qopt.count[i]; } - if (num_qps > IAVF_MAX_REQ_QUEUES) + if (num_qps > adapter->num_active_queues) { + dev_err(&adapter->pdev->dev, + "Cannot support requested number of queues\n"); return -EINVAL; + } ret = iavf_validate_tx_bandwidth(adapter, total_max_rate); return ret; From 1d5a474240407c38ca8c7484a656ee39f585399c Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Sun, 2 Jan 2022 08:41:22 +0000 Subject: [PATCH 843/868] sfc: The RX page_ring is optional The RX page_ring is an optional feature that improves performance. When allocation fails the driver can still function, but possibly with a lower bandwidth. Guard against dereferencing a NULL page_ring. Fixes: 2768935a4660 ("sfc: reuse pages to avoid DMA mapping/unmapping costs") Signed-off-by: Martin Habets Reported-by: Jiasheng Jiang Link: https://lore.kernel.org/r/164111288276.5798.10330502993729113868.stgit@palantir17.mph.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/falcon/rx.c | 5 +++++ drivers/net/ethernet/sfc/rx_common.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 11a6aee852e9..0c6cc2191369 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -110,6 +110,8 @@ static struct page *ef4_reuse_page(struct ef4_rx_queue *rx_queue) struct ef4_rx_page_state *state; unsigned index; + if (unlikely(!rx_queue->page_ring)) + return NULL; index = rx_queue->page_remove & rx_queue->page_ptr_mask; page = rx_queue->page_ring[index]; if (page == NULL) @@ -293,6 +295,9 @@ static void ef4_recycle_rx_pages(struct ef4_channel *channel, { struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel); + if (unlikely(!rx_queue->page_ring)) + return; + do { ef4_recycle_rx_page(channel, rx_buf); rx_buf = ef4_rx_buf_next(rx_queue, rx_buf); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 0983abc0cc5f..633ca77a26fd 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -45,6 +45,8 @@ static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) unsigned int index; struct page *page; + if (unlikely(!rx_queue->page_ring)) + return NULL; index = rx_queue->page_remove & rx_queue->page_ptr_mask; page = rx_queue->page_ring[index]; if (page == NULL) @@ -114,6 +116,9 @@ void efx_recycle_rx_pages(struct efx_channel *channel, { struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + if (unlikely(!rx_queue->page_ring)) + return; + do { efx_recycle_rx_page(channel, rx_buf); rx_buf = efx_rx_buf_next(rx_queue, rx_buf); From 4163cb3d1980383220ad7043002b930995dcba33 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 22 Dec 2021 12:13:12 +0200 Subject: [PATCH 844/868] Revert "RDMA/mlx5: Fix releasing unallocated memory in dereg MR flow" This patch is not the full fix and still causes to call traces during mlx5_ib_dereg_mr(). This reverts commit f0ae4afe3d35e67db042c58a52909e06262b740f. Fixes: f0ae4afe3d35 ("RDMA/mlx5: Fix releasing unallocated memory in dereg MR flow") Link: https://lore.kernel.org/r/20211222101312.1358616-1-maorg@nvidia.com Signed-off-by: Maor Gottlieb Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +++--- drivers/infiniband/hw/mlx5/mr.c | 26 ++++++++++++++------------ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a7a56ed740b..e636e954f6bf 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -664,6 +664,7 @@ struct mlx5_ib_mr { /* User MR data */ struct mlx5_cache_ent *cache_ent; + struct ib_umem *umem; /* This is zero'd when the MR is allocated */ union { @@ -675,7 +676,7 @@ struct mlx5_ib_mr { struct list_head list; }; - /* Used only by kernel MRs */ + /* Used only by kernel MRs (umem == NULL) */ struct { void *descs; void *descs_alloc; @@ -696,9 +697,8 @@ struct mlx5_ib_mr { int data_length; }; - /* Used only by User MRs */ + /* Used only by User MRs (umem != NULL) */ struct { - struct ib_umem *umem; unsigned int page_shift; /* Current access_flags */ int access_flags; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 63e2129f1142..157d862fb864 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1904,18 +1904,19 @@ err: return ret; } -static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) +static void +mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); - int size = mr->max_descs * mr->desc_size; + if (!mr->umem && mr->descs) { + struct ib_device *device = mr->ibmr.device; + int size = mr->max_descs * mr->desc_size; + struct mlx5_ib_dev *dev = to_mdev(device); - if (!mr->descs) - return; - - dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, - DMA_TO_DEVICE); - kfree(mr->descs_alloc); - mr->descs = NULL; + dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, + DMA_TO_DEVICE); + kfree(mr->descs_alloc); + mr->descs = NULL; + } } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -1991,8 +1992,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) if (mr->cache_ent) { mlx5_mr_cache_free(dev, mr); } else { - if (!udata) - mlx5_free_priv_descs(mr); + mlx5_free_priv_descs(mr); kfree(mr); } return 0; @@ -2079,6 +2079,7 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, if (err) goto err_free_in; + mr->umem = NULL; kfree(in); return mr; @@ -2205,6 +2206,7 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, } mr->ibmr.device = pd->device; + mr->umem = NULL; switch (mr_type) { case IB_MR_TYPE_MEM_REG: From 00fcf8c7dd564c44448ff6a39728d2ca0c8efbd8 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Wed, 5 Jan 2022 23:51:02 +0800 Subject: [PATCH 845/868] Revert "net: usb: r8152: Add MAC passthrough support for more Lenovo Docks" This reverts commit f77b83b5bbab53d2be339184838b19ed2c62c0a5. This change breaks multiple usb to ethernet dongles attached on Lenovo USB hub. Fixes: f77b83b5bbab ("net: usb: r8152: Add MAC passthrough support for more Lenovo Docks") Signed-off-by: Aaron Ma Link: https://lore.kernel.org/r/20220105155102.8557-1-aaron.ma@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3085e8118d7f..ef6010a3d33a 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -9638,9 +9638,12 @@ static int rtl8152_probe(struct usb_interface *intf, netdev->hw_features &= ~NETIF_F_RXCSUM; } - if (udev->parent && - le16_to_cpu(udev->parent->descriptor.idVendor) == VENDOR_ID_LENOVO) { - tp->lenovo_macpassthru = 1; + if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO) { + switch (le16_to_cpu(udev->descriptor.idProduct)) { + case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2: + case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: + tp->lenovo_macpassthru = 1; + } } if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial && From 7694a7de22c53a312ea98960fcafc6ec62046531 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 31 Dec 2021 17:33:15 +0800 Subject: [PATCH 846/868] RDMA/uverbs: Check for null return of kmalloc_array Because of the possible failure of the allocation, data might be NULL pointer and will cause the dereference of the NULL pointer later. Therefore, it might be better to check it and return -ENOMEM. Fixes: 6884c6c4bd09 ("RDMA/verbs: Store the write/write_ex uapi entry points in the uverbs_api") Link: https://lore.kernel.org/r/20211231093315.1917667-1-jiasheng@iscas.ac.cn Signed-off-by: Jiasheng Jiang Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_uapi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 2f2c7646fce1..a02916a3a79c 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -447,6 +447,9 @@ static int uapi_finalize(struct uverbs_api *uapi) uapi->num_write_ex = max_write_ex + 1; data = kmalloc_array(uapi->num_write + uapi->num_write_ex, sizeof(*uapi->write_methods), GFP_KERNEL); + if (!data) + return -ENOMEM; + for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++) data[i] = &uapi->notsupp_method; uapi->write_methods = data; From db54c12a3d7e3eedd37aa08efc9362e905f07716 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 5 Jan 2022 14:44:36 +0000 Subject: [PATCH 847/868] selftests: set amt.sh executable amt.sh test script will not work because it doesn't have execution permission. So, it adds execution permission. Reported-by: Hangbin Liu Fixes: c08e8baea78e ("selftests: add amt interface selftest script") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20220105144436.13415-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/amt.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/net/amt.sh diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh old mode 100644 new mode 100755 From b35a0f4dd544eaa6162b6d2f13a2557a121ae5fd Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 4 Jan 2022 14:21:52 +0200 Subject: [PATCH 848/868] RDMA/core: Don't infoleak GRH fields If dst->is_global field is not set, the GRH fields are not cleared and the following infoleak is reported. ===================================================== BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:121 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0x1c9/0x270 lib/usercopy.c:33 instrument_copy_to_user include/linux/instrumented.h:121 [inline] _copy_to_user+0x1c9/0x270 lib/usercopy.c:33 copy_to_user include/linux/uaccess.h:209 [inline] ucma_init_qp_attr+0x8c7/0xb10 drivers/infiniband/core/ucma.c:1242 ucma_write+0x637/0x6c0 drivers/infiniband/core/ucma.c:1732 vfs_write+0x8ce/0x2030 fs/read_write.c:588 ksys_write+0x28b/0x510 fs/read_write.c:643 __do_sys_write fs/read_write.c:655 [inline] __se_sys_write fs/read_write.c:652 [inline] __ia32_sys_write+0xdb/0x120 fs/read_write.c:652 do_syscall_32_irqs_on arch/x86/entry/common.c:114 [inline] __do_fast_syscall_32+0x96/0xf0 arch/x86/entry/common.c:180 do_fast_syscall_32+0x34/0x70 arch/x86/entry/common.c:205 do_SYSENTER_32+0x1b/0x20 arch/x86/entry/common.c:248 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c Local variable resp created at: ucma_init_qp_attr+0xa4/0xb10 drivers/infiniband/core/ucma.c:1214 ucma_write+0x637/0x6c0 drivers/infiniband/core/ucma.c:1732 Bytes 40-59 of 144 are uninitialized Memory access of size 144 starts at ffff888167523b00 Data copied to user address 0000000020000100 CPU: 1 PID: 25910 Comm: syz-executor.1 Not tainted 5.16.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 ===================================================== Fixes: 4ba66093bdc6 ("IB/core: Check for global flag when using ah_attr") Link: https://lore.kernel.org/r/0e9dd51f93410b7b2f4f5562f52befc878b71afa.1641298868.git.leonro@nvidia.com Reported-by: syzbot+6d532fa8f9463da290bc@syzkaller.appspotmail.com Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_marshall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index b8d715c68ca4..11a080646916 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device, struct rdma_ah_attr *src = ah_attr; struct rdma_ah_attr conv_ah; - memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); + memset(&dst->grh, 0, sizeof(dst->grh)); if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && (rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) && From 0daf5cb217a9ca8ae91b8f966ddae322699fb71d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 19 Dec 2021 14:53:17 +0100 Subject: [PATCH 849/868] ftrace/samples: Add missing prototypes direct functions There's another compilation fail (first here [1]) reported by kernel test robot for W=1 clang build: >> samples/ftrace/ftrace-direct-multi-modify.c:7:6: warning: no previous prototype for function 'my_direct_func1' [-Wmissing-prototypes] void my_direct_func1(unsigned long ip) Direct functions in ftrace direct sample modules need to have prototypes defined. They are already global in order to be visible for the inline assembly, so there's no problem. The kernel test robot reported just error for ftrace-direct-multi-modify, but I got same errors also for the rest of the modules touched by this patch. [1] 67d4f6e3bf5d ftrace/samples: Add missing prototype for my_direct_func Link: https://lkml.kernel.org/r/20211219135317.212430-1-jolsa@kernel.org Reported-by: kernel test robot Fixes: e1067a07cfbc ("ftrace/samples: Add module to test multi direct modify interface") Fixes: ae0cc3b7e7f5 ("ftrace/samples: Add a sample module that implements modify_ftrace_direct()") Fixes: 156473a0ff4f ("ftrace: Add another example of register_ftrace_direct() use case") Fixes: b06457c83af6 ("ftrace: Add sample module that uses register_ftrace_direct()") Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt --- samples/ftrace/ftrace-direct-modify.c | 3 +++ samples/ftrace/ftrace-direct-multi-modify.c | 3 +++ samples/ftrace/ftrace-direct-too.c | 3 +++ samples/ftrace/ftrace-direct.c | 2 ++ 4 files changed, 11 insertions(+) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 690e4a9ff333..2877cb053a82 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -4,6 +4,9 @@ #include #include +extern void my_direct_func1(void); +extern void my_direct_func2(void); + void my_direct_func1(void) { trace_printk("my direct func1\n"); diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 91bc42a7adb9..6f43a39decd0 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -4,6 +4,9 @@ #include #include +extern void my_direct_func1(unsigned long ip); +extern void my_direct_func2(unsigned long ip); + void my_direct_func1(unsigned long ip) { trace_printk("my direct func1 ip %lx\n", ip); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 6e0de725bf22..b97e5ed46233 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -5,6 +5,9 @@ #include #include +extern void my_direct_func(struct vm_area_struct *vma, + unsigned long address, unsigned int flags); + void my_direct_func(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index a30aa42ec76a..c918b13edb49 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -5,6 +5,8 @@ #include #include +extern void my_direct_func(struct task_struct *p); + void my_direct_func(struct task_struct *p) { trace_printk("waking up %s-%d\n", p->comm, p->pid); From 823e670f7ed616d0ce993075c8afe0217885f79d Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 23 Dec 2021 16:04:38 +0530 Subject: [PATCH 850/868] tracing: Fix check for trace_percpu_buffer validity in get_trace_buf() With the new osnoise tracer, we are seeing the below splat: Kernel attempted to read user page (c7d880000) - exploit attempt? (uid: 0) BUG: Unable to handle kernel data access on read at 0xc7d880000 Faulting instruction address: 0xc0000000002ffa10 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries ... NIP [c0000000002ffa10] __trace_array_vprintk.part.0+0x70/0x2f0 LR [c0000000002ff9fc] __trace_array_vprintk.part.0+0x5c/0x2f0 Call Trace: [c0000008bdd73b80] [c0000000001c49cc] put_prev_task_fair+0x3c/0x60 (unreliable) [c0000008bdd73be0] [c000000000301430] trace_array_printk_buf+0x70/0x90 [c0000008bdd73c00] [c0000000003178b0] trace_sched_switch_callback+0x250/0x290 [c0000008bdd73c90] [c000000000e70d60] __schedule+0x410/0x710 [c0000008bdd73d40] [c000000000e710c0] schedule+0x60/0x130 [c0000008bdd73d70] [c000000000030614] interrupt_exit_user_prepare_main+0x264/0x270 [c0000008bdd73de0] [c000000000030a70] syscall_exit_prepare+0x150/0x180 [c0000008bdd73e10] [c00000000000c174] system_call_vectored_common+0xf4/0x278 osnoise tracer on ppc64le is triggering osnoise_taint() for negative duration in get_int_safe_duration() called from trace_sched_switch_callback()->thread_exit(). The problem though is that the check for a valid trace_percpu_buffer is incorrect in get_trace_buf(). The check is being done after calculating the pointer for the current cpu, rather than on the main percpu pointer. Fix the check to be against trace_percpu_buffer. Link: https://lkml.kernel.org/r/a920e4272e0b0635cf20c444707cbce1b2c8973d.1640255304.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: e2ace001176dc9 ("tracing: Choose static tp_printk buffer by explicit nesting count") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 88de94da596b..e1f55851e53f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3217,7 +3217,7 @@ static char *get_trace_buf(void) { struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); - if (!buffer || buffer->nesting >= 4) + if (!trace_percpu_buffer || buffer->nesting >= 4) return NULL; buffer->nesting++; From f28439db470cca8b6b082239314e9fd10bd39034 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 23 Dec 2021 16:04:39 +0530 Subject: [PATCH 851/868] tracing: Tag trace_percpu_buffer as a percpu pointer Tag trace_percpu_buffer as a percpu pointer to resolve warnings reported by sparse: /linux/kernel/trace/trace.c:3218:46: warning: incorrect type in initializer (different address spaces) /linux/kernel/trace/trace.c:3218:46: expected void const [noderef] __percpu *__vpp_verify /linux/kernel/trace/trace.c:3218:46: got struct trace_buffer_struct * /linux/kernel/trace/trace.c:3234:9: warning: incorrect type in initializer (different address spaces) /linux/kernel/trace/trace.c:3234:9: expected void const [noderef] __percpu *__vpp_verify /linux/kernel/trace/trace.c:3234:9: got int * Link: https://lkml.kernel.org/r/ebabd3f23101d89cb75671b68b6f819f5edc830b.1640255304.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Reported-by: kernel test robot Fixes: 07d777fe8c398 ("tracing: Add percpu buffers for trace_printk()") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e1f55851e53f..78ea542ce3bc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3207,7 +3207,7 @@ struct trace_buffer_struct { char buffer[4][TRACE_BUF_SIZE]; }; -static struct trace_buffer_struct *trace_percpu_buffer; +static struct trace_buffer_struct __percpu *trace_percpu_buffer; /* * This allows for lockless recording. If we're nested too deeply, then @@ -3236,7 +3236,7 @@ static void put_trace_buf(void) static int alloc_percpu_trace_buffer(void) { - struct trace_buffer_struct *buffers; + struct trace_buffer_struct __percpu *buffers; if (trace_percpu_buffer) return 0; From 72a4a87da8f7bcf868b338615a814b6542f277f3 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 5 Jan 2022 14:53:04 +1300 Subject: [PATCH 852/868] i2c: mpc: Avoid out of bounds memory access When performing an I2C transfer where the last message was a write KASAN would complain: BUG: KASAN: slab-out-of-bounds in mpc_i2c_do_action+0x154/0x630 Read of size 2 at addr c814e310 by task swapper/2/0 CPU: 2 PID: 0 Comm: swapper/2 Tainted: G B 5.16.0-rc8 #1 Call Trace: [e5ee9d50] [c08418e8] dump_stack_lvl+0x4c/0x6c (unreliable) [e5ee9d70] [c02f8a14] print_address_description.constprop.13+0x64/0x3b0 [e5ee9da0] [c02f9030] kasan_report+0x1f0/0x204 [e5ee9de0] [c0c76ee4] mpc_i2c_do_action+0x154/0x630 [e5ee9e30] [c0c782c4] mpc_i2c_isr+0x164/0x240 [e5ee9e60] [c00f3a04] __handle_irq_event_percpu+0xf4/0x3b0 [e5ee9ec0] [c00f3d40] handle_irq_event_percpu+0x80/0x110 [e5ee9f40] [c00f3e48] handle_irq_event+0x78/0xd0 [e5ee9f60] [c00fcfec] handle_fasteoi_irq+0x19c/0x370 [e5ee9fa0] [c00f1d84] generic_handle_irq+0x54/0x80 [e5ee9fc0] [c0006b54] __do_irq+0x64/0x200 [e5ee9ff0] [c0007958] __do_IRQ+0xe8/0x1c0 [c812dd50] [e3eaab20] 0xe3eaab20 [c812dd90] [c0007a4c] do_IRQ+0x1c/0x30 [c812dda0] [c0000c04] ExternalInput+0x144/0x160 --- interrupt: 500 at arch_cpu_idle+0x34/0x60 NIP: c000b684 LR: c000b684 CTR: c0019688 REGS: c812ddb0 TRAP: 0500 Tainted: G B (5.16.0-rc8) MSR: 00029002 CR: 22000488 XER: 20000000 GPR00: c10ef7fc c812de90 c80ff200 c2394718 00000001 00000001 c10e3f90 00000003 GPR08: 00000000 c0019688 c2394718 fc7d625b 22000484 00000000 21e17000 c208228c GPR16: e3e99284 00000000 ffffffff c2390000 c001bac0 c2082288 c812df60 c001ba60 GPR24: c23949c0 00000018 00080000 00000004 c80ff200 00000002 c2348ee4 c2394718 NIP [c000b684] arch_cpu_idle+0x34/0x60 LR [c000b684] arch_cpu_idle+0x34/0x60 --- interrupt: 500 [c812de90] [c10e3f90] rcu_eqs_enter.isra.60+0xc0/0x110 (unreliable) [c812deb0] [c10ef7fc] default_idle_call+0xbc/0x230 [c812dee0] [c00af0e8] do_idle+0x1c8/0x200 [c812df10] [c00af3c0] cpu_startup_entry+0x20/0x30 [c812df20] [c001e010] start_secondary+0x5d0/0xba0 [c812dff0] [c00028a0] __secondary_start+0x90/0xdc This happened because we would overrun the i2c->msgs array on the final interrupt for the I2C STOP. This didn't happen if the last message was a read because there is no interrupt in that case. Ensure that we only access the current message if we are not processing a I2C STOP condition. Fixes: 1538d82f4647 ("i2c: mpc: Interrupt driven transfer") Reported-by: Maxime Bizon Signed-off-by: Chris Packham Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mpc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 53b8da6dbb23..db26cc36e13f 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -492,7 +492,7 @@ static void mpc_i2c_finish(struct mpc_i2c *i2c, int rc) static void mpc_i2c_do_action(struct mpc_i2c *i2c) { - struct i2c_msg *msg = &i2c->msgs[i2c->curr_msg]; + struct i2c_msg *msg = NULL; int dir = 0; int recv_len = 0; u8 byte; @@ -501,10 +501,13 @@ static void mpc_i2c_do_action(struct mpc_i2c *i2c) i2c->cntl_bits &= ~(CCR_RSTA | CCR_MTX | CCR_TXAK); - if (msg->flags & I2C_M_RD) - dir = 1; - if (msg->flags & I2C_M_RECV_LEN) - recv_len = 1; + if (i2c->action != MPC_I2C_ACTION_STOP) { + msg = &i2c->msgs[i2c->curr_msg]; + if (msg->flags & I2C_M_RD) + dir = 1; + if (msg->flags & I2C_M_RECV_LEN) + recv_len = 1; + } switch (i2c->action) { case MPC_I2C_ACTION_RESTART: @@ -581,7 +584,7 @@ static void mpc_i2c_do_action(struct mpc_i2c *i2c) break; } - if (msg->len == i2c->byte_posn) { + if (msg && msg->len == i2c->byte_posn) { i2c->curr_msg++; i2c->byte_posn = 0; From 1756d7994ad85c2479af6ae5a9750b92324685af Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Jan 2022 11:02:28 -1000 Subject: [PATCH 853/868] cgroup: Use open-time credentials for process migraton perm checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cgroup process migration permission checks are performed at write time as whether a given operation is allowed or not is dependent on the content of the write - the PID. This currently uses current's credentials which is a potential security weakness as it may allow scenarios where a less privileged process tricks a more privileged one into writing into a fd that it created. This patch makes both cgroup2 and cgroup1 process migration interfaces to use the credentials saved at the time of open (file->f_cred) instead of current's. Reported-by: "Eric W. Biederman" Suggested-by: Linus Torvalds Fixes: 187fe84067bd ("cgroup: require write perm on common ancestor when moving processes on the default hierarchy") Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 7 ++++--- kernel/cgroup/cgroup.c | 9 ++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 81c9e0685948..0e7369103ba6 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -504,10 +504,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, goto out_unlock; /* - * Even if we're attaching all tasks in the thread group, we only - * need to check permissions on one of them. + * Even if we're attaching all tasks in the thread group, we only need + * to check permissions on one of them. Check permissions using the + * credentials from file open to protect against inherited fd attacks. */ - cred = current_cred(); + cred = of->file->f_cred; tcred = get_task_cred(task); if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 919194de39c8..2632e46da1d4 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4892,6 +4892,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, { struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; + const struct cred *saved_cred; ssize_t ret; bool locked; @@ -4909,9 +4910,15 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); spin_unlock_irq(&css_set_lock); - /* process and thread migrations follow same delegation rule */ + /* + * Process and thread migrations follow same delegation rule. Check + * permissions using the credentials from file open to protect against + * inherited fd attacks. + */ + saved_cred = override_creds(of->file->f_cred); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, of->file->f_path.dentry->d_sb, threadgroup); + revert_creds(saved_cred); if (ret) goto out_finish; From 0d2b5955b36250a9428c832664f2079cbf723bec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Jan 2022 11:02:29 -1000 Subject: [PATCH 854/868] cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit of->priv is currently used by each interface file implementation to store private information. This patch collects the current two private data usages into struct cgroup_file_ctx which is allocated and freed by the common path. This allows generic private data which applies to multiple files, which will be used to in the following patch. Note that cgroup_procs iterator is now embedded as procs.iter in the new cgroup_file_ctx so that it doesn't need to be allocated and freed separately. v2: union dropped from cgroup_file_ctx and the procs iterator is embedded in cgroup_file_ctx as suggested by Linus. v3: Michal pointed out that cgroup1's procs pidlist uses of->priv too. Converted. Didn't change to embedded allocation as cgroup1 pidlists get stored for caching. Signed-off-by: Tejun Heo Cc: Linus Torvalds Reviewed-by: Michal Koutný --- kernel/cgroup/cgroup-internal.h | 17 +++++++++++ kernel/cgroup/cgroup-v1.c | 26 ++++++++-------- kernel/cgroup/cgroup.c | 53 +++++++++++++++++++++------------ 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index bfbeabc17a9d..cf637bc4ab45 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -65,6 +65,23 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) return container_of(kfc, struct cgroup_fs_context, kfc); } +struct cgroup_pidlist; + +struct cgroup_file_ctx { + struct { + void *trigger; + } psi; + + struct { + bool started; + struct css_task_iter iter; + } procs; + + struct { + struct cgroup_pidlist *pidlist; + } procs1; +}; + /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 0e7369103ba6..41e0837a5a0b 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -394,6 +394,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) * next pid to display, if any */ struct kernfs_open_file *of = s->private; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = seq_css(s)->cgroup; struct cgroup_pidlist *l; enum cgroup_filetype type = seq_cft(s)->private; @@ -403,25 +404,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) mutex_lock(&cgrp->pidlist_mutex); /* - * !NULL @of->priv indicates that this isn't the first start() - * after open. If the matching pidlist is around, we can use that. - * Look for it. Note that @of->priv can't be used directly. It - * could already have been destroyed. + * !NULL @ctx->procs1.pidlist indicates that this isn't the first + * start() after open. If the matching pidlist is around, we can use + * that. Look for it. Note that @ctx->procs1.pidlist can't be used + * directly. It could already have been destroyed. */ - if (of->priv) - of->priv = cgroup_pidlist_find(cgrp, type); + if (ctx->procs1.pidlist) + ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); /* * Either this is the first start() after open or the matching * pidlist has been destroyed inbetween. Create a new one. */ - if (!of->priv) { - ret = pidlist_array_load(cgrp, type, - (struct cgroup_pidlist **)&of->priv); + if (!ctx->procs1.pidlist) { + ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); if (ret) return ERR_PTR(ret); } - l = of->priv; + l = ctx->procs1.pidlist; if (pid) { int end = l->length; @@ -449,7 +449,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) static void cgroup_pidlist_stop(struct seq_file *s, void *v) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; if (l) mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, @@ -460,7 +461,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v) static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; pid_t *p = v; pid_t *end = l->list + l->length; /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2632e46da1d4..a84631d08d98 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3630,6 +3630,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, enum psi_res res) { + struct cgroup_file_ctx *ctx = of->priv; struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; @@ -3648,7 +3649,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); + psi_trigger_replace(&ctx->psi.trigger, new); cgroup_put(cgrp); @@ -3679,12 +3680,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, poll_table *pt) { - return psi_trigger_poll(&of->priv, of->file, pt); + struct cgroup_file_ctx *ctx = of->priv; + + return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + struct cgroup_file_ctx *ctx = of->priv; + + psi_trigger_replace(&ctx->psi.trigger, NULL); } bool cgroup_psi_enabled(void) @@ -3811,18 +3816,31 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, static int cgroup_file_open(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx; + int ret; - if (cft->open) - return cft->open(of); - return 0; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); + if (ret) + kfree(ctx); + return ret; } static void cgroup_file_release(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx = of->priv; if (cft->release) cft->release(of); + kfree(ctx); } static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, @@ -4751,21 +4769,21 @@ void css_task_iter_end(struct css_task_iter *it) static void cgroup_procs_release(struct kernfs_open_file *of) { - if (of->priv) { - css_task_iter_end(of->priv); - kfree(of->priv); - } + struct cgroup_file_ctx *ctx = of->priv; + + if (ctx->procs.started) + css_task_iter_end(&ctx->procs.iter); } static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; if (pos) (*pos)++; - return css_task_iter_next(it); + return css_task_iter_next(&ctx->procs.iter); } static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, @@ -4773,21 +4791,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, { struct kernfs_open_file *of = s->private; struct cgroup *cgrp = seq_css(s)->cgroup; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct css_task_iter *it = &ctx->procs.iter; /* * When a seq_file is seeked, it's always traversed sequentially * from position 0, so we can simply keep iterating on !0 *pos. */ - if (!it) { + if (!ctx->procs.started) { if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); - - it = kzalloc(sizeof(*it), GFP_KERNEL); - if (!it) - return ERR_PTR(-ENOMEM); - of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); + ctx->procs.started = true; } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); From e57457641613fef0d147ede8bd6a3047df588b95 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Jan 2022 11:02:29 -1000 Subject: [PATCH 855/868] cgroup: Use open-time cgroup namespace for process migration perm checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cgroup process migration permission checks are performed at write time as whether a given operation is allowed or not is dependent on the content of the write - the PID. This currently uses current's cgroup namespace which is a potential security weakness as it may allow scenarios where a less privileged process tricks a more privileged one into writing into a fd that it created. This patch makes cgroup remember the cgroup namespace at the time of open and uses it for migration permission checks instad of current's. Note that this only applies to cgroup2 as cgroup1 doesn't have namespace support. This also fixes a use-after-free bug on cgroupns reported in https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com Note that backporting this fix also requires the preceding patch. Reported-by: "Eric W. Biederman" Suggested-by: Linus Torvalds Cc: Michal Koutný Cc: Oleg Nesterov Reviewed-by: Michal Koutný Reported-by: syzbot+50f5cf33a284ce738b62@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/00000000000048c15c05d0083397@google.com Fixes: 5136f6365ce3 ("cgroup: implement "nsdelegate" mount option") Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-internal.h | 2 ++ kernel/cgroup/cgroup.c | 28 +++++++++++++++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index cf637bc4ab45..6e36e854b512 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -68,6 +68,8 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) struct cgroup_pidlist; struct cgroup_file_ctx { + struct cgroup_namespace *ns; + struct { void *trigger; } psi; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a84631d08d98..cafb8c114a21 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3822,14 +3822,19 @@ static int cgroup_file_open(struct kernfs_open_file *of) ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; + + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); of->priv = ctx; if (!cft->open) return 0; ret = cft->open(of); - if (ret) + if (ret) { + put_cgroup_ns(ctx->ns); kfree(ctx); + } return ret; } @@ -3840,13 +3845,14 @@ static void cgroup_file_release(struct kernfs_open_file *of) if (cft->release) cft->release(of); + put_cgroup_ns(ctx->ns); kfree(ctx); } static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = of->kn->parent->priv; struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; @@ -3863,7 +3869,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, */ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && !(cft->flags & CFTYPE_NS_DELEGATABLE) && - ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) + ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) return -EPERM; if (cft->write) @@ -4853,9 +4859,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) static int cgroup_procs_write_permission(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb) + struct super_block *sb, + struct cgroup_namespace *ns) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *com_cgrp = src_cgrp; int ret; @@ -4884,11 +4890,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, static int cgroup_attach_permissions(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb, bool threadgroup) + struct super_block *sb, bool threadgroup, + struct cgroup_namespace *ns) { int ret = 0; - ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); if (ret) return ret; @@ -4905,6 +4912,7 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, bool threadgroup) { + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; const struct cred *saved_cred; @@ -4932,7 +4940,8 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, */ saved_cred = override_creds(of->file->f_cred); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb, threadgroup); + of->file->f_path.dentry->d_sb, + threadgroup, ctx->ns); revert_creds(saved_cred); if (ret) goto out_finish; @@ -6152,7 +6161,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) goto err; ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, - !(kargs->flags & CLONE_THREAD)); + !(kargs->flags & CLONE_THREAD), + current->nsproxy->cgroup_ns); if (ret) goto err; From b09c2baa56347ae65795350dfcc633dedb1c2970 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Jan 2022 11:02:29 -1000 Subject: [PATCH 856/868] selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0644 is an odd perm to create a cgroup which is a directory. Use the regular 0755 instead. This is necessary for euid switching test case. Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/cgroup_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 623cec04ad42..0cf7e90c0052 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len) int cg_create(const char *cgroup) { - return mkdir(cgroup, 0644); + return mkdir(cgroup, 0755); } int cg_wait_for_proc_count(const char *cgroup, int count) From 613e040e4dc285367bff0f8f75ea59839bc10947 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Jan 2022 11:02:29 -1000 Subject: [PATCH 857/868] selftests: cgroup: Test open-time credential usage for migration checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a task is writing to an fd opened by a different task, the perm check should use the credentials of the latter task. Add a test for it. Tested-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_core.c | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 3df648c37876..01b766506973 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -674,6 +674,73 @@ cleanup: return ret; } +/* + * cgroup migration permission check should be performed based on the + * credentials at the time of open instead of write. + */ +static int test_cgcore_lesser_euid_open(const char *root) +{ + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + uid_t saved_uid; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + saved_uid = geteuid(); + if (seteuid(test_euid)) + goto cleanup; + + cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); + + if (seteuid(saved_uid)) + goto cleanup; + + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -689,6 +756,7 @@ struct corecg_test { T(test_cgcore_proc_migration), T(test_cgcore_thread_migration), T(test_cgcore_destroy), + T(test_cgcore_lesser_euid_open), }; #undef T From bf35a7879f1dfb0d050fe779168bcf25c7de66f5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Jan 2022 11:02:29 -1000 Subject: [PATCH 858/868] selftests: cgroup: Test open-time cgroup namespace usage for migration checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a task is writing to an fd opened by a different task, the perm check should use the cgroup namespace of the latter task. Add a test for it. Tested-by: Michal Koutný Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_core.c | 97 ++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 01b766506973..600123503063 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -1,11 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE #include +#include #include #include #include #include #include +#include #include #include #include @@ -741,6 +744,99 @@ cleanup: return ret; } +struct lesser_ns_open_thread_arg { + const char *path; + int fd; + int err; +}; + +static int lesser_ns_open_thread_fn(void *arg) +{ + struct lesser_ns_open_thread_arg *targ = arg; + + targ->fd = open(targ->path, O_RDWR); + targ->err = errno; + return 0; +} + +/* + * cgroup migration permission check should be performed based on the cgroup + * namespace at the time of open instead of write. + */ +static int test_cgcore_lesser_ns_open(const char *root) +{ + static char stack[65536]; + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + struct lesser_ns_open_thread_arg targ = { .fd = -1 }; + pid_t pid; + int status; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_b)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + targ.path = cg_test_b_procs; + pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), + CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, + &targ); + if (pid < 0) + goto cleanup; + + if (waitpid(pid, &status, 0) < 0) + goto cleanup; + + if (!WIFEXITED(status)) + goto cleanup; + + cg_test_b_procs_fd = targ.fd; + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -757,6 +853,7 @@ struct corecg_test { T(test_cgcore_thread_migration), T(test_cgcore_destroy), T(test_cgcore_lesser_euid_open), + T(test_cgcore_lesser_ns_open), }; #undef T From 597cb7968cb6243e915ba9599195656be14773e5 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 4 Jan 2022 22:41:03 -0800 Subject: [PATCH 859/868] KVM: SEV: Mark nested locking of kvm->lock Both source and dest vms' kvm->locks are held in sev_lock_two_vms. Mark one with a different subtype to avoid false positives from lockdep. Fixes: c9d61dcb0bc26 (KVM: SEV: accept signals in sev_lock_two_vms) Reported-by: Yiru Xu Tested-by: Jinrong Liang Signed-off-by: Wanpeng Li Message-Id: <1641364863-26331-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 7656a2c5662a..be2883141220 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1565,7 +1565,7 @@ static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) r = -EINTR; if (mutex_lock_killable(&dst_kvm->lock)) goto release_src; - if (mutex_lock_killable(&src_kvm->lock)) + if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING)) goto unlock_dst; return 0; From fffb5323780786c81ba005f8b8603d4a558aad28 Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Wed, 5 Jan 2022 09:33:37 +0530 Subject: [PATCH 860/868] KVM: x86: Check for rmaps allocation With TDP MMU being the default now, access to mmu_rmaps_stat debugfs file causes following oops: BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 3185 Comm: cat Not tainted 5.16.0-rc4+ #204 RIP: 0010:pte_list_count+0x6/0x40 Call Trace: ? kvm_mmu_rmaps_stat_show+0x15e/0x320 seq_read_iter+0x126/0x4b0 ? aa_file_perm+0x124/0x490 seq_read+0xf5/0x140 full_proxy_read+0x5c/0x80 vfs_read+0x9f/0x1a0 ksys_read+0x67/0xe0 __x64_sys_read+0x19/0x20 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fca6fc13912 Return early when rmaps are not present. Reported-by: Vasant Hegde Tested-by: Vasant Hegde Signed-off-by: Nikunj A Dadhania Reviewed-by: Peter Xu Reviewed-by: Sean Christopherson Message-Id: <20220105040337.4234-1-nikunj@amd.com> Cc: stable@vger.kernel.org Fixes: 3bcd0662d66f ("KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 54a83a744538..f33c804a922a 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -95,6 +95,9 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v) unsigned int *log[KVM_NR_PAGE_SIZES], *cur; int i, j, k, l, ret; + if (!kvm_memslots_have_rmaps(kvm)) + return 0; + ret = -ENOMEM; memset(log, 0, sizeof(log)); for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { From f06a82f9d31a87878a9295bac1defdadbc77bbc0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 6 Jan 2022 23:20:30 +0100 Subject: [PATCH 861/868] perf trace: Avoid early exit due to running SIGCHLD handler before it makes sense to When running 'perf trace' with an BPF object like: # perf trace -e openat,tools/perf/examples/bpf/hello.c the event parsing eventually calls llvm__get_kbuild_opts() that runs a script and that ends up with SIGCHLD delivered to the 'perf trace' handler, which assumes the workload process is done and quits 'perf trace'. Move the SIGCHLD handler setup directly to trace__run(), where the event is parsed and the object is already compiled. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Christy Lee Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20220106222030.227499-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0b52e08e558e..ef94388e8323 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3925,6 +3925,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) bool draining = false; trace->live = true; + signal(SIGCHLD, sig_handler); if (!trace->raw_augmented_syscalls) { if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) @@ -4873,7 +4874,6 @@ int cmd_trace(int argc, const char **argv) signal(SIGSEGV, sighandler_dump_stack); signal(SIGFPE, sighandler_dump_stack); - signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); trace.evlist = evlist__new(); From dc9f2dd5de04d2bbcccbabdf5df9715c2ddcf25f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 7 Jan 2022 16:02:54 -0300 Subject: [PATCH 862/868] Revert "libtraceevent: Increase libtraceevent logging when verbose" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 08efcb4a638d260ef7fcbae64ecf7ceceb3f1841. This breaks the build as it will prefer using libbpf-devel header files, even when not using LIBBPF_DYNAMIC=1, breaking the build. This was detected on OpenSuSE Tumbleweed with libtraceevent-devel 1.3.0, as described by Jiri Slaby: ======================================================================= It breaks build with LIBTRACEEVENT_DYNAMIC and version 1.3.0: > util/debug.c: In function ‘perf_debug_option’: > util/debug.c:243:17: error: implicit declaration of function ‘tep_set_loglevel’ [-Werror=implicit-function-declaration] > 243 | tep_set_loglevel(TEP_LOG_INFO); > | ^~~~~~~~~~~~~~~~ > util/debug.c:243:34: error: ‘TEP_LOG_INFO’ undeclared (first use in this function); did you mean ‘TEP_PRINT_INFO’? > 243 | tep_set_loglevel(TEP_LOG_INFO); > | ^~~~~~~~~~~~ > | TEP_PRINT_INFO > util/debug.c:243:34: note: each undeclared identifier is reported only once for each function it appears in > util/debug.c:245:34: error: ‘TEP_LOG_DEBUG’ undeclared (first use in this function) > 245 | tep_set_loglevel(TEP_LOG_DEBUG); > | ^~~~~~~~~~~~~ > util/debug.c:247:34: error: ‘TEP_LOG_ALL’ undeclared (first use in this function) > 247 | tep_set_loglevel(TEP_LOG_ALL); > | ^~~~~~~~~~~ It is because the gcc's command line looks like: gcc ... -I/home/abuild/rpmbuild/BUILD/tools/lib/ ... -DLIBTRACEEVENT_VERSION=65790 ... ======================================================================= The proper way to fix this is more involved and so not suitable for this late in the 5.16-rc stage. Reported-by: Jiri Slaby Link: https://lore.kernel.org/lkml/bc2b0786-8965-1bcd-2316-9d9bb37b9c31@kernel.org Cc: Andrii Nakryiko Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Cc: Steven Rostedt Link: https://lore.kernel.org/lkml/YddGjjmlMZzxUZbN@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index c7a9fa0ffae9..2c06abf6dcd2 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -24,16 +24,6 @@ #include "util/parse-sublevel-options.h" #include -#include - -#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c)) -#ifndef LIBTRACEEVENT_VERSION -/* - * If LIBTRACEEVENT_VERSION wasn't computed then set to version 1.1.0 that ships - * with the Linux kernel tools. - */ -#define LIBTRACEEVENT_VERSION MAKE_LIBTRACEEVENT_VERSION(1, 1, 0) -#endif int verbose; int debug_peo_args; @@ -238,15 +228,6 @@ int perf_debug_option(const char *str) /* Allow only verbose value in range (0, 10), otherwise set 0. */ verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose; -#if MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) <= LIBTRACEEVENT_VERSION - if (verbose == 1) - tep_set_loglevel(TEP_LOG_INFO); - else if (verbose == 2) - tep_set_loglevel(TEP_LOG_DEBUG); - else if (verbose >= 3) - tep_set_loglevel(TEP_LOG_ALL); -#endif - return 0; } From a19f75de73c220b4496d2aefb7a605dd032f7c01 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 6 Jan 2022 13:24:52 +0100 Subject: [PATCH 863/868] Revert "i2c: core: support bus regulator controlling in adapter" This largely reverts commit 5a7b95fb993ec399c8a685552aa6a8fc995c40bd. It breaks suspend with AMD GPUs, and we couldn't incrementally fix it. So, let's remove the code and go back to the drawing board. We keep the header extension to not break drivers already populating the regulator. We expect to re-add the code handling it soon. Fixes: 5a7b95fb993e ("i2c: core: support bus regulator controlling in adapter") Reported-by: "Tareque Md.Hanif" Link: https://lore.kernel.org/r/1295184560.182511.1639075777725@mail.yahoo.com Reported-by: Konstantin Kharlamov Link: https://lore.kernel.org/r/7143a7147978f4104171072d9f5225d2ce355ec1.camel@yandex.ru BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1850 Tested-by: "Tareque Md.Hanif" Tested-by: Konstantin Kharlamov Signed-off-by: Wolfram Sang Cc: # 5.14+ --- drivers/i2c/i2c-core-base.c | 95 ------------------------------------- 1 file changed, 95 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index f193f9058584..73253e667de1 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -466,14 +466,12 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap; struct i2c_driver *driver; int status; if (!client) return 0; - adap = client->adapter; client->irq = client->init_irq; if (!client->irq) { @@ -539,14 +537,6 @@ static int i2c_device_probe(struct device *dev) dev_dbg(dev, "probe\n"); - if (adap->bus_regulator) { - status = regulator_enable(adap->bus_regulator); - if (status < 0) { - dev_err(&adap->dev, "Failed to enable bus regulator\n"); - goto err_clear_wakeup_irq; - } - } - status = of_clk_set_defaults(dev->of_node, false); if (status < 0) goto err_clear_wakeup_irq; @@ -605,10 +595,8 @@ put_sync_adapter: static void i2c_device_remove(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); - struct i2c_adapter *adap; struct i2c_driver *driver; - adap = client->adapter; driver = to_i2c_driver(dev->driver); if (driver->remove) { int status; @@ -623,8 +611,6 @@ static void i2c_device_remove(struct device *dev) devres_release_group(&client->dev, client->devres_group_id); dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); - if (!pm_runtime_status_suspended(&client->dev) && adap->bus_regulator) - regulator_disable(adap->bus_regulator); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -634,86 +620,6 @@ static void i2c_device_remove(struct device *dev) pm_runtime_put(&client->adapter->dev); } -#ifdef CONFIG_PM_SLEEP -static int i2c_resume_early(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - if (pm_runtime_status_suspended(&client->dev) && - client->adapter->bus_regulator) { - err = regulator_enable(client->adapter->bus_regulator); - if (err) - return err; - } - - return pm_generic_resume_early(&client->dev); -} - -static int i2c_suspend_late(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - err = pm_generic_suspend_late(&client->dev); - if (err) - return err; - - if (!pm_runtime_status_suspended(&client->dev) && - client->adapter->bus_regulator) - return regulator_disable(client->adapter->bus_regulator); - - return 0; -} -#endif - -#ifdef CONFIG_PM -static int i2c_runtime_resume(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - if (client->adapter->bus_regulator) { - err = regulator_enable(client->adapter->bus_regulator); - if (err) - return err; - } - - return pm_generic_runtime_resume(&client->dev); -} - -static int i2c_runtime_suspend(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - int err; - - if (!client) - return 0; - - err = pm_generic_runtime_suspend(&client->dev); - if (err) - return err; - - if (client->adapter->bus_regulator) - return regulator_disable(client->adapter->bus_regulator); - return 0; -} -#endif - -static const struct dev_pm_ops i2c_device_pm = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(i2c_suspend_late, i2c_resume_early) - SET_RUNTIME_PM_OPS(i2c_runtime_suspend, i2c_runtime_resume, NULL) -}; - static void i2c_device_shutdown(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); @@ -773,7 +679,6 @@ struct bus_type i2c_bus_type = { .probe = i2c_device_probe, .remove = i2c_device_remove, .shutdown = i2c_device_shutdown, - .pm = &i2c_device_pm, }; EXPORT_SYMBOL_GPL(i2c_bus_type); From c8013355ead68dce152cf426686f8a5f80d88b40 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 4 Jan 2022 18:02:47 +0100 Subject: [PATCH 864/868] ARM: dts: gpio-ranges property is now required Since [1], added in 5.7, the absence of a gpio-ranges property has prevented GPIOs from being restored to inputs when released. Add those properties for BCM283x and BCM2711 devices. [1] commit 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges") Link: https://lore.kernel.org/r/20220104170247.956760-1-linus.walleij@linaro.org Fixes: 2ab73c6d8323 ("gpio: Support GPIO controllers without pin-ranges") Fixes: 266423e60ea1 ("pinctrl: bcm2835: Change init order for gpio hogs") Reported-by: Stefan Wahren Reported-by: Florian Fainelli Reported-by: Jan Kiszka Signed-off-by: Phil Elwell Acked-by: Florian Fainelli Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20211206092237.4105895-3-phil@raspberrypi.com Signed-off-by: Linus Walleij Acked-by: Florian Fainelli Signed-off-by: Olof Johansson --- arch/arm/boot/dts/bcm2711.dtsi | 2 ++ arch/arm/boot/dts/bcm283x.dtsi | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 9e01dbca4a01..dff18fc9a906 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -582,6 +582,8 @@ , ; + gpio-ranges = <&gpio 0 0 58>; + gpclk0_gpio49: gpclk0_gpio49 { pin-gpclk { pins = "gpio49"; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index a3e06b680947..c113661a6668 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -126,6 +126,8 @@ interrupt-controller; #interrupt-cells = <2>; + gpio-ranges = <&gpio 0 0 54>; + /* Defines common pin muxing groups * * While each pin can have its mux selected From cf73ed894ee939d6706d65e0cd186e4a64e3af6d Mon Sep 17 00:00:00 2001 From: Nikita Travkin Date: Sat, 8 Jan 2022 23:19:19 -0800 Subject: [PATCH 865/868] Input: zinitix - make sure the IRQ is allocated before it gets enabled Since irq request is the last thing in the driver probe, it happens later than the input device registration. This means that there is a small time window where if the open method is called the driver will attempt to enable not yet available irq. Fix that by moving the irq request before the input device registration. Reviewed-by: Linus Walleij Fixes: 26822652c85e ("Input: add zinitix touchscreen driver") Signed-off-by: Nikita Travkin Link: https://lore.kernel.org/r/20220106072840.36851-2-nikita@trvn.ru Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zinitix.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index b8d901099378..1e70b8d2a8d7 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -488,6 +488,15 @@ static int zinitix_ts_probe(struct i2c_client *client) return error; } + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, zinitix_ts_irq_handler, + IRQF_ONESHOT | IRQF_NO_AUTOEN, + client->name, bt541); + if (error) { + dev_err(&client->dev, "Failed to request IRQ: %d\n", error); + return error; + } + error = zinitix_init_input_dev(bt541); if (error) { dev_err(&client->dev, @@ -513,15 +522,6 @@ static int zinitix_ts_probe(struct i2c_client *client) return -EINVAL; } - error = devm_request_threaded_irq(&client->dev, client->irq, - NULL, zinitix_ts_irq_handler, - IRQF_ONESHOT | IRQF_NO_AUTOEN, - client->name, bt541); - if (error) { - dev_err(&client->dev, "Failed to request IRQ: %d\n", error); - return error; - } - return 0; } From df5bc0aa7ff6e2e14cb75182b4eda20253c711d4 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 9 Jan 2022 13:11:37 -0500 Subject: [PATCH 866/868] Revert "drm/amdgpu: stop scheduler when calling hw_fini (v2)" This reverts commit f7d6779df642720e22bffd449e683bb8690bd3bf. This bisected regression has impacted suspend-resume stability since 5.15-rc1. It regressed -stable via 5.14.10. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215315 Fixes: f7d6779df64 ("drm/amdgpu: stop scheduler when calling hw_fini (v2)") Cc: Guchun Chen Cc: Andrey Grodzovsky Cc: Christian Koenig Cc: Alex Deucher Cc: # 5.14+ Signed-off-by: Len Brown Signed-off-by: Linus Torvalds --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 9afd11ca2709..45977a72b5dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -547,9 +547,6 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) - drm_sched_stop(&ring->sched, NULL); - /* You can't wait for HW to signal if it's gone */ if (!drm_dev_is_unplugged(adev_to_drm(adev))) r = amdgpu_fence_wait_empty(ring); @@ -609,11 +606,6 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - if (!ring->no_scheduler) { - drm_sched_resubmit_jobs(&ring->sched); - drm_sched_start(&ring->sched, true); - } - /* enable the interrupt */ if (ring->fence_drv.irq_src) amdgpu_irq_get(adev, ring->fence_drv.irq_src, From df0cc57e057f18e44dac8e6c18aba47ab53202f9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Jan 2022 14:55:34 -0800 Subject: [PATCH 867/868] Linux 5.16 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 16d7f83ac368..08510230b42f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Gobble Gobble # *DOCUMENTATION* From 5f02ef741a785678930f3ff0a8b6b2b0ef1bb402 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 18 Jan 2022 04:34:43 -0500 Subject: [PATCH 868/868] KVM: VMX: switch blocked_vcpu_on_cpu_lock to raw spinlock blocked_vcpu_on_cpu_lock is taken from hard interrupt context (pi_wakeup_handler), therefore it cannot sleep. Switch it to a raw spinlock. Fixes: [41297.066254] BUG: scheduling while atomic: CPU 0/KVM/635218/0x00010001 [41297.066323] Preemption disabled at: [41297.066324] [] irq_enter_rcu+0xf/0x60 [41297.066339] Call Trace: [41297.066342] [41297.066346] dump_stack_lvl+0x34/0x44 [41297.066353] ? irq_enter_rcu+0xf/0x60 [41297.066356] __schedule_bug.cold+0x7d/0x8b [41297.066361] __schedule+0x439/0x5b0 [41297.066365] ? task_blocks_on_rt_mutex.constprop.0.isra.0+0x1b0/0x440 [41297.066369] schedule_rtlock+0x1e/0x40 [41297.066371] rtlock_slowlock_locked+0xf1/0x260 [41297.066374] rt_spin_lock+0x3b/0x60 [41297.066378] pi_wakeup_handler+0x31/0x90 [kvm_intel] [41297.066388] sysvec_kvm_posted_intr_wakeup_ipi+0x9d/0xd0 [41297.066392] [41297.066392] asm_sysvec_kvm_posted_intr_wakeup_ipi+0x12/0x20 ... Signed-off-by: Marcelo Tosatti Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/posted_intr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 1c94783b5a54..21ea58d25771 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -15,7 +15,7 @@ * can find which vCPU should be waken up. */ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); +static DEFINE_PER_CPU(raw_spinlock_t, blocked_vcpu_on_cpu_lock); static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) { @@ -121,9 +121,9 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) new.control) != old.control); if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); vcpu->pre_pcpu = -1; } } @@ -154,11 +154,11 @@ int pi_pre_block(struct kvm_vcpu *vcpu) local_irq_disable(); if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); list_add_tail(&vcpu->blocked_vcpu_list, &per_cpu(blocked_vcpu_on_cpu, vcpu->pre_pcpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); } do { @@ -215,7 +215,7 @@ void pi_wakeup_handler(void) struct kvm_vcpu *vcpu; int cpu = smp_processor_id(); - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), blocked_vcpu_list) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -223,13 +223,13 @@ void pi_wakeup_handler(void) if (pi_test_on(pi_desc) == 1) kvm_vcpu_kick(vcpu); } - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } void __init pi_init_cpu(int cpu) { INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)