From ec738ca127d07ecac6afae36e2880341ec89150e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 17:02:30 +0100 Subject: [PATCH 001/173] mtd: spi-nor: fix memory leak when using debugfs_lookup() When calling debugfs_lookup() the result must have dput() called on it, otherwise the memory will leak over time. To solve this, remove the lookup and create the directory on the first device found, and then remove it when the module is unloaded. Cc: Tudor Ambarus Cc: Pratyush Yadav Cc: Miquel Raynal Cc: Richard Weinberger Cc: Vignesh Raghavendra Cc: linux-mtd@lists.infradead.org Reviewed-by: Michael Walle Link: https://lore.kernel.org/r/20230208160230.2179905-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/spi-nor/core.c | 14 +++++++++++++- drivers/mtd/spi-nor/core.h | 2 ++ drivers/mtd/spi-nor/debugfs.c | 11 ++++++++--- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 0a78045ca1d9..522d375aeccf 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -3343,7 +3343,19 @@ static struct spi_mem_driver spi_nor_driver = { .remove = spi_nor_remove, .shutdown = spi_nor_shutdown, }; -module_spi_mem_driver(spi_nor_driver); + +static int __init spi_nor_module_init(void) +{ + return spi_mem_driver_register(&spi_nor_driver); +} +module_init(spi_nor_module_init); + +static void __exit spi_nor_module_exit(void) +{ + spi_mem_driver_unregister(&spi_nor_driver); + spi_nor_debugfs_shutdown(); +} +module_exit(spi_nor_module_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Huang Shijie "); diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h index 25423225c29d..e0cc42a4a0c8 100644 --- a/drivers/mtd/spi-nor/core.h +++ b/drivers/mtd/spi-nor/core.h @@ -711,8 +711,10 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd) #ifdef CONFIG_DEBUG_FS void spi_nor_debugfs_register(struct spi_nor *nor); +void spi_nor_debugfs_shutdown(void); #else static inline void spi_nor_debugfs_register(struct spi_nor *nor) {} +static inline void spi_nor_debugfs_shutdown(void) {} #endif #endif /* __LINUX_MTD_SPI_NOR_INTERNAL_H */ diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c index 845b78c7ecc7..fc7ad203df12 100644 --- a/drivers/mtd/spi-nor/debugfs.c +++ b/drivers/mtd/spi-nor/debugfs.c @@ -226,13 +226,13 @@ static void spi_nor_debugfs_unregister(void *data) nor->debugfs_root = NULL; } +static struct dentry *rootdir; + void spi_nor_debugfs_register(struct spi_nor *nor) { - struct dentry *rootdir, *d; + struct dentry *d; int ret; - /* Create rootdir once. Will never be deleted again. */ - rootdir = debugfs_lookup(SPI_NOR_DEBUGFS_ROOT, NULL); if (!rootdir) rootdir = debugfs_create_dir(SPI_NOR_DEBUGFS_ROOT, NULL); @@ -247,3 +247,8 @@ void spi_nor_debugfs_register(struct spi_nor *nor) debugfs_create_file("capabilities", 0444, d, nor, &spi_nor_capabilities_fops); } + +void spi_nor_debugfs_shutdown(void) +{ + debugfs_remove(rootdir); +} From c0ad453e94e5c404efbcf668648d07eaa1a71ed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 18 Feb 2023 09:51:06 +0300 Subject: [PATCH 002/173] pinctrl: mediatek: add missing options to PINCTRL_MT7981 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are options missing from PINCTRL_MT7981 whilst being on every other pin controller. Add them. Signed-off-by: Arınç ÜNAL Acked-by: Daniel Golle Link: https://lore.kernel.org/r/20230218065108.8958-1-arinc.unal@arinc9.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index f20c28334bcb..67818ba14d4e 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -130,6 +130,8 @@ config PINCTRL_MT7622 config PINCTRL_MT7981 bool "Mediatek MT7981 pin control" depends on OF + depends on ARM64 || COMPILE_TEST + default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE config PINCTRL_MT7986 From 6de67ca4dab7d855ef9598cd894cd7dfa4077f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 18 Feb 2023 09:51:07 +0300 Subject: [PATCH 003/173] pinctrl: mediatek: fix naming inconsistency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some options include "MediaTek", some "Mediatek". Rename all to "MediaTek" to address the naming inconsistency. Signed-off-by: Arınç ÜNAL Reviewed-by: Daniel Golle Link: https://lore.kernel.org/r/20230218065108.8958-2-arinc.unal@arinc9.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/Kconfig | 42 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig index 67818ba14d4e..a71874fed3d6 100644 --- a/drivers/pinctrl/mediatek/Kconfig +++ b/drivers/pinctrl/mediatek/Kconfig @@ -45,35 +45,35 @@ config PINCTRL_MTK_PARIS # For ARMv7 SoCs config PINCTRL_MT2701 - bool "Mediatek MT2701 pin control" + bool "MediaTek MT2701 pin control" depends on MACH_MT7623 || MACH_MT2701 || COMPILE_TEST depends on OF default MACH_MT2701 select PINCTRL_MTK config PINCTRL_MT7623 - bool "Mediatek MT7623 pin control with generic binding" + bool "MediaTek MT7623 pin control with generic binding" depends on MACH_MT7623 || COMPILE_TEST depends on OF default MACH_MT7623 select PINCTRL_MTK_MOORE config PINCTRL_MT7629 - bool "Mediatek MT7629 pin control" + bool "MediaTek MT7629 pin control" depends on MACH_MT7629 || COMPILE_TEST depends on OF default MACH_MT7629 select PINCTRL_MTK_MOORE config PINCTRL_MT8135 - bool "Mediatek MT8135 pin control" + bool "MediaTek MT8135 pin control" depends on MACH_MT8135 || COMPILE_TEST depends on OF default MACH_MT8135 select PINCTRL_MTK config PINCTRL_MT8127 - bool "Mediatek MT8127 pin control" + bool "MediaTek MT8127 pin control" depends on MACH_MT8127 || COMPILE_TEST depends on OF default MACH_MT8127 @@ -88,33 +88,33 @@ config PINCTRL_MT2712 select PINCTRL_MTK config PINCTRL_MT6765 - tristate "Mediatek MT6765 pin control" + tristate "MediaTek MT6765 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT6779 - tristate "Mediatek MT6779 pin control" + tristate "MediaTek MT6779 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS help Say yes here to support pin controller and gpio driver - on Mediatek MT6779 SoC. + on MediaTek MT6779 SoC. In MTK platform, we support virtual gpio and use it to map specific eint which doesn't have real gpio pin. config PINCTRL_MT6795 - bool "Mediatek MT6795 pin control" + bool "MediaTek MT6795 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT6797 - bool "Mediatek MT6797 pin control" + bool "MediaTek MT6797 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK @@ -128,42 +128,42 @@ config PINCTRL_MT7622 select PINCTRL_MTK_MOORE config PINCTRL_MT7981 - bool "Mediatek MT7981 pin control" + bool "MediaTek MT7981 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE config PINCTRL_MT7986 - bool "Mediatek MT7986 pin control" + bool "MediaTek MT7986 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_MOORE config PINCTRL_MT8167 - bool "Mediatek MT8167 pin control" + bool "MediaTek MT8167 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK config PINCTRL_MT8173 - bool "Mediatek MT8173 pin control" + bool "MediaTek MT8173 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK config PINCTRL_MT8183 - bool "Mediatek MT8183 pin control" + bool "MediaTek MT8183 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT8186 - bool "Mediatek MT8186 pin control" + bool "MediaTek MT8186 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK @@ -182,28 +182,28 @@ config PINCTRL_MT8188 map specific eint which doesn't have real gpio pin. config PINCTRL_MT8192 - bool "Mediatek MT8192 pin control" + bool "MediaTek MT8192 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT8195 - bool "Mediatek MT8195 pin control" + bool "MediaTek MT8195 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK_PARIS config PINCTRL_MT8365 - bool "Mediatek MT8365 pin control" + bool "MediaTek MT8365 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK select PINCTRL_MTK config PINCTRL_MT8516 - bool "Mediatek MT8516 pin control" + bool "MediaTek MT8516 pin control" depends on OF depends on ARM64 || COMPILE_TEST default ARM64 && ARCH_MEDIATEK @@ -211,7 +211,7 @@ config PINCTRL_MT8516 # For PMIC config PINCTRL_MT6397 - bool "Mediatek MT6397 pin control" + bool "MediaTek MT6397 pin control" depends on MFD_MT6397 || COMPILE_TEST depends on OF default MFD_MT6397 From 7bb97e360acdd38b68ad0a1defb89c6e89c85596 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2023 14:08:28 +0100 Subject: [PATCH 004/173] pinctrl: at91-pio4: fix domain name assignment Since commit d59f6617eef0 ("genirq: Allow fwnode to carry name information only") an IRQ domain is always given a name during allocation (e.g. used for the debugfs entry). Drop the no longer valid name assignment, which would lead to an attempt to free a string constant when removing the domain on late probe failures (e.g. probe deferral). Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only") Cc: stable@vger.kernel.org # 4.13 Signed-off-by: Johan Hovold Reviewed-by: Claudiu Beznea Tested-by: Claudiu Beznea # on SAMA7G5 Link: https://lore.kernel.org/r/20230224130828.27985-1-johan+linaro@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-at91-pio4.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 373eed8bc4be..c775d239444a 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -1206,7 +1206,6 @@ static int atmel_pinctrl_probe(struct platform_device *pdev) dev_err(dev, "can't add the irq domain\n"); return -ENODEV; } - atmel_pioctrl->irq_domain->name = "atmel gpio"; for (i = 0; i < atmel_pioctrl->npins; i++) { int irq = irq_create_mapping(atmel_pioctrl->irq_domain, i); From 657fd9da2d4b4aa0a384105b236baa22fa0233bf Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 6 Feb 2023 21:37:20 +0100 Subject: [PATCH 005/173] pinctrl: ocelot: Fix alt mode for ocelot In case the driver was trying to set an alternate mode for gpio 0 or 32 then the mode was not set correctly. The reason is that there is computation error inside the function ocelot_pinmux_set_mux because in this case it was trying to shift to left by -1. Fix this by actually shifting the function bits and not the position. Fixes: 4b36082e2e09 ("pinctrl: ocelot: fix pinmuxing for pins after 31") Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20230206203720.1177718-1-horatiu.vultur@microchip.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ocelot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c index 29e4a6282a64..1dcbd0937ef5 100644 --- a/drivers/pinctrl/pinctrl-ocelot.c +++ b/drivers/pinctrl/pinctrl-ocelot.c @@ -1204,7 +1204,7 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev, regmap_update_bits(info->map, REG_ALT(0, info, pin->pin), BIT(p), f << p); regmap_update_bits(info->map, REG_ALT(1, info, pin->pin), - BIT(p), f << (p - 1)); + BIT(p), (f >> 1) << p); return 0; } From 913a956c4363c3b5fd13d3a00836fad4c46646a7 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Mon, 27 Feb 2023 21:51:31 +0100 Subject: [PATCH 006/173] pinctrl: stm32: use dynamic allocation of GPIO base Since commit 502df79b860563d7 ("gpiolib: Warn on drivers still using static gpiobase allocation"), one or more warnings are printed during boot on systems where static allocation of GPIO base is used: [ 0.197707] gpio gpiochip0: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.199942] stm32f429-pinctrl soc:pinctrl@40020000: GPIOA bank added [ 0.200711] gpio gpiochip1: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.202855] stm32f429-pinctrl soc:pinctrl@40020000: GPIOB bank added [ 0.203591] gpio gpiochip2: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.205704] stm32f429-pinctrl soc:pinctrl@40020000: GPIOC bank added [ 0.206338] gpio gpiochip3: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.208448] stm32f429-pinctrl soc:pinctrl@40020000: GPIOD bank added [ 0.209182] gpio gpiochip4: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.211282] stm32f429-pinctrl soc:pinctrl@40020000: GPIOE bank added [ 0.212094] gpio gpiochip5: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.214270] stm32f429-pinctrl soc:pinctrl@40020000: GPIOF bank added [ 0.215005] gpio gpiochip6: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.217110] stm32f429-pinctrl soc:pinctrl@40020000: GPIOG bank added [ 0.217845] gpio gpiochip7: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.219959] stm32f429-pinctrl soc:pinctrl@40020000: GPIOH bank added [ 0.220602] gpio gpiochip8: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.222714] stm32f429-pinctrl soc:pinctrl@40020000: GPIOI bank added [ 0.223483] gpio gpiochip9: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.225594] stm32f429-pinctrl soc:pinctrl@40020000: GPIOJ bank added [ 0.226336] gpio gpiochip10: Static allocation of GPIO base is deprecated, use dynamic allocation. [ 0.228490] stm32f429-pinctrl soc:pinctrl@40020000: GPIOK bank added So let's follow the suggestion and use dynamic allocation. Tested on STM32F429I-DISC1 board. Signed-off-by: Dario Binacchi Link: https://lore.kernel.org/r/20230227205131.2104082-1-dario.binacchi@amarulasolutions.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index cb33a23ab0c1..04ace4c7bd58 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1330,7 +1330,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode if (fwnode_property_read_u32(fwnode, "st,bank-ioport", &bank_ioport_nr)) bank_ioport_nr = bank_nr; - bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; + bank->gpio_chip.base = -1; bank->gpio_chip.ngpio = npins; bank->gpio_chip.fwnode = fwnode; From 1eb65c8687316c65140b48fad27133d583178e15 Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Fri, 17 Feb 2023 22:44:11 +0200 Subject: [PATCH 007/173] Drivers: vmbus: Check for channel allocation before looking up relids relid2channel() assumes vmbus channel array to be allocated when called. However, in cases such as kdump/kexec, not all relids will be reset by the host. When the second kernel boots and if the guest receives a vmbus interrupt during vmbus driver initialization before vmbus_connect() is called, before it finishes, or if it fails, the vmbus interrupt service routine is called which in turn calls relid2channel() and can cause a null pointer dereference. Print a warning and error out in relid2channel() for a channel id that's invalid in the second kernel. Fixes: 8b6a877c060e ("Drivers: hv: vmbus: Replace the per-CPU channel lists with a global array of channels") Signed-off-by: Mohammed Gamal Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/20230217204411.212709-1-mgamal@redhat.com Signed-off-by: Wei Liu --- drivers/hv/connection.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 9dc27e5d367a..da51b50787df 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -409,6 +409,10 @@ void vmbus_disconnect(void) */ struct vmbus_channel *relid2channel(u32 relid) { + if (vmbus_connection.channels == NULL) { + pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid); + return NULL; + } if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) return NULL; return READ_ONCE(vmbus_connection.channels[relid]); From 44ac5abac86b20856e6d9e5e5e40dcc2623fe330 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 5 Mar 2023 23:00:04 +0100 Subject: [PATCH 008/173] Documentation/security-bugs: move from admin-guide/ to process/ Jiri Kosina, Jonathan Corbet, and Willy Tarreau all expressed a desire to move this document under process/. Create a new section for security issues in the index and group it with embargoed-hardware-issues. I'm doing this at the start of the series to make all the subsequent changes show up in 'git blame'. Existing references were updated using: git grep -l security-bugs ':!Documentation/translations/' | xargs sed -i 's|admin-guide/security-bugs|process/security-bugs|g' git grep -l security-bugs Documentation/translations/ | xargs sed -i 's|Documentation/admin-guide/security-bugs|Documentation/process/security-bugs|g' git grep -l security-bugs Documentation/translations/ | xargs sed -i '/Original:/s|\.\./admin-guide/security-bugs|\.\./process/security-bugs|g' Notably, the page is not moved in the translations (due to my lack of knowledge of these languages), but the translations have been updated to point to the new location of the original document where these references exist. Link: https://lore.kernel.org/all/nycvar.YFH.7.76.2206062326230.10851@cbobk.fhfr.pm/ Suggested-by: Jiri Kosina Cc: Alex Shi Cc: Yanteng Si Cc: Hu Haowen Cc: Federico Vaga Cc: Tsugikazu Shibata Cc: Minchan Kim Cc: Jeimi Lee Cc: Carlos Bilbao Cc: Akira Yokosawa Signed-off-by: Vegard Nossum Acked-by: Carlos Bilbao Reviewed-by: Yanteng Si Reviewed-by: Akira Yokosawa Acked-by: Federico Vaga Reviewed-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20230305220010.20895-2-vegard.nossum@oracle.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/index.rst | 1 - Documentation/admin-guide/reporting-issues.rst | 4 ++-- Documentation/process/howto.rst | 2 +- Documentation/process/index.rst | 9 ++++++++- Documentation/process/researcher-guidelines.rst | 2 +- Documentation/{admin-guide => process}/security-bugs.rst | 0 Documentation/process/stable-kernel-rules.rst | 2 +- Documentation/process/submitting-patches.rst | 2 +- .../translations/it_IT/admin-guide/security-bugs.rst | 2 +- .../translations/it_IT/process/submitting-patches.rst | 2 +- Documentation/translations/ja_JP/howto.rst | 2 +- Documentation/translations/ko_KR/howto.rst | 2 +- Documentation/translations/sp_SP/howto.rst | 2 +- .../translations/sp_SP/process/submitting-patches.rst | 2 +- .../translations/zh_CN/admin-guide/security-bugs.rst | 2 +- Documentation/translations/zh_CN/process/howto.rst | 2 +- .../translations/zh_TW/admin-guide/security-bugs.rst | 2 +- Documentation/translations/zh_TW/process/howto.rst | 2 +- MAINTAINERS | 4 ++-- 19 files changed, 26 insertions(+), 20 deletions(-) rename Documentation/{admin-guide => process}/security-bugs.rst (100%) diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 0ad7e7ec0d27..09a563bbe3e7 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -36,7 +36,6 @@ problems and bugs in particular. reporting-issues reporting-regressions - security-bugs bug-hunting bug-bisect tainted-kernels diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst index ec62151fe672..2fd5a030235a 100644 --- a/Documentation/admin-guide/reporting-issues.rst +++ b/Documentation/admin-guide/reporting-issues.rst @@ -395,7 +395,7 @@ might want to be aware of; it for example explains how to add your issue to the list of tracked regressions, to ensure it won't fall through the cracks. What qualifies as security issue is left to your judgment. Consider reading -Documentation/admin-guide/security-bugs.rst before proceeding, as it +Documentation/process/security-bugs.rst before proceeding, as it provides additional details how to best handle security issues. An issue is a 'really severe problem' when something totally unacceptably bad @@ -1269,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward the report's text to these addresses; but on top of it put a small note where you mention that you filed it with a link to the ticket. -See Documentation/admin-guide/security-bugs.rst for more information. +See Documentation/process/security-bugs.rst for more information. Duties after the report went out diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst index cb6abcb2b6d0..deb8235e20ff 100644 --- a/Documentation/process/howto.rst +++ b/Documentation/process/howto.rst @@ -138,7 +138,7 @@ required reading: philosophy and is very important for people moving to Linux from development on other Operating Systems. - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` If you feel you have found a security problem in the Linux kernel, please follow the steps in this document to help notify the kernel developers, and help solve the issue. diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index d4b6217472b0..565df595152e 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -35,6 +35,14 @@ Below are the essential guides that every developer should read. kernel-enforcement-statement kernel-driver-statement +For security issues, see: + +.. toctree:: + :maxdepth: 1 + + security-bugs + embargoed-hardware-issues + Other guides to the community that are of interest to most developers are: .. toctree:: @@ -47,7 +55,6 @@ Other guides to the community that are of interest to most developers are: submit-checklist kernel-docs deprecated - embargoed-hardware-issues maintainers researcher-guidelines diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst index afc944e0e898..9fcfed3c350b 100644 --- a/Documentation/process/researcher-guidelines.rst +++ b/Documentation/process/researcher-guidelines.rst @@ -68,7 +68,7 @@ Before contributing, carefully read the appropriate documentation: * Documentation/process/development-process.rst * Documentation/process/submitting-patches.rst * Documentation/admin-guide/reporting-issues.rst -* Documentation/admin-guide/security-bugs.rst +* Documentation/process/security-bugs.rst Then send a patch (including a commit log with all the details listed below) and follow up on any feedback from other developers. diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/process/security-bugs.rst similarity index 100% rename from Documentation/admin-guide/security-bugs.rst rename to Documentation/process/security-bugs.rst diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 2fd8aa593a28..51df1197d5ab 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -39,7 +39,7 @@ Procedure for submitting patches to the -stable tree Security patches should not be handled (solely) by the -stable review process but should follow the procedures in - :ref:`Documentation/admin-guide/security-bugs.rst `. + :ref:`Documentation/process/security-bugs.rst `. For all other submissions, choose one of the following procedures ----------------------------------------------------------------- diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index eac7167dce83..7b223f306efa 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -254,7 +254,7 @@ If you have a patch that fixes an exploitable security bug, send that patch to security@kernel.org. For severe bugs, a short embargo may be considered to allow distributors to get the patch out to users; in such cases, obviously, the patch should not be sent to any public lists. See also -Documentation/admin-guide/security-bugs.rst. +Documentation/process/security-bugs.rst. Patches that fix a severe bug in a released kernel should be directed toward the stable maintainers by putting a line like this:: diff --git a/Documentation/translations/it_IT/admin-guide/security-bugs.rst b/Documentation/translations/it_IT/admin-guide/security-bugs.rst index 18a5822c7d9a..20994f4bfa31 100644 --- a/Documentation/translations/it_IT/admin-guide/security-bugs.rst +++ b/Documentation/translations/it_IT/admin-guide/security-bugs.rst @@ -1,6 +1,6 @@ .. include:: ../disclaimer-ita.rst -:Original: :ref:`Documentation/admin-guide/security-bugs.rst ` +:Original: :ref:`Documentation/process/security-bugs.rst ` .. _it_securitybugs: diff --git a/Documentation/translations/it_IT/process/submitting-patches.rst b/Documentation/translations/it_IT/process/submitting-patches.rst index c2cfa0948b2b..167fce813032 100644 --- a/Documentation/translations/it_IT/process/submitting-patches.rst +++ b/Documentation/translations/it_IT/process/submitting-patches.rst @@ -272,7 +272,7 @@ embargo potrebbe essere preso in considerazione per dare il tempo alle distribuzioni di prendere la patch e renderla disponibile ai loro utenti; in questo caso, ovviamente, la patch non dovrebbe essere inviata su alcuna lista di discussione pubblica. Leggete anche -Documentation/admin-guide/security-bugs.rst. +Documentation/process/security-bugs.rst. Patch che correggono bachi importanti su un kernel già rilasciato, dovrebbero essere inviate ai manutentori dei kernel stabili aggiungendo la seguente riga:: diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/howto.rst index 9b0b3436dfcf..8d856ebe873c 100644 --- a/Documentation/translations/ja_JP/howto.rst +++ b/Documentation/translations/ja_JP/howto.rst @@ -167,7 +167,7 @@ linux-api@vger.kernel.org に送ることを勧めます。 このドキュメントは Linux 開発の思想を理解するのに非常に重要です。 そして、他のOSでの開発者が Linux に移る時にとても重要です。 - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を 支援してください。 diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst index 969e91a95bb0..34f14899c155 100644 --- a/Documentation/translations/ko_KR/howto.rst +++ b/Documentation/translations/ko_KR/howto.rst @@ -157,7 +157,7 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다. 리눅스로 전향하는 사람들에게는 매우 중요하다. - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` 여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에 나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록 도와 달라. diff --git a/Documentation/translations/sp_SP/howto.rst b/Documentation/translations/sp_SP/howto.rst index f9818d687b54..f1629738b49d 100644 --- a/Documentation/translations/sp_SP/howto.rst +++ b/Documentation/translations/sp_SP/howto.rst @@ -135,7 +135,7 @@ de obligada lectura: de Linux y es muy importante para las personas que se mudan a Linux tras desarrollar otros sistemas operativos. - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` Si cree que ha encontrado un problema de seguridad en el kernel de Linux, siga los pasos de este documento para ayudar a notificar a los desarrolladores del kernel y ayudar a resolver el problema. diff --git a/Documentation/translations/sp_SP/process/submitting-patches.rst b/Documentation/translations/sp_SP/process/submitting-patches.rst index bf95ceb5e865..c2757d9ab216 100644 --- a/Documentation/translations/sp_SP/process/submitting-patches.rst +++ b/Documentation/translations/sp_SP/process/submitting-patches.rst @@ -276,7 +276,7 @@ parche a security@kernel.org. Para errores graves, se debe mantener un poco de discreción y permitir que los distribuidores entreguen el parche a los usuarios; en esos casos, obviamente, el parche no debe enviarse a ninguna lista pública. Revise también -Documentation/admin-guide/security-bugs.rst. +Documentation/process/security-bugs.rst. Los parches que corrigen un error grave en un kernel en uso deben dirigirse hacia los maintainers estables poniendo una línea como esta:: diff --git a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst index b8120391755d..d6b8f8a4e7f6 100644 --- a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst +++ b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst @@ -1,6 +1,6 @@ .. include:: ../disclaimer-zh_CN.rst -:Original: :doc:`../../../admin-guide/security-bugs` +:Original: :doc:`../../../process/security-bugs` :译者: diff --git a/Documentation/translations/zh_CN/process/howto.rst b/Documentation/translations/zh_CN/process/howto.rst index 10254751df6a..cc47be356dd3 100644 --- a/Documentation/translations/zh_CN/process/howto.rst +++ b/Documentation/translations/zh_CN/process/howto.rst @@ -125,7 +125,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与 这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系 统转移到Linux的人来说也很重要。 - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` 如果你认为自己发现了Linux内核的安全性问题,请根据这篇文档中的步骤来 提醒其他内核开发者并帮助解决这个问题。 diff --git a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst index eed260ef0c37..15f8e9005071 100644 --- a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst +++ b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst @@ -2,7 +2,7 @@ .. include:: ../disclaimer-zh_TW.rst -:Original: :doc:`../../../admin-guide/security-bugs` +:Original: :doc:`../../../process/security-bugs` :譯者: diff --git a/Documentation/translations/zh_TW/process/howto.rst b/Documentation/translations/zh_TW/process/howto.rst index 8fb8edcaee66..ea2f468d3e58 100644 --- a/Documentation/translations/zh_TW/process/howto.rst +++ b/Documentation/translations/zh_TW/process/howto.rst @@ -128,7 +128,7 @@ Linux內核代碼中包含有大量的文檔。這些文檔對於學習如何與 這篇文檔對於理解Linux的開發哲學至關重要。對於將開發平台從其他操作系 統轉移到Linux的人來說也很重要。 - :ref:`Documentation/admin-guide/security-bugs.rst ` + :ref:`Documentation/process/security-bugs.rst ` 如果你認爲自己發現了Linux內核的安全性問題,請根據這篇文檔中的步驟來 提醒其他內核開發者並幫助解決這個問題。 diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f305..f9f0366a6190 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -73,7 +73,7 @@ Tips for patch submitters and ideally, should come with a patch proposal. Please do not send automated reports to this list either. Such bugs will be handled better and faster in the usual public places. See - Documentation/admin-guide/security-bugs.rst for details. + Documentation/process/security-bugs.rst for details. 8. Happy hacking. @@ -18801,7 +18801,7 @@ F: include/uapi/linux/sed* SECURITY CONTACT M: Security Officers S: Supported -F: Documentation/admin-guide/security-bugs.rst +F: Documentation/process/security-bugs.rst SECURITY SUBSYSTEM M: Paul Moore From 3c728b1bc5b99c5275ac5c7788ef814c0e51ef54 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Tue, 14 Mar 2023 17:05:52 +0800 Subject: [PATCH 009/173] ASOC: Intel: sof_sdw: add quirk for Intel 'Rooks County' NUC M15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same quirks as the 'Bishop County' NUC M15, except the rt711 is in the 'JD2 100K' jack detection mode. Link: https://github.com/thesofproject/linux/issues/4088 Signed-off-by: Eugene Huang Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230314090553.498664-2-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index d2ed807abde9..767fa89d0870 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -213,6 +213,17 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_SDW_PCH_DMIC | RT711_JD1), }, + { + /* NUC15 'Rooks County' LAPRC510 and LAPRC710 skews */ + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel(R) Client Systems"), + DMI_MATCH(DMI_PRODUCT_NAME, "LAPRC"), + }, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_SDW_PCH_DMIC | + RT711_JD2_100K), + }, /* TigerLake-SDCA devices */ { .callback = sof_sdw_quirk_cb, From 9c691a42b8926c8966561265cdae3ddc7464d3a2 Mon Sep 17 00:00:00 2001 From: Eugene Huang Date: Tue, 14 Mar 2023 17:05:53 +0800 Subject: [PATCH 010/173] ASoC: Intel: soc-acpi: add table for Intel 'Rooks County' NUC M15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same topology as the HP Omen 16-k0005TX, except with the rt1316 amp on link2. Link: https://github.com/thesofproject/linux/issues/4088 Signed-off-by: Eugene Huang Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230314090553.498664-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- .../intel/common/soc-acpi-intel-adl-match.c | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 28dd2046e4ac..d8c80041388a 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -354,6 +354,20 @@ static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link3[] = { {} }; +static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link2[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(rt711_sdca_0_adr), + .adr_d = rt711_sdca_0_adr, + }, + { + .mask = BIT(2), + .num_adr = ARRAY_SIZE(rt1316_2_single_adr), + .adr_d = rt1316_2_single_adr, + }, + {} +}; + static const struct snd_soc_acpi_adr_device mx8373_2_adr[] = { { .adr = 0x000223019F837300ull, @@ -624,6 +638,12 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = { .drv_name = "sof_sdw", .sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l3.tplg", }, + { + .link_mask = 0x5, /* 2 active links required */ + .links = adl_sdw_rt711_link0_rt1316_link2, + .drv_name = "sof_sdw", + .sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l2.tplg", + }, { .link_mask = 0x1, /* link0 required */ .links = adl_rvp, From 083a25b18d6ad9f1f540e629909aa3eaaaf01823 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 9 Mar 2023 15:13:37 +0800 Subject: [PATCH 011/173] ASoC: soc-pcm: fix hw->formats cleared by soc_pcm_hw_init() for dpcm The hw->formats may be set by snd_dmaengine_pcm_refine_runtime_hwparams() in component's startup()/open(), but soc_pcm_hw_init() will init hw->formats in dpcm_runtime_setup_fe() after component's startup()/open(), which causes the valuable hw->formats to be cleared. So need to store the hw->formats before initialization, then restore it after initialization. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1678346017-3660-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 5eb056b942ce..7958c9defd49 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1661,10 +1661,14 @@ static void dpcm_runtime_setup_fe(struct snd_pcm_substream *substream) struct snd_pcm_hardware *hw = &runtime->hw; struct snd_soc_dai *dai; int stream = substream->stream; + u64 formats = hw->formats; int i; soc_pcm_hw_init(hw); + if (formats) + hw->formats &= formats; + for_each_rtd_cpu_dais(fe, i, dai) { struct snd_soc_pcm_stream *cpu_stream; From 7c3940bf81e5664cdb50c3fedfec8f0a756a34fb Mon Sep 17 00:00:00 2001 From: "GuoRui.Yu" Date: Thu, 23 Feb 2023 00:53:15 +0800 Subject: [PATCH 012/173] swiotlb: fix the deadlock in swiotlb_do_find_slots In general, if swiotlb is sufficient, the logic of index = wrap_area_index(mem, index + 1) is fine, it will quickly take a slot and release the area->lock; But if swiotlb is insufficient and the device has min_align_mask requirements, such as NVME, we may not be able to satisfy index == wrap and exit the loop properly. In this case, other kernel threads will not be able to acquire the area->lock and release the slot, resulting in a deadlock. The current implementation of wrap_area_index does not involve a modulo operation, so adjusting the wrap to ensure the loop ends is not trivial. Introduce a new variable to record the number of loops and exit the loop after completing the traversal. Backtraces: Other CPUs are waiting this core to exit the swiotlb_do_find_slots loop. [10199.924391] RIP: 0010:swiotlb_do_find_slots+0x1fe/0x3e0 [10199.924403] Call Trace: [10199.924404] [10199.924405] swiotlb_tbl_map_single+0xec/0x1f0 [10199.924407] swiotlb_map+0x5c/0x260 [10199.924409] ? nvme_pci_setup_prps+0x1ed/0x340 [10199.924411] dma_direct_map_page+0x12e/0x1c0 [10199.924413] nvme_map_data+0x304/0x370 [10199.924415] nvme_prep_rq.part.0+0x31/0x120 [10199.924417] nvme_queue_rq+0x77/0x1f0 ... [ 9639.596311] NMI backtrace for cpu 48 [ 9639.596336] Call Trace: [ 9639.596337] [ 9639.596338] _raw_spin_lock_irqsave+0x37/0x40 [ 9639.596341] swiotlb_do_find_slots+0xef/0x3e0 [ 9639.596344] swiotlb_tbl_map_single+0xec/0x1f0 [ 9639.596347] swiotlb_map+0x5c/0x260 [ 9639.596349] dma_direct_map_sg+0x7a/0x280 [ 9639.596352] __dma_map_sg_attrs+0x30/0x70 [ 9639.596355] dma_map_sgtable+0x1d/0x30 [ 9639.596356] nvme_map_data+0xce/0x370 ... [ 9639.595665] NMI backtrace for cpu 50 [ 9639.595682] Call Trace: [ 9639.595682] [ 9639.595683] _raw_spin_lock_irqsave+0x37/0x40 [ 9639.595686] swiotlb_release_slots.isra.0+0x86/0x180 [ 9639.595688] dma_direct_unmap_sg+0xcf/0x1a0 [ 9639.595690] nvme_unmap_data.part.0+0x43/0xc0 Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask") Signed-off-by: GuoRui.Yu Signed-off-by: Xiaokang Hu Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 03e3251cd9d2..91454b513db0 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -625,8 +625,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, unsigned int iotlb_align_mask = dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int index, wrap, count = 0, i; unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; unsigned int slot_index; @@ -649,15 +649,16 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, goto not_found; slot_base = area_index * mem->area_nslabs; - index = wrap = wrap_area_index(mem, ALIGN(area->index, stride)); + index = wrap_area_index(mem, ALIGN(area->index, stride)); - do { + for (slots_checked = 0; slots_checked < mem->area_nslabs; ) { slot_index = slot_base + index; if (orig_addr && (slot_addr(tbl_dma_addr, slot_index) & iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { index = wrap_area_index(mem, index + 1); + slots_checked++; continue; } @@ -673,7 +674,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, goto found; } index = wrap_area_index(mem, index + stride); - } while (index != wrap); + slots_checked += stride; + } not_found: spin_unlock_irqrestore(&area->lock, flags); From f5bad62f9107b701a6def7cac1f5f65862219b83 Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Fri, 17 Mar 2023 03:19:51 -0700 Subject: [PATCH 013/173] Input: i8042 - add quirk for Fujitsu Lifebook A574/H Fujitsu Lifebook A574/H requires the nomux option to properly probe the touchpad, especially when waking from sleep. Signed-off-by: Jonathan Denose Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20230303152623.45859-1-jdenose@google.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index efc61736099b..fe7ffe30997c 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -610,6 +610,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook A574/H */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, { /* Gigabyte M912 */ .matches = { From 8a0432bab6ea3203d220785da7ab3c7677f70ecb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 17 Mar 2023 03:13:12 -0700 Subject: [PATCH 014/173] Input: goodix - add Lenovo Yoga Book X90F to nine_bytes_report DMI table The Android Lenovo Yoga Book X90F / X90L uses the same goodix touchscreen with 9 bytes touch reports for its touch keyboard as the already supported Windows Lenovo Yoga Book X91F/L, add a DMI match for this to the nine_bytes_report DMI table. When the quirk for the X91F/L was initially added it was written to also apply to the X90F/L but this does not work because the Android version of the Yoga Book uses completely different DMI strings. Also adjust the X91F/L quirk to reflect that it only applies to the X91F/L models. Signed-off-by: Hans de Goede Reviewed-by: Bastien Nocera Link: https://lore.kernel.org/r/20230315134442.71787-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index b348172f19c3..d77f116680a0 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -124,10 +124,18 @@ static const unsigned long goodix_irq_flags[] = { static const struct dmi_system_id nine_bytes_report[] = { #if defined(CONFIG_DMI) && defined(CONFIG_X86) { - .ident = "Lenovo YogaBook", - /* YB1-X91L/F and YB1-X90L/F */ + /* Lenovo Yoga Book X90F / X90L */ .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9") + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), + } + }, + { + /* Lenovo Yoga Book X91F / X91L */ + .matches = { + /* Non exact match to match F + L versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"), } }, #endif From f8acb24aaf89fc46cd953229462ea8abe31b395f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 15 Mar 2023 08:34:13 -0700 Subject: [PATCH 015/173] x86/hyperv: Block root partition functionality in a Confidential VM Hyper-V should never specify a VM that is a Confidential VM and also running in the root partition. Nonetheless, explicitly block such a combination to guard against a compromised Hyper-V maliciously trying to exploit root partition functionality in a Confidential VM to expose Confidential VM secrets. No known bug is being fixed, but the attack surface for Confidential VMs on Hyper-V is reduced. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1678894453-95392-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/kernel/cpu/mshyperv.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f36dc2f796c5..f1197366a97d 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -358,12 +358,16 @@ static void __init ms_hyperv_init_platform(void) * To mirror what Windows does we should extract CPU management * features and use the ReservedIdentityBit to detect if Linux is the * root partition. But that requires negotiating CPU management - * interface (a process to be finalized). + * interface (a process to be finalized). For now, use the privilege + * flag as the indicator for running as root. * - * For now, use the privilege flag as the indicator for running as - * root. + * Hyper-V should never specify running as root and as a Confidential + * VM. But to protect against a compromised/malicious Hyper-V trying + * to exploit root behavior to expose Confidential VM memory, ignore + * the root partition setting if also a Confidential VM. */ - if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) { + if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) && + !(ms_hyperv.priv_high & HV_ISOLATION)) { hv_root_partition = true; pr_info("Hyper-V: running as root partition\n"); } From 205efd4619b860404ebb5882e5a119eb3b3b3716 Mon Sep 17 00:00:00 2001 From: Ge-org Brohammer Date: Fri, 17 Mar 2023 00:38:51 +0200 Subject: [PATCH 016/173] ASoC: amd: yc: Add DMI entries to support Victus by HP Laptop 16-e1xxx (8A22) This model requires an additional detection quirk to enable the internal microphone. Tried to use git send-email this time. Signed-off-by: Ge-org Brohammer Link: https://lore.kernel.org/r/PAVP195MB2261322C220E95D7F4B2732ADABC9@PAVP195MB2261.EURP195.PROD.OUTLOOK.COM Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 4a69ce702360..0acdf0156f07 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -269,6 +269,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A43"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A22"), + } + }, {} }; From ab327f8acdf8d06601fbf058859a539a9422afff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Fri, 17 Mar 2023 11:20:04 +0100 Subject: [PATCH 017/173] mips: bmips: BCM6358: disable RAC flush for TP1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RAC flush causes kernel panics on BCM6358 with EHCI/OHCI when booting from TP1: [ 3.881739] usb 1-1: new high-speed USB device number 2 using ehci-platform [ 3.895011] Reserved instruction in kernel code[#1]: [ 3.900113] CPU: 0 PID: 1 Comm: init Not tainted 5.10.16 #0 [ 3.905829] $ 0 : 00000000 10008700 00000000 77d94060 [ 3.911238] $ 4 : 7fd1f088 00000000 81431cac 81431ca0 [ 3.916641] $ 8 : 00000000 ffffefff 8075cd34 00000000 [ 3.922043] $12 : 806f8d40 f3e812b7 00000000 000d9aaa [ 3.927446] $16 : 7fd1f068 7fd1f080 7ff559b8 81428470 [ 3.932848] $20 : 00000000 00000000 55590000 77d70000 [ 3.938251] $24 : 00000018 00000010 [ 3.943655] $28 : 81430000 81431e60 81431f28 800157fc [ 3.949058] Hi : 00000000 [ 3.952013] Lo : 00000000 [ 3.955019] epc : 80015808 setup_sigcontext+0x54/0x24c [ 3.960464] ra : 800157fc setup_sigcontext+0x48/0x24c [ 3.965913] Status: 10008703 KERNEL EXL IE [ 3.970216] Cause : 00800028 (ExcCode 0a) [ 3.974340] PrId : 0002a010 (Broadcom BMIPS4350) [ 3.979170] Modules linked in: ohci_platform ohci_hcd fsl_mph_dr_of ehci_platform ehci_fsl ehci_hcd gpio_button_hotplug usbcore nls_base usb_common [ 3.992907] Process init (pid: 1, threadinfo=(ptrval), task=(ptrval), tls=77e22ec8) [ 4.000776] Stack : 81431ef4 7fd1f080 81431f28 81428470 7fd1f068 81431edc 7ff559b8 81428470 [ 4.009467] 81431f28 7fd1f080 55590000 77d70000 77d5498c 80015c70 806f0000 8063ae74 [ 4.018149] 08100002 81431f28 0000000a 08100002 81431f28 0000000a 77d6b418 00000003 [ 4.026831] ffffffff 80016414 80080734 81431ecc 81431ecc 00000001 00000000 04000000 [ 4.035512] 77d54874 00000000 00000000 00000000 00000000 00000012 00000002 00000000 [ 4.044196] ... [ 4.046706] Call Trace: [ 4.049238] [<80015808>] setup_sigcontext+0x54/0x24c [ 4.054356] [<80015c70>] setup_frame+0xdc/0x124 [ 4.059015] [<80016414>] do_notify_resume+0x1dc/0x288 [ 4.064207] [<80011b50>] work_notifysig+0x10/0x18 [ 4.069036] [ 4.070538] Code: 8fc300b4 00001025 26240008 ac830004 3c048063 0c0228aa 24846a00 26240010 [ 4.080686] [ 4.082517] ---[ end trace 22a8edb41f5f983b ]--- [ 4.087374] Kernel panic - not syncing: Fatal exception [ 4.092753] Rebooting in 1 seconds.. Because the bootloader (CFE) is not initializing the Read-ahead cache properly on the second thread (TP1). Since the RAC was not initialized properly, we should avoid flushing it at the risk of corrupting the instruction stream as seen in the trace above. Fixes: d59098a0e9cb ("MIPS: bmips: use generic dma noncoherent ops") Signed-off-by: Álvaro Fernández Rojas Signed-off-by: Thomas Bogendoerfer --- arch/mips/bmips/dma.c | 5 +++++ arch/mips/bmips/setup.c | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c index 33788668cbdb..3779e7855bd7 100644 --- a/arch/mips/bmips/dma.c +++ b/arch/mips/bmips/dma.c @@ -5,6 +5,8 @@ #include #include +bool bmips_rac_flush_disable; + void arch_sync_dma_for_cpu_all(void) { void __iomem *cbr = BMIPS_GET_CBR(); @@ -15,6 +17,9 @@ void arch_sync_dma_for_cpu_all(void) boot_cpu_type() != CPU_BMIPS4380) return; + if (unlikely(bmips_rac_flush_disable)) + return; + /* Flush stale data out of the readahead cache */ cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG); __raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG); diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index e95b3f78e7cd..549a6392a3d2 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -35,6 +35,8 @@ #define REG_BCM6328_OTP ((void __iomem *)CKSEG1ADDR(0x1000062c)) #define BCM6328_TP1_DISABLED BIT(9) +extern bool bmips_rac_flush_disable; + static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000; struct bmips_quirk { @@ -104,6 +106,12 @@ static void bcm6358_quirks(void) * disable SMP for now */ bmips_smp_enabled = 0; + + /* + * RAC flush causes kernel panics on BCM6358 when booting from TP1 + * because the bootloader is not initializing it properly. + */ + bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)); } static void bcm6368_quirks(void) From ffa6206ebf8d39e83d87ac226df68dbbe155819a Mon Sep 17 00:00:00 2001 From: Matthias Benkmann Date: Sun, 19 Mar 2023 21:30:15 -0700 Subject: [PATCH 018/173] Input: xpad - fix incorrectly applied patch for MAP_PROFILE_BUTTON When commit commit fff1011a26d6 ("Input: xpad - add X-Box Adaptive Profile button") was applied, one hunk ended up in the wrong function; move it to where it belongs. Fixes: fff1011a26d6 ("Input: xpad - add X-Box Adaptive Profile button") Signed-off-by: Matthias Benkmann Link: https://lore.kernel.org/r/20230318162106.0aef4ba5@ninja Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index f642ec8e92dd..29131f1a2f06 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -781,9 +781,6 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d input_report_key(dev, BTN_C, data[8]); input_report_key(dev, BTN_Z, data[9]); - /* Profile button has a value of 0-3, so it is reported as an axis */ - if (xpad->mapping & MAP_PROFILE_BUTTON) - input_report_abs(dev, ABS_PROFILE, data[34]); input_sync(dev); } @@ -1061,6 +1058,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char (__u16) le16_to_cpup((__le16 *)(data + 8))); } + /* Profile button has a value of 0-3, so it is reported as an axis */ + if (xpad->mapping & MAP_PROFILE_BUTTON) + input_report_abs(dev, ABS_PROFILE, data[34]); + /* paddle handling */ /* based on SDL's SDL_hidapi_xboxone.c */ if (xpad->mapping & MAP_PADDLES) { From 8980f190947ba29f23110408e712444884b74251 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 19 Mar 2023 21:36:36 -0700 Subject: [PATCH 019/173] Input: focaltech - use explicitly signed char type The recent change of -funsigned-char causes additions of negative numbers to become additions of large positive numbers, leading to wrong calculations of mouse movement. Change these casts to be explicitly signed, to take into account negative offsets. Fixes: 3bc753c06dd0 ("kbuild: treat char as always unsigned") Signed-off-by: Jason A. Donenfeld Reviewed-by: Hans de Goede Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217211 Link: https://lore.kernel.org/r/20230318133010.1285202-1-Jason@zx2c4.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/focaltech.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c index 6fd5fff0cbff..c74b99077d16 100644 --- a/drivers/input/mouse/focaltech.c +++ b/drivers/input/mouse/focaltech.c @@ -202,8 +202,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse, state->pressed = packet[0] >> 7; finger1 = ((packet[0] >> 4) & 0x7) - 1; if (finger1 < FOC_MAX_FINGERS) { - state->fingers[finger1].x += (char)packet[1]; - state->fingers[finger1].y += (char)packet[2]; + state->fingers[finger1].x += (s8)packet[1]; + state->fingers[finger1].y += (s8)packet[2]; } else { psmouse_err(psmouse, "First finger in rel packet invalid: %d\n", finger1); @@ -218,8 +218,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse, */ finger2 = ((packet[3] >> 4) & 0x7) - 1; if (finger2 < FOC_MAX_FINGERS) { - state->fingers[finger2].x += (char)packet[4]; - state->fingers[finger2].y += (char)packet[5]; + state->fingers[finger2].x += (s8)packet[4]; + state->fingers[finger2].y += (s8)packet[5]; } } From 754ff5060daf5a1cf4474eff9b4edeb6c17ef7ab Mon Sep 17 00:00:00 2001 From: msizanoen Date: Sun, 19 Mar 2023 23:02:56 -0700 Subject: [PATCH 020/173] Input: alps - fix compatibility with -funsigned-char The AlpsPS/2 code previously relied on the assumption that `char` is a signed type, which was true on x86 platforms (the only place where this driver is used) before kernel 6.2. However, on 6.2 and later, this assumption is broken due to the introduction of -funsigned-char as a new global compiler flag. Fix this by explicitly specifying the signedness of `char` when sign extending the values received from the device. Fixes: f3f33c677699 ("Input: alps - Rushmore and v7 resolution support") Signed-off-by: msizanoen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230320045228.182259-1-msizanoen@qtmlabs.xyz Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 989228b5a0a4..e2c11d9f3868 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -852,8 +852,8 @@ static void alps_process_packet_v6(struct psmouse *psmouse) x = y = z = 0; /* Divide 4 since trackpoint's speed is too fast */ - input_report_rel(dev2, REL_X, (char)x / 4); - input_report_rel(dev2, REL_Y, -((char)y / 4)); + input_report_rel(dev2, REL_X, (s8)x / 4); + input_report_rel(dev2, REL_Y, -((s8)y / 4)); psmouse_report_standard_buttons(dev2, packet[3]); @@ -1104,8 +1104,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse) ((packet[3] & 0x20) << 1); z = (packet[5] & 0x3f) | ((packet[3] & 0x80) >> 1); - input_report_rel(dev2, REL_X, (char)x); - input_report_rel(dev2, REL_Y, -((char)y)); + input_report_rel(dev2, REL_X, (s8)x); + input_report_rel(dev2, REL_Y, -((s8)y)); input_report_abs(dev2, ABS_PRESSURE, z); psmouse_report_standard_buttons(dev2, packet[1]); @@ -2294,20 +2294,20 @@ static int alps_get_v3_v7_resolution(struct psmouse *psmouse, int reg_pitch) if (reg < 0) return reg; - x_pitch = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */ + x_pitch = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */ x_pitch = 50 + 2 * x_pitch; /* In 0.1 mm units */ - y_pitch = (char)reg >> 4; /* sign extend upper 4 bits */ + y_pitch = (s8)reg >> 4; /* sign extend upper 4 bits */ y_pitch = 36 + 2 * y_pitch; /* In 0.1 mm units */ reg = alps_command_mode_read_reg(psmouse, reg_pitch + 1); if (reg < 0) return reg; - x_electrode = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */ + x_electrode = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */ x_electrode = 17 + x_electrode; - y_electrode = (char)reg >> 4; /* sign extend upper 4 bits */ + y_electrode = (s8)reg >> 4; /* sign extend upper 4 bits */ y_electrode = 13 + y_electrode; x_phys = x_pitch * (x_electrode - 1); /* In 0.1 mm units */ From cdce67099117ece371582f706c6eff7d3a65326d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 10 Mar 2023 21:34:58 +0900 Subject: [PATCH 021/173] PCI: dwc: Fix PORT_LINK_CONTROL update when CDM check enabled If CDM_CHECK is enabled (by the DT "snps,enable-cdm-check" property), 'val' is overwritten by PCIE_PL_CHK_REG_CONTROL_STATUS initialization. Commit ec7b952f453c ("PCI: dwc: Always enable CDM check if "snps,enable-cdm-check" exists") did not account for further usage of 'val', so we wrote improper values to PCIE_PORT_LINK_CONTROL when the CDM check is enabled. Move the PCIE_PORT_LINK_CONTROL update to be completely after the PCIE_PL_CHK_REG_CONTROL_STATUS register initialization. [bhelgaas: commit log adapted from Serge's version] Fixes: ec7b952f453c ("PCI: dwc: Always enable CDM check if "snps,enable-cdm-check" exists") Link: https://lore.kernel.org/r/20230310123510.675685-2-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Yoshihiro Shimoda Signed-off-by: Bjorn Helgaas Reviewed-by: Serge Semin --- drivers/pci/controller/dwc/pcie-designware.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index 53a16b8b6ac2..8e33e6e59e68 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -1001,11 +1001,6 @@ void dw_pcie_setup(struct dw_pcie *pci) dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); } - val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL); - val &= ~PORT_LINK_FAST_LINK_MODE; - val |= PORT_LINK_DLL_LINK_EN; - dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); - if (dw_pcie_cap_is(pci, CDM_CHECK)) { val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS | @@ -1013,6 +1008,11 @@ void dw_pcie_setup(struct dw_pcie *pci) dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); } + val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL); + val &= ~PORT_LINK_FAST_LINK_MODE; + val |= PORT_LINK_DLL_LINK_EN; + dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); + if (!pci->num_lanes) { dev_dbg(pci->dev, "Using h/w default number of lanes\n"); return; From 3ced71d273f8edf07bf01a831a49ca6b988e06b3 Mon Sep 17 00:00:00 2001 From: Kevin Locke Date: Tue, 21 Mar 2023 15:39:22 -0600 Subject: [PATCH 022/173] kbuild: deb-pkg: set version for linux-headers paths As a result of the switch to dh_listpackages, $version is no longer set when install_kernel_headers() is called. This causes files in the linux-headers deb package to be installed to a path with an empty $version (e.g. /usr/src/linux-headers-/scripts/sign-file rather than /usr/src/linux-headers-6.3.0-rc3/scripts/sign-file). To avoid this, while continuing to use the version information from dh_listpackages, pass $version from $package as the second argument of install_kernel_headers(). Fixes: 36862e14e316 ("kbuild: deb-pkg: use dh_listpackages to know enabled packages") Signed-off-by: Kevin Locke Signed-off-by: Masahiro Yamada --- scripts/package/builddeb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index c5ae57167d7c..7b23f52c70c5 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -162,6 +162,7 @@ install_linux_image_dbg () { install_kernel_headers () { pdir=$1 + version=$2 rm -rf $pdir @@ -229,7 +230,7 @@ do linux-libc-dev) install_libc_headers debian/linux-libc-dev;; linux-headers-*) - install_kernel_headers debian/linux-headers;; + install_kernel_headers debian/linux-headers ${package#linux-headers-};; esac done From 39e7d2ab6ea9fd6b389091ec223d566934fe7be5 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 21 Mar 2023 09:31:26 +0100 Subject: [PATCH 023/173] swiotlb: use wrap_area_index() instead of open-coding it No functional change, just use an existing helper. Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 91454b513db0..3856e2b524b4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -695,10 +695,7 @@ found: /* * Update the indices to avoid searching in the next round. */ - if (index + nslots < mem->area_nslabs) - area->index = index + nslots; - else - area->index = 0; + area->index = wrap_area_index(mem, index + nslots); area->used += nslots; spin_unlock_irqrestore(&area->lock, flags); return slot_index; From 0eee5ae1025699ea93d44fdb6ef2365505082103 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Tue, 21 Mar 2023 09:31:27 +0100 Subject: [PATCH 024/173] swiotlb: fix slot alignment checks Explicit alignment and page alignment are used only to calculate the stride, not when checking actual slot physical address. Originally, only page alignment was implemented, and that worked, because the whole SWIOTLB is allocated on a page boundary, so aligning the start index was sufficient to ensure a page-aligned slot. When commit 1f221a0d0dbf ("swiotlb: respect min_align_mask") added support for min_align_mask, the index could be incremented in the search loop, potentially finding an unaligned slot if minimum device alignment is between IO_TLB_SIZE and PAGE_SIZE. The bug could go unnoticed, because the slot size is 2 KiB, and the most common page size is 4 KiB, so there is no alignment value in between. IIUC the intention has been to find a slot that conforms to all alignment constraints: device minimum alignment, an explicit alignment (given as function parameter) and optionally page alignment (if allocation size is >= PAGE_SIZE). The most restrictive mask can be trivially computed with logical AND. The rest can stay. Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask") Fixes: e81e99bacc9f ("swiotlb: Support aligned swiotlb buffers") Signed-off-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 3856e2b524b4..5b919ef832b6 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -634,22 +634,26 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index, BUG_ON(!nslots); BUG_ON(area_index >= mem->nareas); + /* + * For allocations of PAGE_SIZE or larger only look for page aligned + * allocations. + */ + if (alloc_size >= PAGE_SIZE) + iotlb_align_mask &= PAGE_MASK; + iotlb_align_mask &= alloc_align_mask; + /* * For mappings with an alignment requirement don't bother looping to - * unaligned slots once we found an aligned one. For allocations of - * PAGE_SIZE or larger only look for page aligned allocations. + * unaligned slots once we found an aligned one. */ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; - if (alloc_size >= PAGE_SIZE) - stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); - stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1); spin_lock_irqsave(&area->lock, flags); if (unlikely(nslots > mem->area_nslabs - area->used)) goto not_found; slot_base = area_index * mem->area_nslabs; - index = wrap_area_index(mem, ALIGN(area->index, stride)); + index = area->index; for (slots_checked = 0; slots_checked < mem->area_nslabs; ) { slot_index = slot_base + index; From e51f49512d98783b90799c9cc2002895ec3aa0eb Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan Date: Wed, 22 Mar 2023 10:55:38 +0200 Subject: [PATCH 025/173] ASoC: SOF: ipc4: Ensure DSP is in D0I0 during sof_ipc4_set_get_data() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The set_get_data() IPC op bypasses the check for the no_pm flag as done with the regular IPC tx_msg op. Since set_get_data should be performed when the DSP is in D0I0, set the DSP power state to D0I0 before sending the IPC's in sof_ipc4_set_get_data(). Fixes: ceb89acc4dc8 ("ASoC: SOF: ipc4: Add support for mandatory message handling functionality") Signed-off-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Pierre-Louis Bossart Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230322085538.10214-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c index 8ede4b952997..246b56d24a6f 100644 --- a/sound/soc/sof/ipc4.c +++ b/sound/soc/sof/ipc4.c @@ -405,6 +405,9 @@ static int sof_ipc4_tx_msg(struct snd_sof_dev *sdev, void *msg_data, size_t msg_ static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data, size_t payload_bytes, bool set) { + const struct sof_dsp_power_state target_state = { + .state = SOF_DSP_PM_D0, + }; size_t payload_limit = sdev->ipc->max_payload_size; struct sof_ipc4_msg *ipc4_msg = data; struct sof_ipc4_msg tx = {{ 0 }}; @@ -435,6 +438,11 @@ static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data, tx.extension |= SOF_IPC4_MOD_EXT_MSG_FIRST_BLOCK(1); + /* ensure the DSP is in D0i0 before sending IPC */ + ret = snd_sof_dsp_set_power_state(sdev, &target_state); + if (ret < 0) + return ret; + /* Serialise IPC TX */ mutex_lock(&sdev->ipc->tx_mutex); From d701cf6578e8447af4ac0fa08e7c5a0fad6df1ae Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 21 Feb 2023 17:10:43 +0100 Subject: [PATCH 026/173] MAINTAINERS: Update s390-iommu driver maintainer information The s390 DMA API conversion changes currently under review will extend the use of the s390-iommu driver to the DMA API. With s390's mandatory use of an IOMMU this means all DMA for PCI devices will then use the s390-iommu driver. With this in mind and considering my involvement in these changes it makes sense to reflect this increased interdependence in the maintainer structure. Thus add myself as first maintainer and move Gerald to reviewer status. Reviewed-by: Gerald Schaefer Reviewed-by: Matthew Rosato Acked-by: Peter Oberparleiter Signed-off-by: Niklas Schnelle Link: https://lore.kernel.org/r/20230221161043.37065-1-schnelle@linux.ibm.com Signed-off-by: Joerg Roedel --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d8ebab595b2a..33a61d516446 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18291,8 +18291,9 @@ F: drivers/s390/block/dasd* F: include/linux/dasd_mod.h S390 IOMMU (PCI) +M: Niklas Schnelle M: Matthew Rosato -M: Gerald Schaefer +R: Gerald Schaefer L: linux-s390@vger.kernel.org S: Supported F: drivers/iommu/s390-iommu.c From e38c5e80c3d293a883c6f1d553f2146ec0bda35e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Mar 2023 15:53:32 +0100 Subject: [PATCH 027/173] ASoC: Intel: bytcr_rt5640: Add quirk for the Acer Iconia One 7 B1-750 The Acer Iconia One 7 B1-750 tablet mostly works fine with the defaults for an Bay Trail CR tablet. Except for the internal mic, instead of an analog mic on IN3 a digital mic on DMIC1 is uses. Add a quirk with these settings for this tablet. Acked-by: Pierre-Louis Bossart Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230322145332.131525-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 79e0039c79a3..5a12940ef907 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -533,6 +533,18 @@ static int byt_rt5640_aif1_hw_params(struct snd_pcm_substream *substream, /* Please keep this list alphabetically sorted */ static const struct dmi_system_id byt_rt5640_quirk_table[] = { + { /* Acer Iconia One 7 B1-750 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "VESPA2"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD1_IN4P | + BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Acer Iconia Tab 8 W1-810 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), From 6165a16a5ad9b237bb3131cff4d3c601ccb8f9a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 Mar 2023 00:17:36 -0400 Subject: [PATCH 028/173] NFSv4: Fix hangs when recovering open state after a server reboot When we're using a cached open stateid or a delegation in order to avoid sending a CLAIM_PREVIOUS open RPC call to the server, we don't have a new open stateid to present to update_open_stateid(). Instead rely on nfs4_try_open_cached(), just as if we were doing a normal open. Fixes: d2bfda2e7aa0 ("NFSv4: don't reprocess cached open CLAIM_PREVIOUS") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 22a93ae46cd7..5607b1e2b821 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1980,8 +1980,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (!data->rpc_done) { if (data->rpc_status) return ERR_PTR(data->rpc_status); - /* cached opens have already been processed */ - goto update; + return nfs4_try_open_cached(data); } ret = nfs_refresh_inode(inode, &data->f_attr); @@ -1990,7 +1989,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); -update: + if (!update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode)) return ERR_PTR(-EAGAIN); From 1073c15fd39e804ad36ff26a7c7d53b0ab51b184 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 22 Mar 2023 09:51:07 +0100 Subject: [PATCH 029/173] scripts: merge_config: Fix typo in variable name. ${WARNOVERRIDE} was misspelled as ${WARNOVVERIDE}, which caused a shell syntax error in certain paths of the script execution. Fixes: 46dff8d7e381 ("scripts: merge_config: Add option to suppress warning on overrides") Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Mark Brown Acked-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/kconfig/merge_config.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 32620de473ad..902eb429b9db 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -145,7 +145,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do NEW_VAL=$(grep -w $CFG $MERGE_FILE) BUILTIN_FLAG=false if [ "$BUILTIN" = "true" ] && [ "${NEW_VAL#CONFIG_*=}" = "m" ] && [ "${PREV_VAL#CONFIG_*=}" = "y" ]; then - ${WARNOVVERIDE} Previous value: $PREV_VAL + ${WARNOVERRIDE} Previous value: $PREV_VAL ${WARNOVERRIDE} New value: $NEW_VAL ${WARNOVERRIDE} -y passed, will not demote y to m ${WARNOVERRIDE} From fb27e70f6e408dee5d22b083e7a38a59e6118253 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 22 Mar 2023 19:11:45 +0100 Subject: [PATCH 030/173] modpost: Fix processing of CRCs on 32-bit build machines modpost now reads CRCs from .*.cmd files, parsing them using strtol(). This is inconsistent with its parsing of Module.symvers and with their definition as *unsigned* 32-bit values. strtol() clamps values to [LONG_MIN, LONG_MAX], and when building on a 32-bit system this changes all CRCs >= 0x80000000 to be 0x7fffffff. Change extract_crcs_for_object() to use strtoul() instead. Cc: stable@vger.kernel.org Fixes: f292d875d0dc ("modpost: extract symbol versions from *.cmd files") Signed-off-by: Ben Hutchings Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index efff8078e395..9466b6a2abae 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1733,7 +1733,7 @@ static void extract_crcs_for_object(const char *object, struct module *mod) if (!isdigit(*p)) continue; /* skip this line */ - crc = strtol(p, &p, 0); + crc = strtoul(p, &p, 0); if (*p != '\n') continue; /* skip this line */ From 6f57937980142715e927697a6ffd2050f38ed6f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:40 +0100 Subject: [PATCH 031/173] pwm: hibvt: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only both polarities. Complete the implementation of .get_state() by setting .polarity according to the configured hardware state. Fixes: d09f00810850 ("pwm: Add PWM driver for HiSilicon BVT SOCs") Link: https://lore.kernel.org/r/20230228135508.1798428-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-hibvt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index 12c05c155cab..1b9274c5ad87 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -146,6 +146,7 @@ static int hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, value = readl(base + PWM_CTRL_ADDR(pwm->hwpwm)); state->enabled = (PWM_ENABLE_MASK & value); + state->polarity = (PWM_POLARITY_MASK & value) ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL; return 0; } From 30006b77c7e130e01d1ab2148cc8abf73dfcc4bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:41 +0100 Subject: [PATCH 032/173] pwm: cros-ec: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. Reviewed-by: Guenter Roeck Fixes: 1f0d3bb02785 ("pwm: Add ChromeOS EC PWM driver") Link: https://lore.kernel.org/r/20230228135508.1798428-3-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-cros-ec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c index 86df6702cb83..ad18b0ebe3f1 100644 --- a/drivers/pwm/pwm-cros-ec.c +++ b/drivers/pwm/pwm-cros-ec.c @@ -198,6 +198,7 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, state->enabled = (ret > 0); state->period = EC_PWM_MAX_DUTY; + state->polarity = PWM_POLARITY_NORMAL; /* * Note that "disabled" and "duty cycle == 0" are treated the same. If From b20b097128d9145fadcea1cbb45c4d186cb57466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:42 +0100 Subject: [PATCH 033/173] pwm: iqs620a: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. Fixes: 6f0841a8197b ("pwm: Add support for Azoteq IQS620A PWM generator") Reviewed-by: Jeff LaBundy Link: https://lore.kernel.org/r/20230228135508.1798428-4-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-iqs620a.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 8362b4870c66..47b3141135f3 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -126,6 +126,7 @@ static int iqs620_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, mutex_unlock(&iqs620_pwm->lock); state->period = IQS620_PWM_PERIOD_NS; + state->polarity = PWM_POLARITY_NORMAL; return 0; } From 2be4dcf6627e1bcbbef8e6ba1811f5127d39202c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:43 +0100 Subject: [PATCH 034/173] pwm: sprd: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. Fixes: 8aae4b02e8a6 ("pwm: sprd: Add Spreadtrum PWM support") Link: https://lore.kernel.org/r/20230228135508.1798428-5-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c index d866ce345f97..bde579a338c2 100644 --- a/drivers/pwm/pwm-sprd.c +++ b/drivers/pwm/pwm-sprd.c @@ -109,6 +109,7 @@ static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, duty = val & SPRD_PWM_DUTY_MSK; tmp = (prescale + 1) * NSEC_PER_SEC * duty; state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate); + state->polarity = PWM_POLARITY_NORMAL; /* Disable PWM clocks if the PWM channel is not in enable state. */ if (!state->enabled) From 8caa81eb950cb2e9d2d6959b37d853162d197f57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:44 +0100 Subject: [PATCH 035/173] pwm: meson: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. This fixes a regression that was possible since commit c73a3107624d ("pwm: Handle .get_state() failures") which stopped to zero-initialize the state passed to the .get_state() callback. This was reported at https://forum.odroid.com/viewtopic.php?f=177&t=46360 . While this was an unintended side effect, the real issue is the driver's callback not setting the polarity. There is a complicating fact, that the .apply() callback fakes support for inversed polarity. This is not (and cannot) be matched by .get_state(). As fixing this isn't easy, only point it out in a comment to prevent authors of other drivers from copying that approach. Fixes: c375bcbaabdb ("pwm: meson: Read the full hardware state in meson_pwm_get_state()") Reported-by: Munehisa Kamata Acked-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20230310191405.2606296-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/pwm-meson.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 16d79ca5d8f5..5cd7b90872c6 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -162,6 +162,12 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, duty = state->duty_cycle; period = state->period; + /* + * Note this is wrong. The result is an output wave that isn't really + * inverted and so is wrongly identified by .get_state as normal. + * Fixing this needs some care however as some machines might rely on + * this. + */ if (state->polarity == PWM_POLARITY_INVERSED) duty = period - duty; @@ -358,6 +364,8 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, state->duty_cycle = 0; } + state->polarity = PWM_POLARITY_NORMAL; + return 0; } From 1271a7b98e7989ba6bb978e14403fc84efe16e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:45 +0100 Subject: [PATCH 036/173] pwm: Zero-initialize the pwm_state passed to driver's .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is just to ensure that .usage_power is properly initialized and doesn't contain random stack data. The other members of struct pwm_state should get a value assigned in a successful call to .get_state(). So in the absence of bugs in driver implementations, this is only a safe-guard and no fix. Reported-by: Munehisa Kamata Link: https://lore.kernel.org/r/20230310214004.2619480-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index e01147f66e15..474725714a05 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -115,7 +115,14 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label) } if (pwm->chip->ops->get_state) { - struct pwm_state state; + /* + * Zero-initialize state because most drivers are unaware of + * .usage_power. The other members of state are supposed to be + * set by lowlevel drivers. We still initialize the whole + * structure for simplicity even though this might paper over + * faulty implementations of .get_state(). + */ + struct pwm_state state = { 0, }; err = pwm->chip->ops->get_state(pwm->chip, pwm, &state); trace_pwm_get(pwm, &state, err); @@ -448,7 +455,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, { struct pwm_state *last = &pwm->last; struct pwm_chip *chip = pwm->chip; - struct pwm_state s1, s2; + struct pwm_state s1 = { 0 }, s2 = { 0 }; int err; if (!IS_ENABLED(CONFIG_PWM_DEBUG)) @@ -530,6 +537,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm, return; } + *last = (struct pwm_state){ 0 }; err = chip->ops->get_state(chip, pwm, last); trace_pwm_get(pwm, last, err); if (err) From a4a3203426f4b67535d6442ddc5dca8878a0678f Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 23 Mar 2023 11:01:25 +0000 Subject: [PATCH 037/173] ASoC: codecs: lpass: fix the order or clks turn off during suspend The order in which clocks are stopped matters as some of the clock like NPL are derived from MCLK. Without this patch, Dragonboard RB5 DSP would crash with below error: qcom_q6v5_pas 17300000.remoteproc: fatal error received: ABT_dal.c:278:ABTimeout: AHB Bus hang is detected, Number of bus hang detected := 2 , addr0 = 0x3370000 , addr1 = 0x0!!! Turn off fsgen first, followed by npl and then finally mclk, which is exactly the opposite order of enable sequence. Fixes: 1dc3459009c3 ("ASoC: codecs: lpass: register mclk after runtime pm") Reported-by: Amit Pundir Signed-off-by: Srinivas Kandagatla Tested-by: Amit Pundir Link: https://lore.kernel.org/r/20230323110125.23790-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-rx-macro.c | 4 ++-- sound/soc/codecs/lpass-tx-macro.c | 4 ++-- sound/soc/codecs/lpass-wsa-macro.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index a73a7d7a1c0a..faba4237bd3d 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -3670,9 +3670,9 @@ static int __maybe_unused rx_macro_runtime_suspend(struct device *dev) regcache_cache_only(rx->regmap, true); regcache_mark_dirty(rx->regmap); - clk_disable_unprepare(rx->mclk); - clk_disable_unprepare(rx->npl); clk_disable_unprepare(rx->fsgen); + clk_disable_unprepare(rx->npl); + clk_disable_unprepare(rx->mclk); return 0; } diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 473d3cd39554..589c490a8c48 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -2098,9 +2098,9 @@ static int __maybe_unused tx_macro_runtime_suspend(struct device *dev) regcache_cache_only(tx->regmap, true); regcache_mark_dirty(tx->regmap); - clk_disable_unprepare(tx->mclk); - clk_disable_unprepare(tx->npl); clk_disable_unprepare(tx->fsgen); + clk_disable_unprepare(tx->npl); + clk_disable_unprepare(tx->mclk); return 0; } diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index ba7480f3831e..3f6f1bdd4e03 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -2506,9 +2506,9 @@ static int __maybe_unused wsa_macro_runtime_suspend(struct device *dev) regcache_cache_only(wsa->regmap, true); regcache_mark_dirty(wsa->regmap); - clk_disable_unprepare(wsa->mclk); - clk_disable_unprepare(wsa->npl); clk_disable_unprepare(wsa->fsgen); + clk_disable_unprepare(wsa->npl); + clk_disable_unprepare(wsa->mclk); return 0; } From 943d045a6d796175e5d08f9973953b1d2c07d797 Mon Sep 17 00:00:00 2001 From: Siddharth Kawar Date: Mon, 20 Mar 2023 20:37:40 +0000 Subject: [PATCH 038/173] SUNRPC: fix shutdown of NFS TCP client socket NFS server Duplicate Request Cache (DRC) algorithms rely on NFS clients reconnecting using the same local TCP port. Unique NFS operations are identified by the per-TCP connection set of XIDs. This prevents file corruption when non-idempotent NFS operations are retried. Currently, NFS client TCP connections are using different local TCP ports when reconnecting to NFS servers. After an NFS server initiates shutdown of the TCP connection, the NFS client's TCP socket is set to NULL after the socket state has reached TCP_LAST_ACK(9). When reconnecting, the new socket attempts to reuse the same local port but fails with EADDRNOTAVAIL (99). This forces the socket to use a different local TCP port to reconnect to the remote NFS server. State Transition and Events: TCP_CLOSE_WAIT(8) TCP_LAST_ACK(9) connect(fail EADDRNOTAVAIL(99)) TCP_CLOSE(7) bind on new port connect success dmesg excerpts showing reconnect switching from TCP local port of 926 to 763 after commit 7c81e6a9d75b: [13354.947854] NFS call mkdir testW ... [13405.654781] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.654813] RPC: state 8 conn 1 dead 0 zapped 1 sk_shutdown 1 [13405.654826] RPC: xs_data_ready... [13405.654892] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.654895] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [13405.654899] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.654900] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [13405.654950] RPC: xs_connect scheduled xprt 00000000037d0f03 [13405.654975] RPC: xs_bind 0.0.0.0:926: ok (0) [13405.654980] RPC: worker connecting xprt 00000000037d0f03 via tcp to 10.101.6.228 (port 2049) [13405.654991] RPC: 00000000037d0f03 connect status 99 connected 0 sock state 7 [13405.655001] RPC: xs_tcp_state_change client 00000000037d0f03... [13405.655002] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [13405.655024] RPC: xs_connect scheduled xprt 00000000037d0f03 [13405.655038] RPC: xs_bind 0.0.0.0:763: ok (0) [13405.655041] RPC: worker connecting xprt 00000000037d0f03 via tcp to 10.101.6.228 (port 2049) [13405.655065] RPC: 00000000037d0f03 connect status 115 connected 0 sock state 2 State Transition and Events with patch applied: TCP_CLOSE_WAIT(8) TCP_LAST_ACK(9) TCP_CLOSE(7) connect(reuse of port succeeds) dmesg excerpts showing reconnect on same TCP local port of 936 with patch applied: [ 257.139935] NFS: mkdir(0:59/560857152), testQ [ 257.139937] NFS call mkdir testQ ... [ 307.822702] RPC: state 8 conn 1 dead 0 zapped 1 sk_shutdown 1 [ 307.822714] RPC: xs_data_ready... [ 307.822817] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.822821] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.822825] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.822826] RPC: state 9 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.823606] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.823609] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.823629] RPC: xs_tcp_state_change client 00000000ce702f14... [ 307.823632] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 307.823676] RPC: xs_connect scheduled xprt 00000000ce702f14 [ 307.823704] RPC: xs_bind 0.0.0.0:936: ok (0) [ 307.823709] RPC: worker connecting xprt 00000000ce702f14 via tcp to 10.101.1.30 (port 2049) [ 307.823748] RPC: 00000000ce702f14 connect status 115 connected 0 sock state 2 ... [ 314.916193] RPC: state 7 conn 0 dead 0 zapped 1 sk_shutdown 3 [ 314.916251] RPC: xs_connect scheduled xprt 00000000ce702f14 [ 314.916282] RPC: xs_bind 0.0.0.0:936: ok (0) [ 314.916292] RPC: worker connecting xprt 00000000ce702f14 via tcp to 10.101.1.30 (port 2049) [ 314.916342] RPC: 00000000ce702f14 connect status 115 connected 0 sock state 2 Fixes: 7c81e6a9d75b ("SUNRPC: Tweak TCP socket shutdown in the RPC client") Signed-off-by: Siddharth Rajendra Kawar Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index adcbedc244d6..6cacd70a15ff 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2158,6 +2158,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) switch (skst) { case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: + case TCP_LAST_ACK: break; case TCP_ESTABLISHED: case TCP_CLOSE_WAIT: From cbedf1a33970c9b825ae75b81fbd3e88e224a418 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Thu, 23 Mar 2023 18:13:11 -0700 Subject: [PATCH 039/173] Input: i8042 - add TUXEDO devices to i8042 quirk tables for partial fix A lot of modern Clevo barebones have touchpad and/or keyboard issues after suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of them have an external PS/2 port so this can safely be set for all of them. I'm not entirely sure if every device listed really needs all four quirks, but after testing and production use, no negative effects could be observed when setting all four. Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS on the Clevo N150CU and the Clevo NHxxRZQ makes the keyboard very laggy for ~5 seconds after boot and sometimes also after resume. However both are required for the keyboard to not fail completely sometimes after boot or resume. Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230321191619.647911-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index fe7ffe30997c..028e45bd050b 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1124,6 +1124,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + /* + * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes + * the keyboard very laggy for ~5 seconds after boot and + * sometimes also after resume. + * However both are required for the keyboard to not fail + * completely sometimes after boot or resume. + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "N150CU"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), @@ -1131,6 +1145,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + /* + * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes + * the keyboard very laggy for ~5 seconds after boot and + * sometimes also after resume. + * However both are required for the keyboard to not fail + * completely sometimes after boot or resume. + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | + SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), From f6887a71bdd2f0dcba9b8180dd2223cfa8637e85 Mon Sep 17 00:00:00 2001 From: Jason Montleon Date: Fri, 24 Mar 2023 13:07:11 -0400 Subject: [PATCH 040/173] ASoC: hdac_hdmi: use set_stream() instead of set_tdm_slots() hdac_hdmi was not updated to use set_stream() instead of set_tdm_slots() in the original commit so HDMI no longer produces audio. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/regressions/CAJD_bPKQdtaExvVEKxhQ47G-ZXDA=k+gzhMJRHLBe=mysPnuKA@mail.gmail.com/ Fixes: 636110411ca7 ("ASoC: Intel/SOF: use set_stream() instead of set_tdm_slots() for HDAudio") Signed-off-by: Jason Montleon Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230324170711.2526-1-jmontleo@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index ed4f7cdda04f..8b6b76029694 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -436,23 +436,28 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_device *hdev, return 0; } -static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai, - unsigned int tx_mask, unsigned int rx_mask, - int slots, int slot_width) +static int hdac_hdmi_set_stream(struct snd_soc_dai *dai, + void *stream, int direction) { struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai); struct hdac_device *hdev = hdmi->hdev; struct hdac_hdmi_dai_port_map *dai_map; struct hdac_hdmi_pcm *pcm; + struct hdac_stream *hstream; - dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, tx_mask); + if (!stream) + return -EINVAL; + + hstream = (struct hdac_stream *)stream; + + dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, hstream->stream_tag); dai_map = &hdmi->dai_map[dai->id]; pcm = hdac_hdmi_get_pcm_from_cvt(hdmi, dai_map->cvt); if (pcm) - pcm->stream_tag = (tx_mask << 4); + pcm->stream_tag = (hstream->stream_tag << 4); return 0; } @@ -1544,7 +1549,7 @@ static const struct snd_soc_dai_ops hdmi_dai_ops = { .startup = hdac_hdmi_pcm_open, .shutdown = hdac_hdmi_pcm_close, .hw_params = hdac_hdmi_set_hw_params, - .set_tdm_slot = hdac_hdmi_set_tdm_slot, + .set_stream = hdac_hdmi_set_stream, }; /* From 0808ed6ebbc292222ca069d339744870f6d801da Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 24 Mar 2023 14:52:49 +0100 Subject: [PATCH 041/173] scsi: megaraid_sas: Fix fw_crash_buffer_show() If crash_dump_buf is not allocated then crash dump can't be available. Replace logical 'and' with 'or'. Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230324135249.9733-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3ceece988338..c895189375e2 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3298,7 +3298,7 @@ fw_crash_buffer_show(struct device *cdev, spin_lock_irqsave(&instance->crashdump_lock, flags); buff_offset = instance->fw_crash_buffer_offset; - if (!instance->crash_dump_buf && + if (!instance->crash_dump_buf || !((instance->fw_crash_state == AVAILABLE) || (instance->fw_crash_state == COPYING))) { dev_err(&instance->pdev->dev, From 2309df27111a51734cb9240b4d3c25f2f3c6ab06 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 24 Mar 2023 16:01:34 +0100 Subject: [PATCH 042/173] scsi: megaraid_sas: Fix crash after a double completion When a physical disk is attached directly "without JBOD MAP support" (see megasas_get_tm_devhandle()) then there is no real error handling in the driver. Return FAILED instead of SUCCESS. Fixes: 18365b138508 ("megaraid_sas: Task management support") Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230324150134.14696-1-thenzl@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 84c9a55a5794..8a83f3fc2b86 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4771,7 +4771,7 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd) devhandle = megasas_get_tm_devhandle(scmd->device); if (devhandle == (u16)ULONG_MAX) { - ret = SUCCESS; + ret = FAILED; sdev_printk(KERN_INFO, scmd->device, "task abort issued for invalid devhandle\n"); mutex_unlock(&instance->reset_mutex); @@ -4841,7 +4841,7 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd) devhandle = megasas_get_tm_devhandle(scmd->device); if (devhandle == (u16)ULONG_MAX) { - ret = SUCCESS; + ret = FAILED; sdev_printk(KERN_INFO, scmd->device, "target reset issued for invalid devhandle\n"); mutex_unlock(&instance->reset_mutex); From f0aa59a33d2ac2267d260fe21eaf92500df8e7b4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Mar 2023 11:22:11 +0900 Subject: [PATCH 043/173] scsi: core: Improve scsi_vpd_inquiry() checks Some USB-SATA adapters have broken behavior when an unsupported VPD page is probed: Depending on the VPD page number, a 4-byte header with a valid VPD page number but with a 0 length is returned. Currently, scsi_vpd_inquiry() only checks that the page number is valid to determine if the page is valid, which results in receiving only the 4-byte header for the non-existent page. This error manifests itself very often with page 0xb9 for the Concurrent Positioning Ranges detection done by sd_read_cpr(), resulting in the following error message: sd 0:0:0:0: [sda] Invalid Concurrent Positioning Ranges VPD page Prevent such misleading error message by adding a check in scsi_vpd_inquiry() to verify that the page length is not 0. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230322022211.116327-1-damien.lemoal@opensource.wdc.com Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 5cce1ba70fc6..09ef0b31dfc0 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -314,11 +314,18 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, if (result) return -EIO; - /* Sanity check that we got the page back that we asked for */ + /* + * Sanity check that we got the page back that we asked for and that + * the page size is not 0. + */ if (buffer[1] != page) return -EIO; - return get_unaligned_be16(&buffer[2]) + 4; + result = get_unaligned_be16(&buffer[2]); + if (!result) + return -EIO; + + return result + 4; } static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page) From d684a7a26f7d2c7122a4581ac966ed64e88fb29c Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Fri, 24 Mar 2023 12:32:04 -0700 Subject: [PATCH 044/173] scsi: mpt3sas: Don't print sense pool info twice _base_allocate_sense_dma_pool() already prints out the sense pool information, so don't print it a second time after calling it in _base_allocate_memory_pools(). In addition the version in _base_allocate_memory_pools() was using the wrong size value, sz, which was last assigned when doing some nvme calculations instead of sense_sz to determine the pool size in kilobytes. Cc: Sathya Prakash Cc: Sreekanth Reddy Cc: Suganath Prabu Subramani Cc: MPT-FusionLinux.pdl@broadcom.com Cc: "Martin K. Petersen" Cc: "James E.J. Bottomley" Fixes: 970ac2bb70e7 ("scsi: mpt3sas: Force sense buffer allocations to be within same 4 GB region") Signed-off-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20230324193204.567932-1-jsnitsel@redhat.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 2ee9ea57554d..14ae0a9c5d3d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -6616,11 +6616,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) else if (rc == -EAGAIN) goto try_32bit_dma; total_sz += sense_sz; - ioc_info(ioc, - "sense pool(0x%p)- dma(0x%llx): depth(%d)," - "element_size(%d), pool_size(%d kB)\n", - ioc->sense, (unsigned long long)ioc->sense_dma, ioc->scsiio_depth, - SCSI_SENSE_BUFFERSIZE, sz / 1024); /* reply pool, 4 byte align */ sz = ioc->reply_free_queue_depth * ioc->reply_sz; rc = _base_allocate_reply_pool(ioc, sz); From 02bcba0b9f9da706d5bd1e8cbeb83493863e17b5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Mar 2023 10:29:33 +0200 Subject: [PATCH 045/173] regulator: Handle deferred clk devm_clk_get() can return -EPROBE_DEFER. So it is better to return the error code from devm_clk_get(), instead of a hard coded -ENOENT. This gives more opportunities to successfully probe the driver. Fixes: 8959e5324485 ("regulator: fixed: add possibility to enable by clock") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/18459fae3d017a66313699c7c8456b28158b2dd0.1679819354.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/regulator/fixed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index 2a9867abba20..e6724a229d23 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -215,7 +215,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) drvdata->enable_clock = devm_clk_get(dev, NULL); if (IS_ERR(drvdata->enable_clock)) { dev_err(dev, "Can't get enable-clock from devicetree\n"); - return -ENOENT; + return PTR_ERR(drvdata->enable_clock); } } else if (drvtype && drvtype->has_performance_state) { drvdata->desc.ops = &fixed_voltage_domain_ops; From fb5755100a0a5aa5957bdb204fd1e249684557fc Mon Sep 17 00:00:00 2001 From: Rajvi Jingar Date: Mon, 20 Mar 2023 14:20:29 -0700 Subject: [PATCH 046/173] platform/x86/intel/pmc: Alder Lake PCH slp_s0_residency fix For platforms with Alder Lake PCH (Alder Lake S and Raptor Lake S) the slp_s0_residency attribute has been reporting the wrong value. Unlike other platforms, ADL PCH does not have a counter for the time that the SLP_S0 signal was asserted. Instead, firmware uses the aggregate of the Low Power Mode (LPM) substate counters as the S0ix value. Since the LPM counters run at a different frequency, this lead to misreporting of the S0ix time. Add a check for Alder Lake PCH and adjust the frequency accordingly when display slp_s0_residency. Fixes: bbab31101f44 ("platform/x86/intel: pmc/core: Add Alderlake support to pmc core driver") Signed-off-by: Rajvi Jingar Signed-off-by: David E. Box Reviewed-by: Rajneesh Bhardwaj Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230320212029.3154407-1-david.e.box@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 3a15d32d7644..b9591969e0fa 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -66,7 +66,18 @@ static inline void pmc_core_reg_write(struct pmc_dev *pmcdev, int reg_offset, static inline u64 pmc_core_adjust_slp_s0_step(struct pmc_dev *pmcdev, u32 value) { - return (u64)value * pmcdev->map->slp_s0_res_counter_step; + /* + * ADL PCH does not have the SLP_S0 counter and LPM Residency counters are + * used as a workaround which uses 30.5 usec tick. All other client + * programs have the legacy SLP_S0 residency counter that is using the 122 + * usec tick. + */ + const int lpm_adj_x2 = pmcdev->map->lpm_res_counter_step_x2; + + if (pmcdev->map == &adl_reg_map) + return (u64)value * GET_X2_COUNTER((u64)lpm_adj_x2); + else + return (u64)value * pmcdev->map->slp_s0_res_counter_step; } static int set_etr3(struct pmc_dev *pmcdev) From 441d901fbf669f6360566a4437b1e563b854de4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 27 Mar 2023 13:05:02 +0000 Subject: [PATCH 047/173] platform/x86: gigabyte-wmi: add support for B650 AORUS ELITE AX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This has been reported as working. Suggested-by: got3nks Link: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/15#issuecomment-1483942966 Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20230327-gigabyte-wmi-b650-elite-ax-v1-1-d4d645c21d0b@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 4dd39ab6ecfa..5e5b17c50eb6 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -151,6 +151,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B650 AORUS ELITE AX"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660I AORUS PRO DDR4"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), From e5c972c1fadacc858b6a564d056f177275238040 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 24 Mar 2023 15:52:33 +0100 Subject: [PATCH 048/173] KVM: SVM: Flush Hyper-V TLB when required The Hyper-V "EnlightenedNptTlb" enlightenment is always enabled when KVM is running on top of Hyper-V and Hyper-V exposes support for it (which is always). On AMD CPUs this enlightenment results in ASID invalidations not flushing TLB entries derived from the NPT. To force the underlying (L0) hypervisor to rebuild its shadow page tables, an explicit hypercall is needed. The original KVM implementation of Hyper-V's "EnlightenedNptTlb" on SVM only added remote TLB flush hooks. This worked out fine for a while, as sufficient remote TLB flushes where being issued in KVM to mask the problem. Since v5.17, changes in the TDP code reduced the number of flushes and the out-of-sync TLB prevents guests from booting successfully. Split svm_flush_tlb_current() into separate callbacks for the 3 cases (guest/all/current), and issue the required Hyper-V hypercall when a Hyper-V TLB flush is needed. The most important case where the TLB flush was missing is when loading a new PGD, which is followed by what is now svm_flush_tlb_current(). Cc: stable@vger.kernel.org # v5.17+ Fixes: 1e0c7d40758b ("KVM: SVM: hyper-v: Remote TLB flush for SVM") Link: https://lore.kernel.org/lkml/43980946-7bbf-dcef-7e40-af904c456250@linux.microsoft.com/ Suggested-by: Sean Christopherson Signed-off-by: Jeremi Piotrowski Reviewed-by: Vitaly Kuznetsov Message-Id: <20230324145233.4585-1-jpiotrowski@linux.microsoft.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_onhyperv.h | 5 +++++ arch/x86/kvm/svm/svm.c | 37 ++++++++++++++++++++++++++++++--- arch/x86/kvm/svm/svm_onhyperv.h | 15 +++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h index 287e98ef9df3..6272dabec02d 100644 --- a/arch/x86/kvm/kvm_onhyperv.h +++ b/arch/x86/kvm/kvm_onhyperv.h @@ -12,6 +12,11 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm, int hv_remote_flush_tlb(struct kvm *kvm); void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp); #else /* !CONFIG_HYPERV */ +static inline int hv_remote_flush_tlb(struct kvm *kvm) +{ + return -EOPNOTSUPP; +} + static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp) { } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 252e7f37e4e2..f25bc3cbb250 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3729,7 +3729,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); } -static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3753,6 +3753,37 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) svm->current_vmcb->asid_generation--; } +static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + hpa_t root_tdp = vcpu->arch.mmu->root.hpa; + + /* + * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly + * flush the NPT mappings via hypercall as flushing the ASID only + * affects virtual to physical mappings, it does not invalidate guest + * physical to host physical mappings. + */ + if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp)) + hyperv_flush_guest_mapping(root_tdp); + + svm_flush_tlb_asid(vcpu); +} + +static void svm_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + /* + * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB + * flushes should be routed to hv_remote_flush_tlb() without requesting + * a "regular" remote flush. Reaching this point means either there's + * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of + * which might be fatal to the guest. Yell, but try to recover. + */ + if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu))) + hv_remote_flush_tlb(vcpu->kvm); + + svm_flush_tlb_asid(vcpu); +} + static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4745,10 +4776,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_rflags = svm_set_rflags, .get_if_flag = svm_get_if_flag, - .flush_tlb_all = svm_flush_tlb_current, + .flush_tlb_all = svm_flush_tlb_all, .flush_tlb_current = svm_flush_tlb_current, .flush_tlb_gva = svm_flush_tlb_gva, - .flush_tlb_guest = svm_flush_tlb_current, + .flush_tlb_guest = svm_flush_tlb_asid, .vcpu_pre_run = svm_vcpu_pre_run, .vcpu_run = svm_vcpu_run, diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index cff838f15db5..786d46d73a8e 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -6,6 +6,8 @@ #ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__ #define __ARCH_X86_KVM_SVM_ONHYPERV_H__ +#include + #if IS_ENABLED(CONFIG_HYPERV) #include "kvm_onhyperv.h" @@ -15,6 +17,14 @@ static struct kvm_x86_ops svm_x86_ops; int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu); +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) +{ + struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments; + + return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB && + !!hve->hv_enlightenments_control.enlightened_npt_tlb; +} + static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; @@ -80,6 +90,11 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu) } #else +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) +{ + return false; +} + static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { } From d583fbd7066a2dea43050521a95d9770f7d7593e Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:43 +0100 Subject: [PATCH 049/173] KVM: irqfd: Make resampler_list an RCU list It is useful to be able to do read-only traversal of the list of all the registered irqfd resamplers without locking the resampler_lock mutex. In particular, we are going to traverse it to search for a resampler registered for the given irq of an irqchip, and that will be done with an irqchip spinlock (ioapic->lock) held, so it is undesirable to lock a mutex in this context. So turn this list into an RCU list. For protecting the read side, reuse kvm->irq_srcu which is already used for protecting a number of irq related things (kvm->irq_routing, irqfd->resampler->list, kvm->irq_ack_notifier_list, kvm->arch.mask_notifier_list). Signed-off-by: Dmytro Maluka Message-Id: <20230322204344.50138-2-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/linux/kvm_irqfd.h | 2 +- virt/kvm/eventfd.c | 8 ++++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8ada23756b0e..9f508c8e66e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -755,6 +755,7 @@ struct kvm { struct { spinlock_t lock; struct list_head items; + /* resampler_list update side is protected by resampler_lock. */ struct list_head resampler_list; struct mutex resampler_lock; } irqfds; diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index dac047abdba7..8ad43692e3bb 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -31,7 +31,7 @@ struct kvm_kernel_irqfd_resampler { /* * Entry in list of kvm->irqfd.resampler_list. Use for sharing * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock + * RCU list modified under kvm->irqfds.resampler_lock */ struct list_head link; }; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2a3ed401ce46..61aea70dd888 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -96,8 +96,12 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { - list_del(&resampler->link); + list_del_rcu(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); + /* + * synchronize_srcu(&kvm->irq_srcu) already called + * in kvm_unregister_irq_ack_notifier(). + */ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); kfree(resampler); @@ -369,7 +373,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) resampler->notifier.irq_acked = irqfd_resampler_ack; INIT_LIST_HEAD(&resampler->link); - list_add(&resampler->link, &kvm->irqfds.resampler_list); + list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list); kvm_register_irq_ack_notifier(kvm, &resampler->notifier); irqfd->resampler = resampler; From fef8f2b90edbd7089a4278021314f11f056b0cbb Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:44 +0100 Subject: [PATCH 050/173] KVM: x86/ioapic: Resample the pending state of an IRQ when unmasking KVM irqfd based emulation of level-triggered interrupts doesn't work quite correctly in some cases, particularly in the case of interrupts that are handled in a Linux guest as oneshot interrupts (IRQF_ONESHOT). Such an interrupt is acked to the device in its threaded irq handler, i.e. later than it is acked to the interrupt controller (EOI at the end of hardirq), not earlier. Linux keeps such interrupt masked until its threaded handler finishes, to prevent the EOI from re-asserting an unacknowledged interrupt. However, with KVM + vfio (or whatever is listening on the resamplefd) we always notify resamplefd at the EOI, so vfio prematurely unmasks the host physical IRQ, thus a new physical interrupt is fired in the host. This extra interrupt in the host is not a problem per se. The problem is that it is unconditionally queued for injection into the guest, so the guest sees an extra bogus interrupt. [*] There are observed at least 2 user-visible issues caused by those extra erroneous interrupts for a oneshot irq in the guest: 1. System suspend aborted due to a pending wakeup interrupt from ChromeOS EC (drivers/platform/chrome/cros_ec.c). 2. Annoying "invalid report id data" errors from ELAN0000 touchpad (drivers/input/mouse/elan_i2c_core.c), flooding the guest dmesg every time the touchpad is touched. The core issue here is that by the time when the guest unmasks the IRQ, the physical IRQ line is no longer asserted (since the guest has acked the interrupt to the device in the meantime), yet we unconditionally inject the interrupt queued into the guest by the previous resampling. So to fix the issue, we need a way to detect that the IRQ is no longer pending, and cancel the queued interrupt in this case. With IOAPIC we are not able to probe the physical IRQ line state directly (at least not if the underlying physical interrupt controller is an IOAPIC too), so in this patch we use irqfd resampler for that. Namely, instead of injecting the queued interrupt, we just notify the resampler that this interrupt is done. If the IRQ line is actually already deasserted, we are done. If it is still asserted, a new interrupt will be shortly triggered through irqfd and injected into the guest. In the case if there is no irqfd resampler registered for this IRQ, we cannot fix the issue, so we keep the existing behavior: immediately unconditionally inject the queued interrupt. This patch fixes the issue for x86 IOAPIC only. In the long run, we can fix it for other irqchips and other architectures too, possibly taking advantage of reading the physical state of the IRQ line, which is possible with some other irqchips (e.g. with arm64 GIC, maybe even with the legacy x86 PIC). [*] In this description we assume that the interrupt is a physical host interrupt forwarded to the guest e.g. by vfio. Potentially the same issue may occur also with a purely virtual interrupt from an emulated device, e.g. if the guest handles this interrupt, again, as a oneshot interrupt. Signed-off-by: Dmytro Maluka Link: https://lore.kernel.org/kvm/31420943-8c5f-125c-a5ee-d2fde2700083@semihalf.com/ Link: https://lore.kernel.org/lkml/87o7wrug0w.wl-maz@kernel.org/ Message-Id: <20230322204344.50138-3-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 36 ++++++++++++++++++++++++++++++++--- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/eventfd.c | 41 ++++++++++++++++++++++++++++++++++------ 3 files changed, 78 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 042dee556125..995eb5054360 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG - && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index, false); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && + ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) { + /* + * Pending status in irr may be outdated: the IRQ line may have + * already been deasserted by a device while the IRQ was masked. + * This occurs, for instance, if the interrupt is handled in a + * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this + * case the guest acknowledges the interrupt to the device in + * its threaded irq handler, i.e. after the EOI but before + * unmasking, so at the time of unmasking the IRQ line is + * already down but our pending irr bit is still set. In such + * cases, injecting this pending interrupt to the guest is + * buggy: the guest will receive an extra unwanted interrupt. + * + * So we need to check here if the IRQ is actually still pending. + * As we are generally not able to probe the IRQ line status + * directly, we do it through irqfd resampler. Namely, we clear + * the pending status and notify the resampler that this interrupt + * is done, without actually injecting it into the guest. If the + * IRQ line is actually already deasserted, we are done. If it is + * still asserted, a new interrupt will be shortly triggered + * through irqfd and injected into the guest. + * + * If, however, it's not possible to resample (no irqfd resampler + * registered for this irq), then unconditionally inject this + * pending interrupt into the guest, so the guest will not miss + * an interrupt, although may get an extra unwanted interrupt. + */ + if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index)) + ioapic->irr &= ~(1 << index); + else + ioapic_service(ioapic, index, false); + } if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9f508c8e66e1..a9adf75344be 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1987,6 +1987,9 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); +bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin); void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) @@ -1995,6 +1998,13 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) } static inline void kvm_irqfd_release(struct kvm *kvm) {} + +static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin) +{ + return false; +} #endif #else diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 61aea70dd888..b0af834ffa95 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -55,6 +55,15 @@ irqfd_inject(struct work_struct *work) irqfd->gsi, 1, false); } +static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler) +{ + struct kvm_kernel_irqfd *irqfd; + + list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, + srcu_read_lock_held(&resampler->kvm->irq_srcu)) + eventfd_signal(irqfd->resamplefd, 1); +} + /* * Since resampler irqfds share an IRQ source ID, we de-assert once * then notify all of the resampler irqfds using this GSI. We can't @@ -65,7 +74,6 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct kvm_kernel_irqfd *irqfd; int idx; resampler = container_of(kian, @@ -76,11 +84,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) resampler->notifier.gsi, 0, false); idx = srcu_read_lock(&kvm->irq_srcu); - - list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, - srcu_read_lock_held(&kvm->irq_srcu)) - eventfd_signal(irqfd->resamplefd, 1); - + irqfd_resampler_notify(resampler); srcu_read_unlock(&kvm->irq_srcu, idx); } @@ -648,6 +652,31 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_unlock_irq(&kvm->irqfds.lock); } +bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin) +{ + struct kvm_kernel_irqfd_resampler *resampler; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) { + list_for_each_entry_srcu(resampler, + &kvm->irqfds.resampler_list, link, + srcu_read_lock_held(&kvm->irq_srcu)) { + if (resampler->notifier.gsi == gsi) { + irqfd_resampler_notify(resampler); + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + } + } + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} + /* * create a host-wide workqueue for issuing deferred shutdown requests * aggregated from all vm* instances. We need our own isolated From 0dc902267cb32ade1c29eed8208e566ad743518a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 22 Mar 2023 07:12:20 -0700 Subject: [PATCH 051/173] KVM: x86: Suppress pending MMIO write exits if emulator detects exception Clear vcpu->mmio_needed when injecting an exception from the emulator to squash a (legitimate) warning about vcpu->mmio_needed being true at the start of KVM_RUN without a callback being registered to complete the userspace MMIO exit. Suppressing the MMIO write exit is inarguably wrong from an architectural perspective, but it is the least awful hack-a-fix due to shortcomings in KVM's uAPI, not to mention that KVM already suppresses MMIO writes in this scenario. Outside of REP string instructions, KVM doesn't provide a way to resume an instruction at the exact point where it was "interrupted" if said instruction partially completed before encountering an MMIO access. For MMIO reads, KVM immediately exits to userspace upon detecting MMIO as userspace provides the to-be-read value in a buffer, and so KVM can safely (more or less) restart the instruction from the beginning. When the emulator re-encounters the MMIO read, KVM will service the MMIO by getting the value from the buffer instead of exiting to userspace, i.e. KVM won't put the vCPU into an infinite loop. On an emulated MMIO write, KVM finishes the instruction before exiting to userspace, as exiting immediately would ultimately hang the vCPU due to the aforementioned shortcoming of KVM not being able to resume emulation in the middle of an instruction. For the vast majority of _emulated_ instructions, deferring the userspace exit doesn't cause problems as very few x86 instructions (again ignoring string operations) generate multiple writes. But for instructions that generate multiple writes, e.g. PUSHA (multiple pushes onto the stack), deferring the exit effectively results in only the final write triggering an exit to userspace. KVM does support multiple MMIO "fragments", but only for page splits; if an instruction performs multiple distinct MMIO writes, the number of fragments gets reset when the next MMIO write comes along and any previous MMIO writes are dropped. Circling back to the warning, if a deferred MMIO write coincides with an exception, e.g. in this case a #SS due to PUSHA underflowing the stack after queueing a write to an MMIO page on a previous push, KVM injects the exceptions and leaves the deferred MMIO pending without registering a callback, thus triggering the splat. Sweep the problem under the proverbial rug as dropping MMIO writes is not unique to the exception scenario (see above), i.e. instructions like PUSHA are fundamentally broken with respect to MMIO, and have been since KVM's inception. Reported-by: zhangjianguo Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com Reported-by: syzbot+8accb43ddc6bd1f5713a@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Message-Id: <20230322141220.2206241-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7713420abab0..45017576ad5e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8903,6 +8903,8 @@ restart: } if (ctxt->have_exception) { + WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write); + vcpu->mmio_needed = false; r = 1; inject_emulated_exception(vcpu); } else if (vcpu->arch.pio.count) { From 6c41468c7c12d74843bb414fc00307ea8a6318c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 22 Mar 2023 07:32:59 -0700 Subject: [PATCH 052/173] KVM: x86: Clear "has_error_code", not "error_code", for RM exception injection When injecting an exception into a vCPU in Real Mode, suppress the error code by clearing the flag that tracks whether the error code is valid, not by clearing the error code itself. The "typo" was introduced by recent fix for SVM's funky Paged Real Mode. Opportunistically hoist the logic above the tracepoint so that the trace is coherent with respect to what is actually injected (this was also the behavior prior to the buggy commit). Fixes: b97f07458373 ("KVM: x86: determine if an exception has an error code only when injecting it.") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20230322143300.2209476-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 45017576ad5e..7d6f98b7635f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9908,13 +9908,20 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu) static void kvm_inject_exception(struct kvm_vcpu *vcpu) { + /* + * Suppress the error code if the vCPU is in Real Mode, as Real Mode + * exceptions don't report error codes. The presence of an error code + * is carried with the exception and only stripped when the exception + * is injected as intercepted #PF VM-Exits for AMD's Paged Real Mode do + * report an error code despite the CPU being in Real Mode. + */ + vcpu->arch.exception.has_error_code &= is_protmode(vcpu); + trace_kvm_inj_exception(vcpu->arch.exception.vector, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code, vcpu->arch.exception.injected); - if (vcpu->arch.exception.error_code && !is_protmode(vcpu)) - vcpu->arch.exception.error_code = false; static_call(kvm_x86_inject_exception)(vcpu); } From 80962ec912db56d323883154efc2297473e692cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 22 Mar 2023 07:33:00 -0700 Subject: [PATCH 053/173] KVM: nVMX: Do not report error code when synthesizing VM-Exit from Real Mode Don't report an error code to L1 when synthesizing a nested VM-Exit and L2 is in Real Mode. Per Intel's SDM, regarding the error code valid bit: This bit is always 0 if the VM exit occurred while the logical processor was in real-address mode (CR0.PE=0). The bug was introduced by a recent fix for AMD's Paged Real Mode, which moved the error code suppression from the common "queue exception" path to the "inject exception" path, but missed VMX's "synthesize VM-Exit" path. Fixes: b97f07458373 ("KVM: x86: determine if an exception has an error code only when injecting it.") Cc: stable@vger.kernel.org Cc: Maxim Levitsky Signed-off-by: Sean Christopherson Message-Id: <20230322143300.2209476-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1bc2b80273c9..768487611db7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3868,7 +3868,12 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu) exit_qual = 0; } - if (ex->has_error_code) { + /* + * Unlike AMD's Paged Real Mode, which reports an error code on #PF + * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the + * "has error code" flags on VM-Exit if the CPU is in Real Mode. + */ + if (ex->has_error_code && is_protmode(vcpu)) { /* * Intel CPUs do not generate error codes with bits 31:16 set, * and more importantly VMX disallows setting bits 31:16 in the From c56610a869bce03490faf4f157076370c71b8ae3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 24 Mar 2023 14:33:42 +0100 Subject: [PATCH 054/173] ACPI: bus: Rework system-level device notification handling For ACPI drivers that provide a ->notify() callback and set ACPI_DRIVER_ALL_NOTIFY_EVENTS in their flags, that callback can be invoked while either the ->add() or the ->remove() callback is running without any synchronization at the bus type level which is counter to the common-sense expectation that notification handling should only be enabled when the driver is actually bound to the device. As a result, if the driver is not careful enough, it's ->notify() callback may crash when it is invoked too early or too late [1]. This issue has been amplified by commit d6fb6ee1820c ("ACPI: bus: Drop driver member of struct acpi_device") that made acpi_bus_notify() check for the presence of the driver and its ->notify() callback directly instead of using an extra driver pointer that was only set and cleared by the bus type code, but it was present before that commit although it was harder to reproduce then. It can be addressed by using the observation that acpi_device_install_notify_handler() can be modified to install the handler for all types of events when ACPI_DRIVER_ALL_NOTIFY_EVENTS is set in the driver flags, in which case acpi_bus_notify() will not need to invoke the driver's ->notify() callback any more and that callback will only be invoked after acpi_device_install_notify_handler() has run and before acpi_device_remove_notify_handler() runs, which implies the correct ordering with respect to the other ACPI driver callbacks. Modify the code accordingly and while at it, drop two redundant local variables from acpi_bus_notify() and turn its description comment into a proper kerneldoc one. Fixes: d6fb6ee1820c ("ACPI: bus: Drop driver member of struct acpi_device") Link: https://lore.kernel.org/linux-acpi/9f6cba7a8a57e5a687c934e8e406e28c.squirrel@mail.panix.com # [1] Reported-by: Pierre Asselin Signed-off-by: Rafael J. Wysocki Tested-by: Pierre Asselin --- drivers/acpi/bus.c | 85 +++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 47 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 9531dd0fef50..a96da65057b1 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -459,85 +459,67 @@ out_free: Notification Handling -------------------------------------------------------------------------- */ -/* - * acpi_bus_notify - * --------------- - * Callback for all 'system-level' device notifications (values 0x00-0x7F). +/** + * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler + * @handle: Target ACPI object. + * @type: Notification type. + * @data: Ignored. + * + * This only handles notifications related to device hotplug. */ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) { struct acpi_device *adev; - u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; - bool hotplug_event = false; switch (type) { case ACPI_NOTIFY_BUS_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n"); - hotplug_event = true; break; case ACPI_NOTIFY_DEVICE_CHECK: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n"); - hotplug_event = true; break; case ACPI_NOTIFY_DEVICE_WAKE: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n"); - break; + return; case ACPI_NOTIFY_EJECT_REQUEST: acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n"); - hotplug_event = true; break; case ACPI_NOTIFY_DEVICE_CHECK_LIGHT: acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n"); /* TBD: Exactly what does 'light' mean? */ - break; + return; case ACPI_NOTIFY_FREQUENCY_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a frequency mismatch\n"); - break; + return; case ACPI_NOTIFY_BUS_MODE_MISMATCH: acpi_handle_err(handle, "Device cannot be configured due " "to a bus mode mismatch\n"); - break; + return; case ACPI_NOTIFY_POWER_FAULT: acpi_handle_err(handle, "Device has suffered a power fault\n"); - break; + return; default: acpi_handle_debug(handle, "Unknown event type 0x%x\n", type); - break; - } - - adev = acpi_get_acpi_dev(handle); - if (!adev) - goto err; - - if (adev->dev.driver) { - struct acpi_driver *driver = to_acpi_driver(adev->dev.driver); - - if (driver && driver->ops.notify && - (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS)) - driver->ops.notify(adev, type); - } - - if (!hotplug_event) { - acpi_put_acpi_dev(adev); return; } - if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) + adev = acpi_get_acpi_dev(handle); + + if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type))) return; acpi_put_acpi_dev(adev); - err: - acpi_evaluate_ost(handle, type, ost_code, NULL); + acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL); } static void acpi_notify_device(acpi_handle handle, u32 event, void *data) @@ -562,42 +544,51 @@ static u32 acpi_device_fixed_event(void *data) return ACPI_INTERRUPT_HANDLED; } -static int acpi_device_install_notify_handler(struct acpi_device *device) +static int acpi_device_install_notify_handler(struct acpi_device *device, + struct acpi_driver *acpi_drv) { acpi_status status; - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) { status = acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, acpi_device_fixed_event, device); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) { status = acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, acpi_device_fixed_event, device); - else - status = acpi_install_notify_handler(device->handle, - ACPI_DEVICE_NOTIFY, + } else { + u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? + ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; + + status = acpi_install_notify_handler(device->handle, type, acpi_notify_device, device); + } if (ACPI_FAILURE(status)) return -EINVAL; return 0; } -static void acpi_device_remove_notify_handler(struct acpi_device *device) +static void acpi_device_remove_notify_handler(struct acpi_device *device, + struct acpi_driver *acpi_drv) { - if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) + if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) { acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON, acpi_device_fixed_event); - else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) + } else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) { acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON, acpi_device_fixed_event); - else - acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + } else { + u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ? + ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY; + + acpi_remove_notify_handler(device->handle, type, acpi_notify_device); + } } /* Handle events targeting \_SB device (at present only graceful shutdown) */ @@ -1039,7 +1030,7 @@ static int acpi_device_probe(struct device *dev) acpi_drv->name, acpi_dev->pnp.bus_id); if (acpi_drv->ops.notify) { - ret = acpi_device_install_notify_handler(acpi_dev); + ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv); if (ret) { if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); @@ -1062,7 +1053,7 @@ static void acpi_device_remove(struct device *dev) struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver); if (acpi_drv->ops.notify) - acpi_device_remove_notify_handler(acpi_dev); + acpi_device_remove_notify_handler(acpi_dev, acpi_drv); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev); From bb430b69422640891b0b8db762885730579a4145 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Mon, 20 Mar 2023 13:54:30 +0100 Subject: [PATCH 055/173] loop: LOOP_CONFIGURE: send uevents for partitions LOOP_CONFIGURE is, as far as I understand it, supposed to be a way to combine LOOP_SET_FD and LOOP_SET_STATUS64 into a single syscall. When using LOOP_SET_FD+LOOP_SET_STATUS64, a single uevent would be sent for each partition found on the loop device after the second ioctl(), but when using LOOP_CONFIGURE, no such uevent was being sent. In the old setup, uevents are disabled for LOOP_SET_FD, but not for LOOP_SET_STATUS64. This makes sense, as it prevents uevents being sent for a partially configured device during LOOP_SET_FD - they're only sent at the end of LOOP_SET_STATUS64. But for LOOP_CONFIGURE, uevents were disabled for the entire operation, so that final notification was never issued. To fix this, reduce the critical section to exclude the loop_reread_partitions() call, which causes the uevents to be issued, to after uevents are re-enabled, matching the behaviour of the LOOP_SET_FD+LOOP_SET_STATUS64 combination. I noticed this because Busybox's losetup program recently changed from using LOOP_SET_FD+LOOP_SET_STATUS64 to LOOP_CONFIGURE, and this broke my setup, for which I want a notification from the kernel any time a new partition becomes available. Signed-off-by: Alyssa Ross [hch: reduced the critical section] Signed-off-by: Christoph Hellwig Fixes: 3448914e8cc5 ("loop: Add LOOP_CONFIGURE ioctl") Link: https://lore.kernel.org/r/20230320125430.55367-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/loop.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 28eb59fd71ca..bc31bb7072a2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1010,9 +1010,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - /* suppress uevents while reconfiguring the device */ - dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1067,6 +1064,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, } } + /* suppress uevents while reconfiguring the device */ + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); + disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); @@ -1109,17 +1109,17 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); + /* enable and uncork uevent now that we are done */ + dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); + if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); - error = 0; -done: - /* enable and uncork uevent now that we are done */ - dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); - return error; + return 0; out_unlock: loop_global_unlock(lo, is_loop); @@ -1130,7 +1130,7 @@ out_putf: fput(file); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); - goto done; + return error; } static void __loop_clr_fd(struct loop_device *lo, bool release) From 2f1a6be12ab6c8470d5776e68644726c94257c54 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 22 Mar 2023 10:33:28 +0000 Subject: [PATCH 056/173] btrfs: fix race between quota disable and quota assign ioctls The quota assign ioctl can currently run in parallel with a quota disable ioctl call. The assign ioctl uses the quota root, while the disable ioctl frees that root, and therefore we can have a use-after-free triggered in the assign ioctl, leading to a trace like the following when KASAN is enabled: [672.723][T736] BUG: KASAN: slab-use-after-free in btrfs_search_slot+0x2962/0x2db0 [672.723][T736] Read of size 8 at addr ffff888022ec0208 by task btrfs_search_sl/27736 [672.724][T736] [672.725][T736] CPU: 1 PID: 27736 Comm: btrfs_search_sl Not tainted 6.3.0-rc3 #37 [672.723][T736] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [672.727][T736] Call Trace: [672.728][T736] [672.728][T736] dump_stack_lvl+0xd9/0x150 [672.725][T736] print_report+0xc1/0x5e0 [672.720][T736] ? __virt_addr_valid+0x61/0x2e0 [672.727][T736] ? __phys_addr+0xc9/0x150 [672.725][T736] ? btrfs_search_slot+0x2962/0x2db0 [672.722][T736] kasan_report+0xc0/0xf0 [672.729][T736] ? btrfs_search_slot+0x2962/0x2db0 [672.724][T736] btrfs_search_slot+0x2962/0x2db0 [672.723][T736] ? fs_reclaim_acquire+0xba/0x160 [672.722][T736] ? split_leaf+0x13d0/0x13d0 [672.726][T736] ? rcu_is_watching+0x12/0xb0 [672.723][T736] ? kmem_cache_alloc+0x338/0x3c0 [672.722][T736] update_qgroup_status_item+0xf7/0x320 [672.724][T736] ? add_qgroup_rb+0x3d0/0x3d0 [672.739][T736] ? do_raw_spin_lock+0x12d/0x2b0 [672.730][T736] ? spin_bug+0x1d0/0x1d0 [672.737][T736] btrfs_run_qgroups+0x5de/0x840 [672.730][T736] ? btrfs_qgroup_rescan_worker+0xa70/0xa70 [672.738][T736] ? __del_qgroup_relation+0x4ba/0xe00 [672.738][T736] btrfs_ioctl+0x3d58/0x5d80 [672.735][T736] ? tomoyo_path_number_perm+0x16a/0x550 [672.737][T736] ? tomoyo_execute_permission+0x4a0/0x4a0 [672.731][T736] ? btrfs_ioctl_get_supported_features+0x50/0x50 [672.737][T736] ? __sanitizer_cov_trace_switch+0x54/0x90 [672.734][T736] ? do_vfs_ioctl+0x132/0x1660 [672.730][T736] ? vfs_fileattr_set+0xc40/0xc40 [672.730][T736] ? _raw_spin_unlock_irq+0x2e/0x50 [672.732][T736] ? sigprocmask+0xf2/0x340 [672.737][T736] ? __fget_files+0x26a/0x480 [672.732][T736] ? bpf_lsm_file_ioctl+0x9/0x10 [672.738][T736] ? btrfs_ioctl_get_supported_features+0x50/0x50 [672.736][T736] __x64_sys_ioctl+0x198/0x210 [672.736][T736] do_syscall_64+0x39/0xb0 [672.731][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.739][T736] RIP: 0033:0x4556ad [672.742][T736] [672.743][T736] [672.748][T736] Allocated by task 27677: [672.743][T736] kasan_save_stack+0x22/0x40 [672.741][T736] kasan_set_track+0x25/0x30 [672.741][T736] __kasan_kmalloc+0xa4/0xb0 [672.749][T736] btrfs_alloc_root+0x48/0x90 [672.746][T736] btrfs_create_tree+0x146/0xa20 [672.744][T736] btrfs_quota_enable+0x461/0x1d20 [672.743][T736] btrfs_ioctl+0x4a1c/0x5d80 [672.747][T736] __x64_sys_ioctl+0x198/0x210 [672.749][T736] do_syscall_64+0x39/0xb0 [672.744][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.756][T736] [672.757][T736] Freed by task 27677: [672.759][T736] kasan_save_stack+0x22/0x40 [672.759][T736] kasan_set_track+0x25/0x30 [672.756][T736] kasan_save_free_info+0x2e/0x50 [672.751][T736] ____kasan_slab_free+0x162/0x1c0 [672.758][T736] slab_free_freelist_hook+0x89/0x1c0 [672.752][T736] __kmem_cache_free+0xaf/0x2e0 [672.752][T736] btrfs_put_root+0x1ff/0x2b0 [672.759][T736] btrfs_quota_disable+0x80a/0xbc0 [672.752][T736] btrfs_ioctl+0x3e5f/0x5d80 [672.756][T736] __x64_sys_ioctl+0x198/0x210 [672.753][T736] do_syscall_64+0x39/0xb0 [672.765][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.769][T736] [672.768][T736] The buggy address belongs to the object at ffff888022ec0000 [672.768][T736] which belongs to the cache kmalloc-4k of size 4096 [672.769][T736] The buggy address is located 520 bytes inside of [672.769][T736] freed 4096-byte region [ffff888022ec0000, ffff888022ec1000) [672.760][T736] [672.764][T736] The buggy address belongs to the physical page: [672.761][T736] page:ffffea00008bb000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x22ec0 [672.766][T736] head:ffffea00008bb000 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [672.779][T736] flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) [672.770][T736] raw: 00fff00000010200 ffff888012842140 ffffea000054ba00 dead000000000002 [672.770][T736] raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000 [672.771][T736] page dumped because: kasan: bad access detected [672.778][T736] page_owner tracks the page as allocated [672.777][T736] page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd2040(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 88 [672.779][T736] get_page_from_freelist+0x119c/0x2d50 [672.779][T736] __alloc_pages+0x1cb/0x4a0 [672.776][T736] alloc_pages+0x1aa/0x270 [672.773][T736] allocate_slab+0x260/0x390 [672.771][T736] ___slab_alloc+0xa9a/0x13e0 [672.778][T736] __slab_alloc.constprop.0+0x56/0xb0 [672.771][T736] __kmem_cache_alloc_node+0x136/0x320 [672.789][T736] __kmalloc+0x4e/0x1a0 [672.783][T736] tomoyo_realpath_from_path+0xc3/0x600 [672.781][T736] tomoyo_path_perm+0x22f/0x420 [672.782][T736] tomoyo_path_unlink+0x92/0xd0 [672.780][T736] security_path_unlink+0xdb/0x150 [672.788][T736] do_unlinkat+0x377/0x680 [672.788][T736] __x64_sys_unlink+0xca/0x110 [672.789][T736] do_syscall_64+0x39/0xb0 [672.783][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.784][T736] page last free stack trace: [672.787][T736] free_pcp_prepare+0x4e5/0x920 [672.787][T736] free_unref_page+0x1d/0x4e0 [672.784][T736] __unfreeze_partials+0x17c/0x1a0 [672.797][T736] qlist_free_all+0x6a/0x180 [672.796][T736] kasan_quarantine_reduce+0x189/0x1d0 [672.797][T736] __kasan_slab_alloc+0x64/0x90 [672.793][T736] kmem_cache_alloc+0x17c/0x3c0 [672.799][T736] getname_flags.part.0+0x50/0x4e0 [672.799][T736] getname_flags+0x9e/0xe0 [672.792][T736] vfs_fstatat+0x77/0xb0 [672.791][T736] __do_sys_newlstat+0x84/0x100 [672.798][T736] do_syscall_64+0x39/0xb0 [672.796][T736] entry_SYSCALL_64_after_hwframe+0x63/0xcd [672.790][T736] [672.791][T736] Memory state around the buggy address: [672.799][T736] ffff888022ec0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.805][T736] ffff888022ec0180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.802][T736] >ffff888022ec0200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.809][T736] ^ [672.809][T736] ffff888022ec0280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [672.809][T736] ffff888022ec0300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fix this by having the qgroup assign ioctl take the qgroup ioctl mutex before calling btrfs_run_qgroups(), which is what all qgroup ioctls should call. Reported-by: butt3rflyh4ck Link: https://lore.kernel.org/linux-btrfs/CAFcO6XN3VD8ogmHwqRk4kbiwtpUSNySu2VAxN8waEPciCHJvMA@mail.gmail.com/ CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 ++ fs/btrfs/qgroup.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 439a5bf5ebc6..f9dbf41fc803 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3732,7 +3732,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) } /* update qgroup status and info */ + mutex_lock(&fs_info->qgroup_ioctl_lock); err = btrfs_run_qgroups(trans); + mutex_unlock(&fs_info->qgroup_ioctl_lock); if (err < 0) btrfs_handle_fs_error(fs_info, err, "failed to update qgroup status and info"); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 52a7d2fa2284..f41da7ac360d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2828,13 +2828,22 @@ cleanup: } /* - * called from commit_transaction. Writes all changed qgroups to disk. + * Writes all changed qgroups to disk. + * Called by the transaction commit path and the qgroup assign ioctl. */ int btrfs_run_qgroups(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; int ret = 0; + /* + * In case we are called from the qgroup assign ioctl, assert that we + * are holding the qgroup_ioctl_lock, otherwise we can race with a quota + * disable operation (ioctl) and access a freed quota root. + */ + if (trans->transaction->state != TRANS_STATE_COMMIT_DOING) + lockdep_assert_held(&fs_info->qgroup_ioctl_lock); + if (!fs_info->quota_root) return ret; From 50d281fc434cb8e2497f5e70a309ccca6b1a09f0 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 23 Mar 2023 15:56:48 +0800 Subject: [PATCH 057/173] btrfs: scan device in non-exclusive mode This fixes mkfs/mount/check failures due to race with systemd-udevd scan. During the device scan initiated by systemd-udevd, other user space EXCL operations such as mkfs, mount, or check may get blocked and result in a "Device or resource busy" error. This is because the device scan process opens the device with the EXCL flag in the kernel. Two reports were received: - btrfs/179 test case, where the fsck command failed with the -EBUSY error - LTP pwritev03 test case, where mkfs.vfs failed with the -EBUSY error, when mkfs.vfs tried to overwrite old btrfs filesystem on the device. In both cases, fsck and mkfs (respectively) were racing with a systemd-udevd device scan, and systemd-udevd won, resulting in the -EBUSY error for fsck and mkfs. Reproducing the problem has been difficult because there is a very small window during which these userspace threads can race to acquire the exclusive device open. Even on the system where the problem was observed, the problem occurrences were anywhere between 10 to 400 iterations and chances of reproducing decreases with debug printk()s. However, an exclusive device open is unnecessary for the scan process, as there are no write operations on the device during scan. Furthermore, during the mount process, the superblock is re-read in the below function call chain: btrfs_mount_root btrfs_open_devices open_fs_devices btrfs_open_one_device btrfs_get_bdev_and_sb So, to fix this issue, removes the FMODE_EXCL flag from the scan operation, and add a comment. The case where mkfs may still write to the device and a scan is running, the btrfs signature is not written at that time so scan will not recognize such device. Reported-by: Sherry Yang Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202303170839.fdf23068-oliver.sang@intel.com CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6d0124b6e79e..ac0e8fb92fc8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1366,8 +1366,17 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags, * So, we need to add a special mount option to scan for * later supers, using BTRFS_SUPER_MIRROR_MAX instead */ - flags |= FMODE_EXCL; + /* + * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may + * initiate the device scan which may race with the user's mount + * or mkfs command, resulting in failure. + * Since the device scan is solely for reading purposes, there is + * no need for FMODE_EXCL. Additionally, the devices are read again + * during the mount process. It is ok to get some inconsistent + * values temporarily, as the device paths of the fsid are the only + * required information for assembling the volume. + */ bdev = blkdev_get_by_path(path, flags, holder); if (IS_ERR(bdev)) return ERR_CAST(bdev); From 2d82a40aa7d6fcae0250ec68b8566cdee7bfd44c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 22 Mar 2023 09:46:34 +0000 Subject: [PATCH 058/173] btrfs: fix deadlock when aborting transaction during relocation with scrub Before relocating a block group we pause scrub, then do the relocation and then unpause scrub. The relocation process requires starting and committing a transaction, and if we have a failure in the critical section of the transaction commit path (transaction state >= TRANS_STATE_COMMIT_START), we will deadlock if there is a paused scrub. That results in stack traces like the following: [42.479] BTRFS info (device sdc): relocating block group 53876686848 flags metadata|raid6 [42.936] BTRFS warning (device sdc): Skipping commit of aborted transaction. [42.936] ------------[ cut here ]------------ [42.936] BTRFS: Transaction aborted (error -28) [42.936] WARNING: CPU: 11 PID: 346822 at fs/btrfs/transaction.c:1977 btrfs_commit_transaction+0xcc8/0xeb0 [btrfs] [42.936] Modules linked in: dm_flakey dm_mod loop btrfs (...) [42.936] CPU: 11 PID: 346822 Comm: btrfs Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [42.936] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [42.936] RIP: 0010:btrfs_commit_transaction+0xcc8/0xeb0 [btrfs] [42.936] Code: ff ff 45 8b (...) [42.936] RSP: 0018:ffffb58649633b48 EFLAGS: 00010282 [42.936] RAX: 0000000000000000 RBX: ffff8be6ef4d5bd8 RCX: 0000000000000000 [42.936] RDX: 0000000000000002 RSI: ffffffffb35e7782 RDI: 00000000ffffffff [42.936] RBP: ffff8be6ef4d5c98 R08: 0000000000000000 R09: ffffb586496339e8 [42.936] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8be6d38c7c00 [42.936] R13: 00000000ffffffe4 R14: ffff8be6c268c000 R15: ffff8be6ef4d5cf0 [42.936] FS: 00007f381a82b340(0000) GS:ffff8beddfcc0000(0000) knlGS:0000000000000000 [42.936] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [42.936] CR2: 00007f1e35fb7638 CR3: 0000000117680006 CR4: 0000000000370ee0 [42.936] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [42.936] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [42.936] Call Trace: [42.936] [42.936] ? start_transaction+0xcb/0x610 [btrfs] [42.936] prepare_to_relocate+0x111/0x1a0 [btrfs] [42.936] relocate_block_group+0x57/0x5d0 [btrfs] [42.936] ? btrfs_wait_nocow_writers+0x25/0xb0 [btrfs] [42.936] btrfs_relocate_block_group+0x248/0x3c0 [btrfs] [42.936] ? __pfx_autoremove_wake_function+0x10/0x10 [42.936] btrfs_relocate_chunk+0x3b/0x150 [btrfs] [42.936] btrfs_balance+0x8ff/0x11d0 [btrfs] [42.936] ? __kmem_cache_alloc_node+0x14a/0x410 [42.936] btrfs_ioctl+0x2334/0x32c0 [btrfs] [42.937] ? mod_objcg_state+0xd2/0x360 [42.937] ? refill_obj_stock+0xb0/0x160 [42.937] ? seq_release+0x25/0x30 [42.937] ? __rseq_handle_notify_resume+0x3b5/0x4b0 [42.937] ? percpu_counter_add_batch+0x2e/0xa0 [42.937] ? __x64_sys_ioctl+0x88/0xc0 [42.937] __x64_sys_ioctl+0x88/0xc0 [42.937] do_syscall_64+0x38/0x90 [42.937] entry_SYSCALL_64_after_hwframe+0x72/0xdc [42.937] RIP: 0033:0x7f381a6ffe9b [42.937] Code: 00 48 89 44 24 (...) [42.937] RSP: 002b:00007ffd45ecf060 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [42.937] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f381a6ffe9b [42.937] RDX: 00007ffd45ecf150 RSI: 00000000c4009420 RDI: 0000000000000003 [42.937] RBP: 0000000000000003 R08: 0000000000000013 R09: 0000000000000000 [42.937] R10: 00007f381a60c878 R11: 0000000000000246 R12: 00007ffd45ed0423 [42.937] R13: 00007ffd45ecf150 R14: 0000000000000000 R15: 00007ffd45ecf148 [42.937] [42.937] ---[ end trace 0000000000000000 ]--- [42.937] BTRFS: error (device sdc: state A) in cleanup_transaction:1977: errno=-28 No space left [59.196] INFO: task btrfs:346772 blocked for more than 120 seconds. [59.196] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.196] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.196] task:btrfs state:D stack:0 pid:346772 ppid:1 flags:0x00004002 [59.196] Call Trace: [59.196] [59.196] __schedule+0x392/0xa70 [59.196] ? __pv_queued_spin_lock_slowpath+0x165/0x370 [59.196] schedule+0x5d/0xd0 [59.196] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.197] ? __pfx_autoremove_wake_function+0x10/0x10 [59.197] scrub_pause_off+0x21/0x50 [btrfs] [59.197] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.197] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.198] ? __pfx_autoremove_wake_function+0x10/0x10 [59.198] scrub_stripe+0x20d/0x740 [btrfs] [59.198] scrub_chunk+0xc4/0x130 [btrfs] [59.198] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.198] ? __pfx_autoremove_wake_function+0x10/0x10 [59.198] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.199] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.199] ? _copy_from_user+0x7b/0x80 [59.199] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.199] ? refill_stock+0x33/0x50 [59.199] ? should_failslab+0xa/0x20 [59.199] ? kmem_cache_alloc_node+0x151/0x460 [59.199] ? alloc_io_context+0x1b/0x80 [59.199] ? preempt_count_add+0x70/0xa0 [59.199] ? __x64_sys_ioctl+0x88/0xc0 [59.199] __x64_sys_ioctl+0x88/0xc0 [59.199] do_syscall_64+0x38/0x90 [59.199] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.199] RIP: 0033:0x7f82ffaffe9b [59.199] RSP: 002b:00007f82ff9fcc50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.199] RAX: ffffffffffffffda RBX: 000055b191e36310 RCX: 00007f82ffaffe9b [59.199] RDX: 000055b191e36310 RSI: 00000000c400941b RDI: 0000000000000003 [59.199] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.199] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82ff9fd640 [59.199] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.199] [59.199] INFO: task btrfs:346773 blocked for more than 120 seconds. [59.200] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.200] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.201] task:btrfs state:D stack:0 pid:346773 ppid:1 flags:0x00004002 [59.201] Call Trace: [59.201] [59.201] __schedule+0x392/0xa70 [59.201] ? __pv_queued_spin_lock_slowpath+0x165/0x370 [59.201] schedule+0x5d/0xd0 [59.201] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.201] ? __pfx_autoremove_wake_function+0x10/0x10 [59.201] scrub_pause_off+0x21/0x50 [btrfs] [59.202] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.202] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.202] ? __pfx_autoremove_wake_function+0x10/0x10 [59.202] scrub_stripe+0x20d/0x740 [btrfs] [59.202] scrub_chunk+0xc4/0x130 [btrfs] [59.203] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.203] ? __pfx_autoremove_wake_function+0x10/0x10 [59.203] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.203] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.203] ? _copy_from_user+0x7b/0x80 [59.203] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.204] ? should_failslab+0xa/0x20 [59.204] ? kmem_cache_alloc_node+0x151/0x460 [59.204] ? alloc_io_context+0x1b/0x80 [59.204] ? preempt_count_add+0x70/0xa0 [59.204] ? __x64_sys_ioctl+0x88/0xc0 [59.204] __x64_sys_ioctl+0x88/0xc0 [59.204] do_syscall_64+0x38/0x90 [59.204] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.204] RIP: 0033:0x7f82ffaffe9b [59.204] RSP: 002b:00007f82ff1fbc50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.204] RAX: ffffffffffffffda RBX: 000055b191e36790 RCX: 00007f82ffaffe9b [59.204] RDX: 000055b191e36790 RSI: 00000000c400941b RDI: 0000000000000003 [59.204] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.204] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82ff1fc640 [59.204] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.204] [59.204] INFO: task btrfs:346774 blocked for more than 120 seconds. [59.205] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.205] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.206] task:btrfs state:D stack:0 pid:346774 ppid:1 flags:0x00004002 [59.206] Call Trace: [59.206] [59.206] __schedule+0x392/0xa70 [59.206] schedule+0x5d/0xd0 [59.206] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.206] ? __pfx_autoremove_wake_function+0x10/0x10 [59.206] scrub_pause_off+0x21/0x50 [btrfs] [59.207] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.207] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.207] ? __pfx_autoremove_wake_function+0x10/0x10 [59.207] scrub_stripe+0x20d/0x740 [btrfs] [59.208] scrub_chunk+0xc4/0x130 [btrfs] [59.208] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.208] ? __mutex_unlock_slowpath.isra.0+0x9a/0x120 [59.208] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.208] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.209] ? _copy_from_user+0x7b/0x80 [59.209] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.209] ? should_failslab+0xa/0x20 [59.209] ? kmem_cache_alloc_node+0x151/0x460 [59.209] ? alloc_io_context+0x1b/0x80 [59.209] ? preempt_count_add+0x70/0xa0 [59.209] ? __x64_sys_ioctl+0x88/0xc0 [59.209] __x64_sys_ioctl+0x88/0xc0 [59.209] do_syscall_64+0x38/0x90 [59.209] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.209] RIP: 0033:0x7f82ffaffe9b [59.209] RSP: 002b:00007f82fe9fac50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.209] RAX: ffffffffffffffda RBX: 000055b191e36c10 RCX: 00007f82ffaffe9b [59.209] RDX: 000055b191e36c10 RSI: 00000000c400941b RDI: 0000000000000003 [59.209] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.209] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fe9fb640 [59.209] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.209] [59.209] INFO: task btrfs:346775 blocked for more than 120 seconds. [59.210] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.210] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.211] task:btrfs state:D stack:0 pid:346775 ppid:1 flags:0x00004002 [59.211] Call Trace: [59.211] [59.211] __schedule+0x392/0xa70 [59.211] schedule+0x5d/0xd0 [59.211] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.211] ? __pfx_autoremove_wake_function+0x10/0x10 [59.211] scrub_pause_off+0x21/0x50 [btrfs] [59.212] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.212] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.212] ? __pfx_autoremove_wake_function+0x10/0x10 [59.212] scrub_stripe+0x20d/0x740 [btrfs] [59.213] scrub_chunk+0xc4/0x130 [btrfs] [59.213] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.213] ? __mutex_unlock_slowpath.isra.0+0x9a/0x120 [59.213] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.213] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.214] ? _copy_from_user+0x7b/0x80 [59.214] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.214] ? should_failslab+0xa/0x20 [59.214] ? kmem_cache_alloc_node+0x151/0x460 [59.214] ? alloc_io_context+0x1b/0x80 [59.214] ? preempt_count_add+0x70/0xa0 [59.214] ? __x64_sys_ioctl+0x88/0xc0 [59.214] __x64_sys_ioctl+0x88/0xc0 [59.214] do_syscall_64+0x38/0x90 [59.214] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.214] RIP: 0033:0x7f82ffaffe9b [59.214] RSP: 002b:00007f82fe1f9c50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.214] RAX: ffffffffffffffda RBX: 000055b191e37090 RCX: 00007f82ffaffe9b [59.214] RDX: 000055b191e37090 RSI: 00000000c400941b RDI: 0000000000000003 [59.214] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.214] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fe1fa640 [59.214] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.214] [59.214] INFO: task btrfs:346776 blocked for more than 120 seconds. [59.215] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.216] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.217] task:btrfs state:D stack:0 pid:346776 ppid:1 flags:0x00004002 [59.217] Call Trace: [59.217] [59.217] __schedule+0x392/0xa70 [59.217] ? __pv_queued_spin_lock_slowpath+0x165/0x370 [59.217] schedule+0x5d/0xd0 [59.217] __scrub_blocked_if_needed+0x74/0xc0 [btrfs] [59.217] ? __pfx_autoremove_wake_function+0x10/0x10 [59.217] scrub_pause_off+0x21/0x50 [btrfs] [59.217] scrub_simple_mirror+0x1c7/0x950 [btrfs] [59.217] ? scrub_parity_put+0x1a5/0x1d0 [btrfs] [59.218] ? __pfx_autoremove_wake_function+0x10/0x10 [59.218] scrub_stripe+0x20d/0x740 [btrfs] [59.218] scrub_chunk+0xc4/0x130 [btrfs] [59.218] scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs] [59.219] ? __pfx_autoremove_wake_function+0x10/0x10 [59.219] btrfs_scrub_dev+0x236/0x6a0 [btrfs] [59.219] ? btrfs_ioctl+0xd97/0x32c0 [btrfs] [59.219] ? _copy_from_user+0x7b/0x80 [59.219] btrfs_ioctl+0xde1/0x32c0 [btrfs] [59.219] ? should_failslab+0xa/0x20 [59.219] ? kmem_cache_alloc_node+0x151/0x460 [59.219] ? alloc_io_context+0x1b/0x80 [59.219] ? preempt_count_add+0x70/0xa0 [59.219] ? __x64_sys_ioctl+0x88/0xc0 [59.219] __x64_sys_ioctl+0x88/0xc0 [59.219] do_syscall_64+0x38/0x90 [59.219] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.219] RIP: 0033:0x7f82ffaffe9b [59.219] RSP: 002b:00007f82fd9f8c50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.219] RAX: ffffffffffffffda RBX: 000055b191e37510 RCX: 00007f82ffaffe9b [59.219] RDX: 000055b191e37510 RSI: 00000000c400941b RDI: 0000000000000003 [59.219] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000 [59.219] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fd9f9640 [59.219] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000 [59.219] [59.219] INFO: task btrfs:346822 blocked for more than 120 seconds. [59.220] Tainted: G W 6.3.0-rc2-btrfs-next-127+ #1 [59.221] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [59.222] task:btrfs state:D stack:0 pid:346822 ppid:1 flags:0x00004002 [59.222] Call Trace: [59.222] [59.222] __schedule+0x392/0xa70 [59.222] schedule+0x5d/0xd0 [59.222] btrfs_scrub_cancel+0x91/0x100 [btrfs] [59.222] ? __pfx_autoremove_wake_function+0x10/0x10 [59.222] btrfs_commit_transaction+0x572/0xeb0 [btrfs] [59.223] ? start_transaction+0xcb/0x610 [btrfs] [59.223] prepare_to_relocate+0x111/0x1a0 [btrfs] [59.223] relocate_block_group+0x57/0x5d0 [btrfs] [59.223] ? btrfs_wait_nocow_writers+0x25/0xb0 [btrfs] [59.223] btrfs_relocate_block_group+0x248/0x3c0 [btrfs] [59.224] ? __pfx_autoremove_wake_function+0x10/0x10 [59.224] btrfs_relocate_chunk+0x3b/0x150 [btrfs] [59.224] btrfs_balance+0x8ff/0x11d0 [btrfs] [59.224] ? __kmem_cache_alloc_node+0x14a/0x410 [59.224] btrfs_ioctl+0x2334/0x32c0 [btrfs] [59.225] ? mod_objcg_state+0xd2/0x360 [59.225] ? refill_obj_stock+0xb0/0x160 [59.225] ? seq_release+0x25/0x30 [59.225] ? __rseq_handle_notify_resume+0x3b5/0x4b0 [59.225] ? percpu_counter_add_batch+0x2e/0xa0 [59.225] ? __x64_sys_ioctl+0x88/0xc0 [59.225] __x64_sys_ioctl+0x88/0xc0 [59.225] do_syscall_64+0x38/0x90 [59.225] entry_SYSCALL_64_after_hwframe+0x72/0xdc [59.225] RIP: 0033:0x7f381a6ffe9b [59.225] RSP: 002b:00007ffd45ecf060 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [59.225] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f381a6ffe9b [59.225] RDX: 00007ffd45ecf150 RSI: 00000000c4009420 RDI: 0000000000000003 [59.225] RBP: 0000000000000003 R08: 0000000000000013 R09: 0000000000000000 [59.225] R10: 00007f381a60c878 R11: 0000000000000246 R12: 00007ffd45ed0423 [59.225] R13: 00007ffd45ecf150 R14: 0000000000000000 R15: 00007ffd45ecf148 [59.225] What happens is the following: 1) A scrub is running, so fs_info->scrubs_running is 1; 2) Task A starts block group relocation, and at btrfs_relocate_chunk() it pauses scrub by calling btrfs_scrub_pause(). That increments fs_info->scrub_pause_req from 0 to 1 and waits for the scrub task to pause (for fs_info->scrubs_paused to be == to fs_info->scrubs_running); 3) The scrub task pauses at scrub_pause_off(), waiting for fs_info->scrub_pause_req to decrease to 0; 4) Task A then enters btrfs_relocate_block_group(), and down that call chain we start a transaction and then attempt to commit it; 5) When task A calls btrfs_commit_transaction(), it either will do the commit itself or wait for some other task that already started the commit of the transaction - it doesn't matter which case; 6) The transaction commit enters state TRANS_STATE_COMMIT_START; 7) An error happens during the transaction commit, like -ENOSPC when running delayed refs or delayed items for example; 8) This results in calling transaction.c:cleanup_transaction(), where we call btrfs_scrub_cancel(), incrementing fs_info->scrub_cancel_req from 0 to 1, and blocking this task waiting for fs_info->scrubs_running to decrease to 0; 9) From this point on, both the transaction commit and the scrub task hang forever: 1) The transaction commit is waiting for fs_info->scrubs_running to be decreased to 0; 2) The scrub task is at scrub_pause_off() waiting for fs_info->scrub_pause_req to decrease to 0 - so it can not proceed to stop the scrub and decrement fs_info->scrubs_running from 0 to 1. Therefore resulting in a deadlock. Fix this by having cleanup_transaction(), called if a transaction commit fails, not call btrfs_scrub_cancel() if relocation is in progress, and having btrfs_relocate_block_group() call btrfs_scrub_cancel() instead if the relocation failed and a transaction abort happened. This was triggered with btrfs/061 from fstests. Fixes: 55e3a601c81c ("btrfs: Fix data checksum error cause by replace with io-load.") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 15 ++++++++++++++- fs/btrfs/volumes.c | 9 ++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 18329ebcb1cb..b8d5b1fa9a03 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2035,7 +2035,20 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err) if (current->journal_info == trans) current->journal_info = NULL; - btrfs_scrub_cancel(fs_info); + + /* + * If relocation is running, we can't cancel scrub because that will + * result in a deadlock. Before relocating a block group, relocation + * pauses scrub, then starts and commits a transaction before unpausing + * scrub. If the transaction commit is being done by the relocation + * task or triggered by another task and the relocation task is waiting + * for the commit, and we end up here due to an error in the commit + * path, then calling btrfs_scrub_cancel() will deadlock, as we are + * asking for scrub to stop while having it asked to be paused higher + * above in relocation code. + */ + if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) + btrfs_scrub_cancel(fs_info); kmem_cache_free(btrfs_trans_handle_cachep, trans); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ac0e8fb92fc8..c6d592870400 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3275,8 +3275,15 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) btrfs_scrub_pause(fs_info); ret = btrfs_relocate_block_group(fs_info, chunk_offset); btrfs_scrub_continue(fs_info); - if (ret) + if (ret) { + /* + * If we had a transaction abort, stop all running scrubs. + * See transaction.c:cleanup_transaction() why we do it here. + */ + if (BTRFS_FS_ERROR(fs_info)) + btrfs_scrub_cancel(fs_info); return ret; + } block_group = btrfs_lookup_block_group(fs_info, chunk_offset); if (!block_group) From 44378cd113e5f15bb0a89f5ac5a0e687b52feb90 Mon Sep 17 00:00:00 2001 From: Duy Nguyen Date: Tue, 28 Mar 2023 00:03:03 +0000 Subject: [PATCH 059/173] ASoC: da7213.c: add missing pm_runtime_disable() da7213.c is missing pm_runtime_disable(), thus we will get below error when rmmod -> insmod. $ rmmod snd-soc-da7213.ko $ insmod snd-soc-da7213.ko da7213 0-001a: Unbalanced pm_runtime_enable!" [Kuninori adjusted to latest upstream] Signed-off-by: Duy Nguyen Signed-off-by: Kuninori Morimoto Tested-by: Khanh Le Link: https://lore.kernel.org/r/87mt3xg2tk.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7213.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index 0068780fe0a7..1c1f211a8e2e 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -2022,6 +2022,11 @@ static int da7213_i2c_probe(struct i2c_client *i2c) return ret; } +static void da7213_i2c_remove(struct i2c_client *i2c) +{ + pm_runtime_disable(&i2c->dev); +} + static int __maybe_unused da7213_runtime_suspend(struct device *dev) { struct da7213_priv *da7213 = dev_get_drvdata(dev); @@ -2065,6 +2070,7 @@ static struct i2c_driver da7213_i2c_driver = { .pm = &da7213_pm, }, .probe_new = da7213_i2c_probe, + .remove = da7213_i2c_remove, .id_table = da7213_i2c_id, }; From 1231363aec86704a6b0467a12e3ca7bdf890e01d Mon Sep 17 00:00:00 2001 From: Juraj Pecigos Date: Sun, 26 Mar 2023 11:29:49 +0200 Subject: [PATCH 060/173] nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN A system with more than one of these SSDs will only have one usable. The kernel fails to detect more than one nvme device due to duplicate cntlids. before: [ 9.395229] nvme 0000:01:00.0: platform quirk: setting simple suspend [ 9.395262] nvme nvme0: pci function 0000:01:00.0 [ 9.395282] nvme 0000:03:00.0: platform quirk: setting simple suspend [ 9.395305] nvme nvme1: pci function 0000:03:00.0 [ 9.409873] nvme nvme0: Duplicate cntlid 1 with nvme1, subsys nqn.2022-07.com.siliconmotion:nvm-subsystem-sn- , rejecting [ 9.409982] nvme nvme0: Removing after probe failure status: -22 [ 9.427487] nvme nvme1: allocated 64 MiB host memory buffer. [ 9.445088] nvme nvme1: 16/0/0 default/read/poll queues [ 9.449898] nvme nvme1: Ignoring bogus Namespace Identifiers after: [ 1.161890] nvme 0000:01:00.0: platform quirk: setting simple suspend [ 1.162660] nvme nvme0: pci function 0000:01:00.0 [ 1.162684] nvme 0000:03:00.0: platform quirk: setting simple suspend [ 1.162707] nvme nvme1: pci function 0000:03:00.0 [ 1.191354] nvme nvme0: allocated 64 MiB host memory buffer. [ 1.193378] nvme nvme1: allocated 64 MiB host memory buffer. [ 1.211044] nvme nvme1: 16/0/0 default/read/poll queues [ 1.211080] nvme nvme0: 16/0/0 default/read/poll queues [ 1.216145] nvme nvme0: Ignoring bogus Namespace Identifiers [ 1.216261] nvme nvme1: Ignoring bogus Namespace Identifiers Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue. Signed-off-by: Juraj Pecigos Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b615906263f3..282d808400c5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3441,7 +3441,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */ - .driver_data = NVME_QUIRK_BOGUS_NID, }, + .driver_data = NVME_QUIRK_BOGUS_NID | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), From 21f27df854008b86349a203bf97fef79bb11f53e Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Mon, 13 Feb 2023 09:55:20 +0100 Subject: [PATCH 061/173] KVM: s390: pv: fix external interruption loop not always detected To determine whether the guest has caused an external interruption loop upon code 20 (external interrupt) intercepts, the ext_new_psw needs to be inspected to see whether external interrupts are enabled. Under non-PV, ext_new_psw can simply be taken from guest lowcore. Under PV, KVM can only access the encrypted guest lowcore and hence the ext_new_psw must not be taken from guest lowcore. handle_external_interrupt() incorrectly did that and hence was not able to reliably tell whether an external interruption loop is happening or not. False negatives cause spurious failures of my kvm-unit-test for extint loops[1] under PV. Since code 20 is only caused under PV if and only if the guest's ext_new_psw is enabled for external interrupts, false positive detection of a external interruption loop can not happen. Fix this issue by instead looking at the guest PSW in the state description. Since the PSW swap for external interrupt is done by the ultravisor before the intercept is caused, this reliably tells whether the guest is enabled for external interrupts in the ext_new_psw. Also update the comments to explain better what is happening. [1] https://lore.kernel.org/kvm/20220812062151.1980937-4-nrb@linux.ibm.com/ Signed-off-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Christian Borntraeger Fixes: 201ae986ead7 ("KVM: s390: protvirt: Implement interrupt injection") Link: https://lore.kernel.org/r/20230213085520.100756-2-nrb@linux.ibm.com Message-Id: <20230213085520.100756-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/intercept.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 0ee02dae14b2..2cda8d9d7c6e 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -271,10 +271,18 @@ static int handle_prog(struct kvm_vcpu *vcpu) * handle_external_interrupt - used for external interruption interceptions * @vcpu: virtual cpu * - * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if - * the new PSW does not have external interrupts disabled. In the first case, - * we've got to deliver the interrupt manually, and in the second case, we - * drop to userspace to handle the situation there. + * This interception occurs if: + * - the CPUSTAT_EXT_INT bit was already set when the external interrupt + * occurred. In this case, the interrupt needs to be injected manually to + * preserve interrupt priority. + * - the external new PSW has external interrupts enabled, which will cause an + * interruption loop. We drop to userspace in this case. + * + * The latter case can be detected by inspecting the external mask bit in the + * external new psw. + * + * Under PV, only the latter case can occur, since interrupt priorities are + * handled in the ultravisor. */ static int handle_external_interrupt(struct kvm_vcpu *vcpu) { @@ -285,10 +293,18 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) vcpu->stat.exit_external_interrupt++; - rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t)); - if (rc) - return rc; - /* We can not handle clock comparator or timer interrupt with bad PSW */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + newpsw = vcpu->arch.sie_block->gpsw; + } else { + rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t)); + if (rc) + return rc; + } + + /* + * Clock comparator or timer interrupt with external interrupt enabled + * will cause interrupt loop. Drop to userspace. + */ if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) && (newpsw.mask & PSW_MASK_EXT)) return -EOPNOTSUPP; From 1abce0580b89464546ae06abd5891ebec43c9470 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Fri, 3 Mar 2023 09:59:47 +1100 Subject: [PATCH 062/173] powerpc/64s: Fix __pte_needs_flush() false positive warning Userspace PROT_NONE ptes set _PAGE_PRIVILEGED, triggering a false positive debug assertion that __pte_flags_need_flush() is not called on a kernel mapping. Detect when it is a userspace PROT_NONE page by checking the required bits of PAGE_NONE are set, and none of the RWX bits are set. pte_protnone() is insufficient here because it always returns 0 when CONFIG_NUMA_BALANCING=n. Fixes: b11931e9adc1 ("powerpc/64s: add pte_needs_flush and huge_pmd_needs_flush") Cc: stable@vger.kernel.org # v6.1+ Reported-by: Russell Currey Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230302225947.81083-1-bgray@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/tlbflush.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 2bbc0fcce04a..5e26c7f2c25a 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -148,6 +148,11 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, */ } +static inline bool __pte_protnone(unsigned long pte) +{ + return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE); +} + static inline bool __pte_flags_need_flush(unsigned long oldval, unsigned long newval) { @@ -164,8 +169,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval, /* * We do not expect kernel mappings or non-PTEs or not-present PTEs. */ - VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED); - VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED); + VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED); VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(newval & _PAGE_PTE)); VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT)); From fd7276189450110ed835eb0a334e62d2f1c4e3be Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 26 Mar 2023 16:15:57 -0600 Subject: [PATCH 063/173] powerpc: Don't try to copy PPR for task with NULL pt_regs powerpc sets up PF_KTHREAD and PF_IO_WORKER with a NULL pt_regs, which from my (arguably very short) checking is not commonly done for other archs. This is fine, except when PF_IO_WORKER's have been created and the task does something that causes a coredump to be generated. Then we get this crash: Kernel attempted to read user page (160) - exploit attempt? (uid: 1000) BUG: Kernel NULL pointer dereference on read at 0x00000160 Faulting instruction address: 0xc0000000000c3a60 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=32 NUMA pSeries Modules linked in: bochs drm_vram_helper drm_kms_helper xts binfmt_misc ecb ctr syscopyarea sysfillrect cbc sysimgblt drm_ttm_helper aes_generic ttm sg libaes evdev joydev virtio_balloon vmx_crypto gf128mul drm dm_mod fuse loop configfs drm_panel_orientation_quirks ip_tables x_tables autofs4 hid_generic usbhid hid xhci_pci xhci_hcd usbcore usb_common sd_mod CPU: 1 PID: 1982 Comm: ppc-crash Not tainted 6.3.0-rc2+ #88 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c0000000000c3a60 LR: c000000000039944 CTR: c0000000000398e0 REGS: c0000000041833b0 TRAP: 0300 Not tainted (6.3.0-rc2+) MSR: 800000000280b033 CR: 88082828 XER: 200400f8 ... NIP memcpy_power7+0x200/0x7d0 LR ppr_get+0x64/0xb0 Call Trace: ppr_get+0x40/0xb0 (unreliable) __regset_get+0x180/0x1f0 regset_get_alloc+0x64/0x90 elf_core_dump+0xb98/0x1b60 do_coredump+0x1c34/0x24a0 get_signal+0x71c/0x1410 do_notify_resume+0x140/0x6f0 interrupt_exit_user_prepare_main+0x29c/0x320 interrupt_exit_user_prepare+0x6c/0xa0 interrupt_return_srr_user+0x8/0x138 Because ppr_get() is trying to copy from a PF_IO_WORKER with a NULL pt_regs. Check for a valid pt_regs in both ppc_get/ppr_set, and return an error if not set. The actual error value doesn't seem to be important here, so just pick -EINVAL. Fixes: fa439810cc1b ("powerpc/ptrace: Enable support for NT_PPPC_TAR, NT_PPC_PPR, NT_PPC_DSCR") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Jens Axboe [mpe: Trim oops in change log, add Fixes & Cc stable] Signed-off-by: Michael Ellerman Link: https://msgid.link/d9f63344-fe7c-56ae-b420-4a1a04a2ae4c@kernel.dk --- arch/powerpc/kernel/ptrace/ptrace-view.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 2087a785f05f..5fff0d04b23f 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, static int ppr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + if (!target->thread.regs) + return -EINVAL; + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); } @@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + if (!target->thread.regs) + return -EINVAL; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.regs->ppr, 0, sizeof(u64)); } From 005308f7bdacf5685ed1a431244a183dbbb9e0e8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 27 Mar 2023 19:56:18 -0600 Subject: [PATCH 064/173] io_uring/poll: clear single/double poll flags on poll arming Unless we have at least one entry queued, then don't call into io_poll_remove_entries(). Normally this isn't possible, but if we retry poll then we can have ->nr_entries cleared again as we're setting it up. If this happens for a poll retry, then we'll still have at least REQ_F_SINGLE_POLL set. io_poll_remove_entries() then thinks it has entries to remove. Clear REQ_F_SINGLE_POLL and REQ_F_DOUBLE_POLL unconditionally when arming a poll request. Fixes: c16bda37594f ("io_uring/poll: allow some retries for poll triggering spuriously") Cc: stable@vger.kernel.org Reported-by: Pengfei Xu Signed-off-by: Jens Axboe --- io_uring/poll.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/poll.c b/io_uring/poll.c index 795facbd0e9f..55306e801081 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -726,6 +726,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) apoll = io_req_alloc_apoll(req, issue_flags); if (!apoll) return IO_APOLL_ABORTED; + req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL); req->flags |= REQ_F_POLLED; ipt.pt._qproc = io_async_queue_proc; From b26cd9325be4c1fcd331b77f10acb627c560d4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Mon, 20 Mar 2023 09:32:59 +0000 Subject: [PATCH 065/173] pinctrl: amd: Disable and mask interrupts on resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a similar problem to the one observed in: commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe"). On some systems, during suspend/resume cycle firmware leaves an interrupt enabled on a pin that is not used by the kernel. This confuses the AMD pinctrl driver and causes spurious interrupts. The driver already has logic to detect if a pin is used by the kernel. Leverage it to re-initialize interrupt fields of a pin only if it's not used by us. Cc: stable@vger.kernel.org Fixes: dbad75dd1f25 ("pinctrl: add AMD GPIO driver support.") Signed-off-by: Kornel Dulęba Link: https://lore.kernel.org/r/20230320093259.845178-1-korneld@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 36 +++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 9236a132c7ba..609821b756c2 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -872,32 +872,34 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; -static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +static void amd_gpio_irq_init_pin(struct amd_gpio *gpio_dev, int pin) { - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + const struct pin_desc *pd; unsigned long flags; u32 pin_reg, mask; - int i; mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | BIT(WAKE_CNTRL_OFF_S4); - for (i = 0; i < desc->npins; i++) { - int pin = desc->pins[i].number; - const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); + pd = pin_desc_get(gpio_dev->pctrl, pin); + if (!pd) + return; - if (!pd) - continue; + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + pin_reg = readl(gpio_dev->base + pin * 4); + pin_reg &= ~mask; + writel(pin_reg, gpio_dev->base + pin * 4); + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); +} - raw_spin_lock_irqsave(&gpio_dev->lock, flags); +static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +{ + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + int i; - pin_reg = readl(gpio_dev->base + i * 4); - pin_reg &= ~mask; - writel(pin_reg, gpio_dev->base + i * 4); - - raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); - } + for (i = 0; i < desc->npins; i++) + amd_gpio_irq_init_pin(gpio_dev, i); } #ifdef CONFIG_PM_SLEEP @@ -950,8 +952,10 @@ static int amd_gpio_resume(struct device *dev) for (i = 0; i < desc->npins; i++) { int pin = desc->pins[i].number; - if (!amd_gpio_should_save(gpio_dev, pin)) + if (!amd_gpio_should_save(gpio_dev, pin)) { + amd_gpio_irq_init_pin(gpio_dev, pin); continue; + } raw_spin_lock_irqsave(&gpio_dev->lock, flags); gpio_dev->saved_regs[i] |= readl(gpio_dev->base + pin * 4) & PIN_IRQ_PENDING; From f91bf3272a18356e8585f6bbba896d794632f2af Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 16 Mar 2023 00:25:14 +0100 Subject: [PATCH 066/173] iommu/exynos: Fix set_platform_dma_ops() callback There are some subtle differences between release_device() and set_platform_dma_ops() callbacks, so separate those two callbacks. Device links should be removed only in release_device(), because they were created in probe_device() on purpose and they are needed for proper Exynos IOMMU driver operation. While fixing this, remove the conditional code as it is not really needed. Reported-by: Jason Gunthorpe Fixes: 189d496b48b1 ("iommu/exynos: Add missing set_platform_dma_ops callback") Signed-off-by: Marek Szyprowski Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20230315232514.1046589-1-m.szyprowski@samsung.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 483aaaeb6dae..1abd187c6075 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1415,23 +1415,26 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev) return &data->iommu; } -static void exynos_iommu_release_device(struct device *dev) +static void exynos_iommu_set_platform_dma(struct device *dev) { struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); - struct sysmmu_drvdata *data; if (owner->domain) { struct iommu_group *group = iommu_group_get(dev); if (group) { -#ifndef CONFIG_ARM - WARN_ON(owner->domain != - iommu_group_default_domain(group)); -#endif exynos_iommu_detach_device(owner->domain, dev); iommu_group_put(group); } } +} + +static void exynos_iommu_release_device(struct device *dev) +{ + struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev); + struct sysmmu_drvdata *data; + + exynos_iommu_set_platform_dma(dev); list_for_each_entry(data, &owner->controllers, owner_node) device_link_del(data->link); @@ -1479,7 +1482,7 @@ static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, .device_group = generic_device_group, #ifdef CONFIG_ARM - .set_platform_dma_ops = exynos_iommu_release_device, + .set_platform_dma_ops = exynos_iommu_set_platform_dma, #endif .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, From b57841fb0b564c61508222e885ac8f30a2811089 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Mar 2023 20:43:26 +0200 Subject: [PATCH 067/173] thermal: core: Drop excessive lockdep_assert_held() calls The lockdep_assert_held() calls added to cooling_device_stats_setup() and cooling_device_stats_destroy() by commit 790930f44289 ("thermal: core: Introduce thermal_cooling_device_update()") trigger false-positive lockdep reports in code paths that are not subject to race conditions (before cooling device registration and after cooling device removal). For this reason, remove the lockdep_assert_held() calls from both cooling_device_stats_setup() and cooling_device_stats_destroy() and add one to thermal_cooling_device_stats_reinit() that has to be called under the cdev lock. Fixes: 790930f44289 ("thermal: core: Introduce thermal_cooling_device_update()") Link: https://lore.kernel.org/linux-acpi/ZCIDTLFt27Ei7+V6@ideak-desk.fi.intel.com Reported-by: Imre Deak Signed-off-by: Rafael J. Wysocki --- drivers/thermal/thermal_sysfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index a4aba7b8bb8b..6c20c9f90a05 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -876,8 +876,6 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) unsigned long states = cdev->max_state + 1; int var; - lockdep_assert_held(&cdev->lock); - var = sizeof(*stats); var += sizeof(*stats->time_in_state) * states; var += sizeof(*stats->trans_table) * states * states; @@ -903,8 +901,6 @@ out: static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev) { - lockdep_assert_held(&cdev->lock); - kfree(cdev->stats); cdev->stats = NULL; } @@ -931,6 +927,8 @@ void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev) void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev) { + lockdep_assert_held(&cdev->lock); + cooling_device_stats_destroy(cdev); cooling_device_stats_setup(cdev); } From 2280d425ba3599bdd85c41bd0ec8ba568f00c032 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 28 Mar 2023 10:45:20 +0100 Subject: [PATCH 068/173] btrfs: ignore fiemap path cache when there are multiple paths for a node During fiemap, when walking backreferences to determine if a b+tree node/leaf is shared, we may find a tree block (leaf or node) for which two parents were added to the references ulist. This happens if we get for example one direct ref (shared tree block ref) and one indirect ref (non-shared tree block ref) for the tree block at the current level, which can happen during relocation. In that case the fiemap path cache can not be used since it's meant for a single path, with one tree block at each possible level, so having multiple references for a tree block at any level may result in getting the level counter exceed BTRFS_MAX_LEVEL and eventually trigger the warning: WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL) at lookup_backref_shared_cache() and at store_backref_shared_cache(). This is harmless since the code ignores any level >= BTRFS_MAX_LEVEL, the warning is there just to catch any unexpected case like the one described above. However if a user finds this it may be scary and get reported. So just ignore the path cache once we find a tree block for which there are more than one reference, which is the less common case, and update the cache with the sharedness check result for all levels below the level for which we found multiple references. Reported-by: Jarno Pelkonen Link: https://lore.kernel.org/linux-btrfs/CAKv8qLmDNAGJGCtsevxx_VZ_YOvvs1L83iEJkTgyA4joJertng@mail.gmail.com/ Fixes: 12a824dc67a6 ("btrfs: speedup checking for extent sharedness during fiemap") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 85 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 22 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 90e40d5ceccd..e54f0884802a 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1921,8 +1921,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, level = -1; ULIST_ITER_INIT(&uiter); while (1) { - bool is_shared; - bool cached; + const unsigned long prev_ref_count = ctx->refs.nnodes; walk_ctx.bytenr = bytenr; ret = find_parent_nodes(&walk_ctx, &shared); @@ -1940,21 +1939,36 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, ret = 0; /* - * If our data extent was not directly shared (without multiple - * reference items), than it might have a single reference item - * with a count > 1 for the same offset, which means there are 2 - * (or more) file extent items that point to the data extent - - * this happens when a file extent item needs to be split and - * then one item gets moved to another leaf due to a b+tree leaf - * split when inserting some item. In this case the file extent - * items may be located in different leaves and therefore some - * of the leaves may be referenced through shared subtrees while - * others are not. Since our extent buffer cache only works for - * a single path (by far the most common case and simpler to - * deal with), we can not use it if we have multiple leaves - * (which implies multiple paths). + * More than one extent buffer (bytenr) may have been added to + * the ctx->refs ulist, in which case we have to check multiple + * tree paths in case the first one is not shared, so we can not + * use the path cache which is made for a single path. Multiple + * extent buffers at the current level happen when: + * + * 1) level -1, the data extent: If our data extent was not + * directly shared (without multiple reference items), then + * it might have a single reference item with a count > 1 for + * the same offset, which means there are 2 (or more) file + * extent items that point to the data extent - this happens + * when a file extent item needs to be split and then one + * item gets moved to another leaf due to a b+tree leaf split + * when inserting some item. In this case the file extent + * items may be located in different leaves and therefore + * some of the leaves may be referenced through shared + * subtrees while others are not. Since our extent buffer + * cache only works for a single path (by far the most common + * case and simpler to deal with), we can not use it if we + * have multiple leaves (which implies multiple paths). + * + * 2) level >= 0, a tree node/leaf: We can have a mix of direct + * and indirect references on a b+tree node/leaf, so we have + * to check multiple paths, and the extent buffer (the + * current bytenr) may be shared or not. One example is + * during relocation as we may get a shared tree block ref + * (direct ref) and a non-shared tree block ref (indirect + * ref) for the same node/leaf. */ - if (level == -1 && ctx->refs.nnodes > 1) + if ((ctx->refs.nnodes - prev_ref_count) > 1) ctx->use_path_cache = false; if (level >= 0) @@ -1964,18 +1978,45 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, if (!node) break; bytenr = node->val; - level++; - cached = lookup_backref_shared_cache(ctx, root, bytenr, level, - &is_shared); - if (cached) { - ret = (is_shared ? 1 : 0); - break; + if (ctx->use_path_cache) { + bool is_shared; + bool cached; + + level++; + cached = lookup_backref_shared_cache(ctx, root, bytenr, + level, &is_shared); + if (cached) { + ret = (is_shared ? 1 : 0); + break; + } } shared.share_count = 0; shared.have_delayed_delete_refs = false; cond_resched(); } + /* + * If the path cache is disabled, then it means at some tree level we + * got multiple parents due to a mix of direct and indirect backrefs or + * multiple leaves with file extent items pointing to the same data + * extent. We have to invalidate the cache and cache only the sharedness + * result for the levels where we got only one node/reference. + */ + if (!ctx->use_path_cache) { + int i = 0; + + level--; + if (ret >= 0 && level >= 0) { + bytenr = ctx->path_cache_entries[level].bytenr; + ctx->use_path_cache = true; + store_backref_shared_cache(ctx, root, bytenr, level, ret); + i = level + 1; + } + + for ( ; i < BTRFS_MAX_LEVEL; i++) + ctx->path_cache_entries[i].bytenr = 0; + } + /* * Cache the sharedness result for the data extent if we know our inode * has more than 1 file extent item that refers to the data extent. From e4056e38ec87b4c21eb34bb8e38b1b0ca1221744 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Mar 2023 09:41:27 +0100 Subject: [PATCH 069/173] dt-bindings: pinctrl: qcom,sm8550-lpass-lpi: allow input-enabled and bias-bus-hold Add missing common pin configuration properties: input-enabled and bias-bus-hold. Fixes: 268e97ccc311 ("dt-bindings: pinctrl: qcom,sm8550-lpass-lpi-pinctrl: add SM8550 LPASS") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230324084127.29362-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- .../bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml index 5e90051ed314..8f60a9113e7a 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml @@ -96,9 +96,11 @@ $defs: 2: Lower Slew rate (slower edges) 3: Reserved (No adjustments) + bias-bus-hold: true bias-pull-down: true bias-pull-up: true bias-disable: true + input-enable: true output-high: true output-low: true From 5c2712387d4850e0b64121d5fd3e6c4e84ea3266 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 28 Mar 2023 19:49:15 +0800 Subject: [PATCH 070/173] cacheinfo: Fix LLC is not exported through sysfs After entering 6.3-rc1 the LLC cacheinfo is not exported on our ACPI based arm64 server. This is because the LLC cacheinfo is partly reset when secondary CPUs boot up. On arm64 the primary cpu will allocate and setup cacheinfo: init_cpu_topology() for_each_possible_cpu() fetch_cache_info() // Allocate cacheinfo and init levels detect_cache_attributes() cache_shared_cpu_map_setup() if (!last_level_cache_is_valid()) // not valid, setup LLC cache_setup_properties() // setup LLC On secondary CPU boot up: detect_cache_attributes() populate_cache_leaves() get_cache_type() // Get cache type from clidr_el1, // for LLC type=CACHE_TYPE_NOCACHE cache_shared_cpu_map_setup() if (!last_level_cache_is_valid()) // Valid and won't go to this branch, // leave LLC's type=CACHE_TYPE_NOCACHE The last_level_cache_is_valid() use cacheinfo->{attributes, fw_token} to test it's valid or not, but populate_cache_leaves() will only reset LLC's type, so we won't try to re-setup LLC's type and leave it CACHE_TYPE_NOCACHE and won't export it through sysfs. This patch tries to fix this by not re-populating the cache leaves if the LLC is valid. Fixes: 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU") Signed-off-by: Yicong Yang Reviewed-by: Pierre Gondois Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20230328114915.33340-1-yangyicong@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index f6573c335f4c..f3903d002819 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -474,12 +474,18 @@ int detect_cache_attributes(unsigned int cpu) populate_leaves: /* - * populate_cache_leaves() may completely setup the cache leaves and - * shared_cpu_map or it may leave it partially setup. + * If LLC is valid the cache leaves were already populated so just go to + * update the cpu map. */ - ret = populate_cache_leaves(cpu); - if (ret) - goto free_ci; + if (!last_level_cache_is_valid(cpu)) { + /* + * populate_cache_leaves() may completely setup the cache leaves and + * shared_cpu_map or it may leave it partially setup. + */ + ret = populate_cache_leaves(cpu); + if (ret) + goto free_ci; + } /* * For systems using DT for cache hierarchy, fw_token From eca9f6e6f83b6725b84e1c76fdde19b003cff0eb Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Mon, 20 Mar 2023 19:50:08 -0700 Subject: [PATCH 071/173] powerpc/pseries/vas: Ignore VAS update for DLPAR if copy/paste is not enabled The hypervisor supports user-mode NX from Power10. pseries_vas_dlpar_cpu() is called from lparcfg_write() to update VAS windows for DLPAR event in shared processor mode and the kernel gets -ENOTSUPP for HCALLs if the user-mode NX is not supported. The current VAS implementation also supports only with Radix page tables. Whereas in dedicated processor mode, pseries_vas_notifier() is registered only if the copy/paste feature is enabled. So instead of displaying HCALL error messages, update VAS capabilities if the copy/paste feature is available. This patch ignores updating VAS capabilities in pseries_vas_dlpar_cpu() and returns success if the copy/paste feature is not enabled. Then lparcfg_write() completes the processor DLPAR operations without any failures. Fixes: 2147783d6bf0 ("powerpc/pseries: Use lparcfg to reconfig VAS windows for DLPAR CPU") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Haren Myneni Reviewed-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/1d0e727e7dbd9a28627ef08ca9df9c86a50175e2.camel@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 559112312810..513180467562 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -856,6 +856,13 @@ int pseries_vas_dlpar_cpu(void) { int new_nr_creds, rc; + /* + * NX-GZIP is not enabled. Nothing to do for DLPAR event + */ + if (!copypaste_feat) + return 0; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, (u64)virt_to_phys(&hv_cop_caps)); @@ -1012,6 +1019,7 @@ static int __init pseries_vas_init(void) * Linux supports user space COPY/PASTE only with Radix */ if (!radix_enabled()) { + copypaste_feat = false; pr_err("API is supported only with radix page tables\n"); return -ENOTSUPP; } From e3720f92e0237921da537e47a0b24e27899203f8 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 29 Mar 2023 14:38:28 +0300 Subject: [PATCH 072/173] ASoC: SOF: avoid a NULL dereference with unsupported widgets If an IPC4 topology contains an unsupported widget, its .module_info field won't be set, then sof_ipc4_route_setup() will cause a kernel Oops trying to dereference it. Add a check for such cases. Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Guennadi Liakhovetski Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230329113828.28562-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/ipc4-topology.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index a623707c8ffc..669b99a4f76e 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -1805,6 +1805,14 @@ static int sof_ipc4_route_setup(struct snd_sof_dev *sdev, struct snd_sof_route * u32 header, extension; int ret; + if (!src_fw_module || !sink_fw_module) { + /* The NULL module will print as "(efault)" */ + dev_err(sdev->dev, "source %s or sink %s widget weren't set up properly\n", + src_fw_module->man4_module_entry.name, + sink_fw_module->man4_module_entry.name); + return -ENODEV; + } + sroute->src_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget, SOF_PIN_TYPE_SOURCE); if (sroute->src_queue_id < 0) { From 4ff0b50de8cabba055efe50bbcb7506c41a69835 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 29 Mar 2023 15:03:43 +0100 Subject: [PATCH 073/173] io_uring/rsrc: fix rogue rsrc node grabbing We should not be looking at ctx->rsrc_node and anyhow modifying the node without holding uring_lock, grabbing references in such a way is not safe either. Cc: stable@vger.kernel.org Fixes: 5106dd6e74ab6 ("io_uring: propagate issue_flags state down to file assignment") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1202ede2d7bb90136e3482b2b84aad9ed483e5d6.1680098433.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 2b8743645efc..f27f4975217d 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -144,15 +144,13 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req, unsigned int issue_flags) { if (!req->rsrc_node) { + io_ring_submit_lock(ctx, issue_flags); + + lockdep_assert_held(&ctx->uring_lock); + req->rsrc_node = ctx->rsrc_node; - - if (!(issue_flags & IO_URING_F_UNLOCKED)) { - lockdep_assert_held(&ctx->uring_lock); - - io_charge_rsrc_node(ctx); - } else { - percpu_ref_get(&req->rsrc_node->refs); - } + io_charge_rsrc_node(ctx); + io_ring_submit_unlock(ctx, issue_flags); } } From 433279beba1d4872da10b7b60a539e0cb828b32b Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 28 Mar 2023 17:44:00 +0800 Subject: [PATCH 074/173] md: fix regression for null-ptr-deference in __md_stop() Commit 3e453522593d ("md: Free resources in __md_stop") tried to fix null-ptr-deference for 'active_io' by moving percpu_ref_exit() to __md_stop(), however, the commit also moving 'writes_pending' to __md_stop(), and this will cause mdadm tests broken: BUG: kernel NULL pointer dereference, address: 0000000000000038 Oops: 0000 [#1] PREEMPT SMP CPU: 15 PID: 17830 Comm: mdadm Not tainted 6.3.0-rc3-next-20230324-00009-g520d37 RIP: 0010:free_percpu+0x465/0x670 Call Trace: __percpu_ref_exit+0x48/0x70 percpu_ref_exit+0x1a/0x90 __md_stop+0xe9/0x170 do_md_stop+0x1e1/0x7b0 md_ioctl+0x90c/0x1aa0 blkdev_ioctl+0x19b/0x400 vfs_ioctl+0x20/0x50 __x64_sys_ioctl+0xba/0xe0 do_syscall_64+0x6c/0xe0 entry_SYSCALL_64_after_hwframe+0x63/0xcd And the problem can be reporduced 100% by following test: mdadm -CR /dev/md0 -l1 -n1 /dev/sda --force echo inactive > /sys/block/md0/md/array_state echo read-auto > /sys/block/md0/md/array_state echo inactive > /sys/block/md0/md/array_state Root cause: // start raid raid1_run mddev_init_writes_pending percpu_ref_init // inactive raid array_state_store do_md_stop __md_stop percpu_ref_exit // start raid again array_state_store do_md_run raid1_run mddev_init_writes_pending if (mddev->writes_pending.percpu_count_ptr) // won't reinit // inactive raid again ... percpu_ref_exit -> null-ptr-deference Before the commit, 'writes_pending' is exited when mddev is freed, and it's safe to restart raid because mddev_init_writes_pending() already make sure that 'writes_pending' will only be initialized once. Fix the prblem by moving 'writes_pending' back, it's a litter hard to find the relationship between alloc memory and free memory, however, code changes is much less and we lived with this for a long time already. Fixes: 3e453522593d ("md: Free resources in __md_stop") Signed-off-by: Yu Kuai Reviewed-by: Xiao Ni Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230328094400.1448955-1-yukuai1@huaweicloud.com --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 39e49e5d7182..13321dbb5fbc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6260,7 +6260,6 @@ static void __md_stop(struct mddev *mddev) module_put(pers->owner); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - percpu_ref_exit(&mddev->writes_pending); percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); @@ -6273,6 +6272,7 @@ void md_stop(struct mddev *mddev) */ __md_stop_writes(mddev); __md_stop(mddev); + percpu_ref_exit(&mddev->writes_pending); } EXPORT_SYMBOL_GPL(md_stop); @@ -7843,6 +7843,7 @@ static void md_free_disk(struct gendisk *disk) { struct mddev *mddev = disk->private_data; + percpu_ref_exit(&mddev->writes_pending); mddev_free(mddev); } From a57cc2dbb3738930d9cb361b9b473f90c8ede0b8 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 29 Mar 2023 08:22:07 -0700 Subject: [PATCH 075/173] thermal: intel: int340x: processor_thermal: Fix additional deadlock Commit 52f04f10b900 ("thermal: intel: int340x: processor_thermal: Fix deadlock") addressed deadlock issue during user space trip update. But it missed a case when thermal zone device is disabled when user writes 0. Call to thermal_zone_device_disable() also causes deadlock as it also tries to lock tz->lock, which is already claimed by trip_point_temp_store() in the thermal core code. Remove call to thermal_zone_device_disable() in the function sys_set_trip_temp(), which is called from trip_point_temp_store(). Fixes: 52f04f10b900 ("thermal: intel: int340x: processor_thermal: Fix deadlock") Signed-off-by: Srinivas Pandruvada Cc: 6.2+ # 6.2+ Signed-off-by: Rafael J. Wysocki --- .../thermal/intel/int340x_thermal/processor_thermal_device_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 90526f46c9b1..d71ee50e7878 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -153,7 +153,6 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp cancel_delayed_work_sync(&pci_info->work); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0); - thermal_zone_device_disable(tzd); pci_info->stored_thres = 0; return 0; } From 1aa866931b8026a0dd636e9ef7b5c5dfb4cc5ce8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 24 Mar 2023 10:05:38 +0000 Subject: [PATCH 076/173] RISC-V: add non-alternative fallback for riscv_has_extension_[un]likely() The has_fpu() check, which in turn calls riscv_has_extension_likely(), relies on alternatives to figure out whether the system has an FPU. As a result, it will malfunction on XIP kernels, as they do not support the alternatives mechanism. When alternatives support is not present, fall back to using __riscv_isa_extension_available() in riscv_has_extension_[un]likely() instead stead, which handily takes the same argument, so that kernels that do not support alternatives can accurately report the presence of FPU support. Fixes: 702e64550b12 ("riscv: fpu: switch has_fpu() to riscv_has_extension_likely()") Link: https://lore.kernel.org/all/ad445951-3d13-4644-94d9-e0989cda39c3@spud/ Signed-off-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Jason A. Donenfeld Link: https://lore.kernel.org/r/20230324100538.3514663-2-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 50 ++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index e3021b2590de..6263a0de1c6a 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -57,18 +57,31 @@ struct riscv_isa_ext_data { unsigned int isa_ext_id; }; +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); + +#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + static __always_inline bool riscv_has_extension_likely(const unsigned long ext) { compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - asm_volatile_goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_no); + } else { + if (!__riscv_isa_extension_available(NULL, ext)) + goto l_no; + } return true; l_no: @@ -81,26 +94,23 @@ riscv_has_extension_unlikely(const unsigned long ext) compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); - asm_volatile_goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { + asm_volatile_goto( + ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) + : + : [ext] "i" (ext) + : + : l_yes); + } else { + if (__riscv_isa_extension_available(NULL, ext)) + goto l_yes; + } return false; l_yes: return true; } -unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext) - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - #endif #endif /* _ASM_RISCV_HWCAP_H */ From 1ee7fc3f4d0a93831a20d5566f203d5ad6d44de8 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 24 Mar 2023 10:05:39 +0000 Subject: [PATCH 077/173] RISC-V: always select RISCV_ALTERNATIVE for non-xip kernels When moving switch_to's has_fpu() over to using riscv_has_extension_likely() rather than static branches, the FPU code gained a dependency on the alternatives framework. That dependency has now been removed, as riscv_has_extension_ikely() now contains a fallback path, using __riscv_isa_extension_available(), but if CONFIG_RISCV_ALTERNATIVE isn't selected when CONFIG_FPU is, has_fpu() checks will not benefit from the "fast path" that the alternatives framework provides. We want to ensure that alternatives are available whenever riscv_has_extension_[un]likely() is used, rather than silently falling back to the slow path, but rather than rely on selecting RISCV_ALTERNATIVE in the myriad of locations that may use riscv_has_extension_[un]likely(), select it (almost) always instead by adding it to the main RISCV config entry. xip kernels cannot make use of the alternatives framework, so it is not enabled for those configurations, although this is the status quo. All current sites that select RISCV_ALTERNATIVE are converted to dependencies on the option instead. The explicit dependencies on !XIP_KERNEL can be dropped, as RISCV_ALTERNATIVE is not user selectable. Fixes: 702e64550b12 ("riscv: fpu: switch has_fpu() to riscv_has_extension_likely()") Link: https://lore.kernel.org/all/ZBruFRwt3rUVngPu@zx2c4.com/ Reported-by: Jason A. Donenfeld Signed-off-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Jason A. Donenfeld Link: https://lore.kernel.org/r/20230324100538.3514663-3-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 12 ++++++------ arch/riscv/Kconfig.erratas | 6 ++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c5e42cc37604..2f6976418d0a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -126,6 +126,7 @@ config RISCV select OF_IRQ select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI + select RISCV_ALTERNATIVE if !XIP_KERNEL select RISCV_INTC select RISCV_TIMER if RISCV_SBI select SIFIVE_PLIC @@ -401,9 +402,8 @@ config RISCV_ISA_C config RISCV_ISA_SVPBMT bool "SVPBMT extension support" depends on 64BIT && MMU - depends on !XIP_KERNEL + depends on RISCV_ALTERNATIVE default y - select RISCV_ALTERNATIVE help Adds support to dynamically detect the presence of the SVPBMT ISA-extension (Supervisor-mode: page-based memory types) and @@ -428,8 +428,8 @@ config TOOLCHAIN_HAS_ZBB config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" depends on TOOLCHAIN_HAS_ZBB - depends on !XIP_KERNEL && MMU - select RISCV_ALTERNATIVE + depends on MMU + depends on RISCV_ALTERNATIVE default y help Adds support to dynamically detect the presence of the ZBB @@ -443,9 +443,9 @@ config RISCV_ISA_ZBB config RISCV_ISA_ZICBOM bool "Zicbom extension support for non-coherent DMA operation" - depends on !XIP_KERNEL && MMU + depends on MMU + depends on RISCV_ALTERNATIVE default y - select RISCV_ALTERNATIVE select RISCV_DMA_NONCOHERENT help Adds support to dynamically detect the presence of the ZICBOM diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index 69621ae6d647..0c8f4652cd82 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -2,8 +2,7 @@ menu "CPU errata selection" config ERRATA_SIFIVE bool "SiFive errata" - depends on !XIP_KERNEL - select RISCV_ALTERNATIVE + depends on RISCV_ALTERNATIVE help All SiFive errata Kconfig depend on this Kconfig. Disabling this Kconfig will disable all SiFive errata. Please say "Y" @@ -35,8 +34,7 @@ config ERRATA_SIFIVE_CIP_1200 config ERRATA_THEAD bool "T-HEAD errata" - depends on !XIP_KERNEL - select RISCV_ALTERNATIVE + depends on RISCV_ALTERNATIVE help All T-HEAD errata Kconfig depend on this Kconfig. Disabling this Kconfig will disable all T-HEAD errata. Please say "Y" From 88eaba80328b31ef81813a1207b4056efd7006a6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 20 Mar 2023 15:33:34 +0200 Subject: [PATCH 078/173] nvme-tcp: fix a possible UAF when failing to allocate an io queue When we allocate a nvme-tcp queue, we set the data_ready callback before we actually need to use it. This creates the potential that if a stray controller sends us data on the socket before we connect, we can trigger the io_work and start consuming the socket. In this case reported: we failed to allocate one of the io queues, and as we start releasing the queues that we already allocated, we get a UAF [1] from the io_work which is running before it should really. Fix this by setting the socket ops callbacks only before we start the queue, so that we can't accidentally schedule the io_work in the initialization phase before the queue started. While we are at it, rename nvme_tcp_restore_sock_calls to pair with nvme_tcp_setup_sock_ops. [1]: [16802.107284] nvme nvme4: starting error recovery [16802.109166] nvme nvme4: Reconnecting in 10 seconds... [16812.173535] nvme nvme4: failed to connect socket: -111 [16812.173745] nvme nvme4: Failed reconnect attempt 1 [16812.173747] nvme nvme4: Reconnecting in 10 seconds... [16822.413555] nvme nvme4: failed to connect socket: -111 [16822.413762] nvme nvme4: Failed reconnect attempt 2 [16822.413765] nvme nvme4: Reconnecting in 10 seconds... [16832.661274] nvme nvme4: creating 32 I/O queues. [16833.919887] BUG: kernel NULL pointer dereference, address: 0000000000000088 [16833.920068] nvme nvme4: Failed reconnect attempt 3 [16833.920094] #PF: supervisor write access in kernel mode [16833.920261] nvme nvme4: Reconnecting in 10 seconds... [16833.920368] #PF: error_code(0x0002) - not-present page [16833.921086] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] [16833.921191] RIP: 0010:_raw_spin_lock_bh+0x17/0x30 ... [16833.923138] Call Trace: [16833.923271] [16833.923402] lock_sock_nested+0x1e/0x50 [16833.923545] nvme_tcp_try_recv+0x40/0xa0 [nvme_tcp] [16833.923685] nvme_tcp_io_work+0x68/0xa0 [nvme_tcp] [16833.923824] process_one_work+0x1e8/0x390 [16833.923969] worker_thread+0x53/0x3d0 [16833.924104] ? process_one_work+0x390/0x390 [16833.924240] kthread+0x124/0x150 [16833.924376] ? set_kthread_struct+0x50/0x50 [16833.924518] ret_from_fork+0x1f/0x30 [16833.924655] Reported-by: Yanjun Zhang Signed-off-by: Sagi Grimberg Tested-by: Yanjun Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 46 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 42c0598c31f2..49c9e7bc9116 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1620,22 +1620,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) if (ret) goto err_init_connect; - queue->rd_enabled = true; set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); - nvme_tcp_init_recv_ctx(queue); - - write_lock_bh(&queue->sock->sk->sk_callback_lock); - queue->sock->sk->sk_user_data = queue; - queue->state_change = queue->sock->sk->sk_state_change; - queue->data_ready = queue->sock->sk->sk_data_ready; - queue->write_space = queue->sock->sk->sk_write_space; - queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; - queue->sock->sk->sk_state_change = nvme_tcp_state_change; - queue->sock->sk->sk_write_space = nvme_tcp_write_space; -#ifdef CONFIG_NET_RX_BUSY_POLL - queue->sock->sk->sk_ll_usec = 1; -#endif - write_unlock_bh(&queue->sock->sk->sk_callback_lock); return 0; @@ -1655,7 +1640,7 @@ err_destroy_mutex: return ret; } -static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue) +static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue) { struct socket *sock = queue->sock; @@ -1670,7 +1655,7 @@ static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue) static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) { kernel_sock_shutdown(queue->sock, SHUT_RDWR); - nvme_tcp_restore_sock_calls(queue); + nvme_tcp_restore_sock_ops(queue); cancel_work_sync(&queue->io_work); } @@ -1688,21 +1673,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) +{ + write_lock_bh(&queue->sock->sk->sk_callback_lock); + queue->sock->sk->sk_user_data = queue; + queue->state_change = queue->sock->sk->sk_state_change; + queue->data_ready = queue->sock->sk->sk_data_ready; + queue->write_space = queue->sock->sk->sk_write_space; + queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; + queue->sock->sk->sk_state_change = nvme_tcp_state_change; + queue->sock->sk->sk_write_space = nvme_tcp_write_space; +#ifdef CONFIG_NET_RX_BUSY_POLL + queue->sock->sk->sk_ll_usec = 1; +#endif + write_unlock_bh(&queue->sock->sk->sk_callback_lock); +} + static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[idx]; int ret; + queue->rd_enabled = true; + nvme_tcp_init_recv_ctx(queue); + nvme_tcp_setup_sock_ops(queue); + if (idx) ret = nvmf_connect_io_queue(nctrl, idx); else ret = nvmf_connect_admin_queue(nctrl); if (!ret) { - set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags); + set_bit(NVME_TCP_Q_LIVE, &queue->flags); } else { - if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags)) - __nvme_tcp_stop_queue(&ctrl->queues[idx]); + if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) + __nvme_tcp_stop_queue(queue); dev_err(nctrl->device, "failed to connect queue: %d ret=%d\n", idx, ret); } From fd30d1cdcc4ff405fc54765edf2e11b03f2ed4f3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 30 Mar 2023 06:52:38 -0600 Subject: [PATCH 079/173] io_uring: fix poll/netmsg alloc caches We increase cache->nr_cached when we free into the cache but don't decrease when we take from it, so in some time we'll get an empty cache with cache->nr_cached larger than IO_ALLOC_CACHE_MAX, that fails io_alloc_cache_put() and effectively disables caching. Fixes: 9b797a37c4bd8 ("io_uring: add abstraction around apoll cache") Cc: stable@vger.kernel.org Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- io_uring/alloc_cache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index 729793ae9712..c2cde88aeed5 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -27,6 +27,7 @@ static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *c struct hlist_node *node = cache->list.first; hlist_del(node); + cache->nr_cached--; return container_of(node, struct io_cache_entry, node); } From ae817e618d4b5d221daae34d32a39476e4bdcb36 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 30 Mar 2023 09:42:18 -0400 Subject: [PATCH 080/173] thermal: intel: powerclamp: Fix cpumask and max_idle module parameters When cpumask is specified as a module parameter the value is overwritten by the module init routine. This can easily be fixed by checking to see if the mask has already been allocated in the init routine. When max_idle is specified as a module parameter a panic will occur. The problem is that the idle_injection_cpu_mask is not allocated until the module init routine executes. This can easily be fixed by allocating the cpumask if it's not already allocated. Fixes: ebf519710218 ("thermal: intel: powerclamp: Add two module parameters") Signed-off-by: David Arcari Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_powerclamp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index c7ba5680cd48..91fc7e239497 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -235,6 +235,12 @@ static int max_idle_set(const char *arg, const struct kernel_param *kp) goto skip_limit_set; } + if (!cpumask_available(idle_injection_cpu_mask)) { + ret = allocate_copy_idle_injection_mask(cpu_present_mask); + if (ret) + goto skip_limit_set; + } + if (check_invalid(idle_injection_cpu_mask, new_max_idle)) { ret = -EINVAL; goto skip_limit_set; @@ -791,7 +797,8 @@ static int __init powerclamp_init(void) return retval; mutex_lock(&powerclamp_lock); - retval = allocate_copy_idle_injection_mask(cpu_present_mask); + if (!cpumask_available(idle_injection_cpu_mask)) + retval = allocate_copy_idle_injection_mask(cpu_present_mask); mutex_unlock(&powerclamp_lock); if (retval) From 179a88a8558bbf42991d361595281f3e45d7edfc Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 29 Mar 2023 22:24:06 +0200 Subject: [PATCH 081/173] cifs: fix DFS traversal oops without CONFIG_CIFS_DFS_UPCALL When compiled with CONFIG_CIFS_DFS_UPCALL disabled, cifs_dfs_d_automount is NULL. cifs.ko logic for mapping CIFS_FATTR_DFS_REFERRAL attributes to S_AUTOMOUNT and corresponding dentry flags is retained regardless of CONFIG_CIFS_DFS_UPCALL, leading to a NULL pointer dereference in VFS follow_automount() when traversing a DFS referral link: BUG: kernel NULL pointer dereference, address: 0000000000000000 ... Call Trace: __traverse_mounts+0xb5/0x220 ? cifs_revalidate_mapping+0x65/0xc0 [cifs] step_into+0x195/0x610 ? lookup_fast+0xe2/0xf0 path_lookupat+0x64/0x140 filename_lookup+0xc2/0x140 ? __create_object+0x299/0x380 ? kmem_cache_alloc+0x119/0x220 ? user_path_at_empty+0x31/0x50 user_path_at_empty+0x31/0x50 __x64_sys_chdir+0x2a/0xd0 ? exit_to_user_mode_prepare+0xca/0x100 do_syscall_64+0x42/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc This fix adds an inline cifs_dfs_d_automount() {return -EREMOTE} handler when CONFIG_CIFS_DFS_UPCALL is disabled. An alternative would be to avoid flagging S_AUTOMOUNT, etc. without CONFIG_CIFS_DFS_UPCALL. This approach was chosen as it provides more control over the error path. Signed-off-by: David Disseldorp Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 71fe0a0a7992..415176b2cf32 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -124,7 +124,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops; #ifdef CONFIG_CIFS_DFS_UPCALL extern struct vfsmount *cifs_dfs_d_automount(struct path *path); #else -#define cifs_dfs_d_automount NULL +static inline struct vfsmount *cifs_dfs_d_automount(struct path *path) +{ + return ERR_PTR(-EREMOTE); +} #endif /* Functions related to symlinks */ From 6cc041e90c178955219dcee4030bd5423f800f10 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 29 Mar 2023 17:14:21 -0300 Subject: [PATCH 082/173] cifs: avoid races in parallel reconnects in smb1 Prevent multiple threads of doing negotiate, session setup and tree connect by holding @ses->session_mutex in cifs_reconnect_tcon() while reconnecting session and tcon. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 38a697eca305..c9d57ba84be4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -71,7 +71,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) int rc; struct cifs_ses *ses; struct TCP_Server_Info *server; - struct nls_table *nls_codepage; + struct nls_table *nls_codepage = NULL; /* * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for @@ -99,6 +99,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } spin_unlock(&tcon->tc_lock); +again: rc = cifs_wait_for_server_reconnect(server, tcon->retry); if (rc) return rc; @@ -110,8 +111,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) } spin_unlock(&ses->chan_lock); - nls_codepage = load_nls_default(); - + mutex_lock(&ses->session_mutex); /* * Recheck after acquire mutex. If another thread is negotiating * and the server never sends an answer the socket will be closed @@ -120,29 +120,38 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) spin_lock(&server->srv_lock); if (server->tcpStatus == CifsNeedReconnect) { spin_unlock(&server->srv_lock); + mutex_lock(&ses->session_mutex); + + if (tcon->retry) + goto again; rc = -EHOSTDOWN; goto out; } spin_unlock(&server->srv_lock); + nls_codepage = load_nls_default(); + /* * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session */ + spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); - if (!cifs_chan_needs_reconnect(ses, server)) { + if (!cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD) { spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); /* this means that we only need to tree connect */ if (tcon->need_reconnect) goto skip_sess_setup; - rc = -EHOSTDOWN; + mutex_unlock(&ses->session_mutex); goto out; } spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(0, ses, server); if (!rc) rc = cifs_setup_session(0, ses, server, nls_codepage); From 09ba47b44d26b475bbdf9c80db9e0193d2b58956 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 29 Mar 2023 17:14:22 -0300 Subject: [PATCH 083/173] cifs: prevent infinite recursion in CIFSGetDFSRefer() We can't call smb_init() in CIFSGetDFSRefer() as cifs_reconnect_tcon() may end up calling CIFSGetDFSRefer() again to get new DFS referrals and thus causing an infinite recursion. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c9d57ba84be4..0d30b17494e4 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4382,8 +4382,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, return -ENODEV; getDFSRetry: - rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB, - (void **) &pSMBr); + /* + * Use smb_init_no_reconnect() instead of smb_init() as + * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus + * causing an infinite recursion. + */ + rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, + (void **)&pSMB, (void **)&pSMBr); if (rc) return rc; From e03677100707f849f01d8faf07ee58b4e56cdbf1 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 29 Mar 2023 17:14:23 -0300 Subject: [PATCH 084/173] cifs: get rid of dead check in smb2_reconnect() The SMB2_IOCTL check in the switch statement will never be true as we return earlier from smb2_reconnect() if @smb2_command == SMB2_IOCTL. Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6bd2aa6af18f..2b92132097dc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -310,7 +310,6 @@ out: case SMB2_READ: case SMB2_WRITE: case SMB2_LOCK: - case SMB2_IOCTL: case SMB2_QUERY_DIRECTORY: case SMB2_CHANGE_NOTIFY: case SMB2_QUERY_INFO: From f9d2b1e146e0f82f3d04629afd92698522058361 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Wed, 29 Mar 2023 16:51:58 +0000 Subject: [PATCH 085/173] virtio/vsock: fix leaks due to missing skb owner This patch sets the skb owner in the recv and send path for virtio. For the send path, this solves the leak caused when virtio_transport_purge_skbs() finds skb->sk is always NULL and therefore never matches it with the current socket. Setting the owner upon allocation fixes this. For the recv path, this ensures correctness of accounting and also correct transfer of ownership in vsock_loopback (when skbs are sent from one socket and received by another). Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff") Signed-off-by: Bobby Eshleman Reported-by: Cong Wang Link: https://lore.kernel.org/all/ZCCbATwov4U+GBUv@pop-os.localdomain/ Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 37934dfe72f4..ee78b4082ef9 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -94,6 +94,11 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, info->op, info->flags); + if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) { + WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n"); + goto out; + } + return skb; out: @@ -1303,6 +1308,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, goto free_pkt; } + if (!skb_set_owner_sk_safe(skb, sk)) { + WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n"); + goto free_pkt; + } + vsk = vsock_sk(sk); lock_sock(sk); From c7d624520c1bd4e42d8ceb8283d6505fc90acccb Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 29 Mar 2023 21:47:19 +0800 Subject: [PATCH 086/173] iommu/vt-d: Remove unnecessary locking in intel_irq_remapping_alloc() The global rwsem dmar_global_lock was introduced by commit 3a5670e8ac932 ("iommu/vt-d: Introduce a rwsem to protect global data structures"). It is used to protect DMAR related global data from DMAR hotplug operations. Using dmar_global_lock in intel_irq_remapping_alloc() is unnecessary as the DMAR global data structures are not touched there. Remove it to avoid below lockdep warning. ====================================================== WARNING: possible circular locking dependency detected 6.3.0-rc2 #468 Not tainted ------------------------------------------------------ swapper/0/1 is trying to acquire lock: ff1db4cb40178698 (&domain->mutex){+.+.}-{3:3}, at: __irq_domain_alloc_irqs+0x3b/0xa0 but task is already holding lock: ffffffffa0c1cdf0 (dmar_global_lock){++++}-{3:3}, at: intel_iommu_init+0x58e/0x880 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (dmar_global_lock){++++}-{3:3}: lock_acquire+0xd6/0x320 down_read+0x42/0x180 intel_irq_remapping_alloc+0xad/0x750 mp_irqdomain_alloc+0xb8/0x2b0 irq_domain_alloc_irqs_locked+0x12f/0x2d0 __irq_domain_alloc_irqs+0x56/0xa0 alloc_isa_irq_from_domain.isra.7+0xa0/0xe0 mp_map_pin_to_irq+0x1dc/0x330 setup_IO_APIC+0x128/0x210 apic_intr_mode_init+0x67/0x110 x86_late_time_init+0x24/0x40 start_kernel+0x41e/0x7e0 secondary_startup_64_no_verify+0xe0/0xeb -> #0 (&domain->mutex){+.+.}-{3:3}: check_prevs_add+0x160/0xef0 __lock_acquire+0x147d/0x1950 lock_acquire+0xd6/0x320 __mutex_lock+0x9c/0xfc0 __irq_domain_alloc_irqs+0x3b/0xa0 dmar_alloc_hwirq+0x9e/0x120 iommu_pmu_register+0x11d/0x200 intel_iommu_init+0x5de/0x880 pci_iommu_init+0x12/0x40 do_one_initcall+0x65/0x350 kernel_init_freeable+0x3ca/0x610 kernel_init+0x1a/0x140 ret_from_fork+0x29/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(dmar_global_lock); lock(&domain->mutex); lock(dmar_global_lock); lock(&domain->mutex); *** DEADLOCK *** Fixes: 9dbb8e3452ab ("irqdomain: Switch to per-domain locking") Reviewed-by: Jacob Pan Tested-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230314051836.23817-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20230329134721.469447-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/irq_remapping.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 6d01fa078c36..df9e261af0b5 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -311,14 +311,12 @@ static int set_ioapic_sid(struct irte *irte, int apic) if (!irte) return -1; - down_read(&dmar_global_lock); for (i = 0; i < MAX_IO_APICS; i++) { if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) { sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; break; } } - up_read(&dmar_global_lock); if (sid == 0) { pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic); @@ -338,14 +336,12 @@ static int set_hpet_sid(struct irte *irte, u8 id) if (!irte) return -1; - down_read(&dmar_global_lock); for (i = 0; i < MAX_HPET_TBS; i++) { if (ir_hpet[i].iommu && ir_hpet[i].id == id) { sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn; break; } } - up_read(&dmar_global_lock); if (sid == 0) { pr_warn("Failed to set source-id of HPET block (%d)\n", id); @@ -1339,9 +1335,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, if (!data) goto out_free_parent; - down_read(&dmar_global_lock); index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs); - up_read(&dmar_global_lock); if (index < 0) { pr_warn("Failed to allocate IRTE\n"); kfree(data); From bfd3c6b9fa4a1dc78139dd1621d5bea321ffa69d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 29 Mar 2023 21:47:20 +0800 Subject: [PATCH 087/173] iommu/vt-d: Allow zero SAGAW if second-stage not supported The VT-d spec states (in section 11.4.2) that hardware implementations reporting second-stage translation support (SSTS) field as Clear also report the SAGAW field as 0. Fix an inappropriate check in alloc_iommu(). Fixes: 792fb43ce2c9 ("iommu/vt-d: Enable Intel IOMMU scalable mode by default") Suggested-by: Raghunathan Srinivasan Reviewed-by: Kevin Tian Signed-off-by: Jacob Pan Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230318024824.124542-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20230329134721.469447-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 6acfe879589c..23828d189c2a 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1071,7 +1071,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) } err = -EINVAL; - if (cap_sagaw(iommu->cap) == 0) { + if (!cap_sagaw(iommu->cap) && + (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) { pr_info("%s: No supported address widths. Not attempting DMA translation.\n", iommu->name); drhd->ignored = 1; From 16812c96550c30a8d5743167ef4e462d6fbe7472 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Mar 2023 21:47:21 +0800 Subject: [PATCH 088/173] iommu/vt-d: Fix an IOMMU perfmon warning when CPU hotplug A warning can be triggered when hotplug CPU 0. $ echo 0 > /sys/devices/system/cpu/cpu0/online ------------[ cut here ]------------ Voluntary context switch within RCU read-side critical section! WARNING: CPU: 0 PID: 19 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch+0x4f4/0x580 RIP: 0010:rcu_note_context_switch+0x4f4/0x580 Call Trace: ? perf_event_update_userpage+0x104/0x150 __schedule+0x8d/0x960 ? perf_event_set_state.part.82+0x11/0x50 schedule+0x44/0xb0 schedule_timeout+0x226/0x310 ? __perf_event_disable+0x64/0x1a0 ? _raw_spin_unlock+0x14/0x30 wait_for_completion+0x94/0x130 __wait_rcu_gp+0x108/0x130 synchronize_rcu+0x67/0x70 ? invoke_rcu_core+0xb0/0xb0 ? __bpf_trace_rcu_stall_warning+0x10/0x10 perf_pmu_migrate_context+0x121/0x370 iommu_pmu_cpu_offline+0x6a/0xa0 ? iommu_pmu_del+0x1e0/0x1e0 cpuhp_invoke_callback+0x129/0x510 cpuhp_thread_fun+0x94/0x150 smpboot_thread_fn+0x183/0x220 ? sort_range+0x20/0x20 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 ---[ end trace 0000000000000000 ]--- The synchronize_rcu() will be invoked in the perf_pmu_migrate_context(), when migrating a PMU to a new CPU. However, the current for_each_iommu() is within RCU read-side critical section. Two methods were considered to fix the issue. - Use the dmar_global_lock to replace the RCU read lock when going through the drhd list. But it triggers a lockdep warning. - Use the cpuhp_setup_state_multi() to set up a dedicated state for each IOMMU PMU. The lock can be avoided. The latter method is implemented in this patch. Since each IOMMU PMU has a dedicated state, add cpuhp_node and cpu in struct iommu_pmu to track the state. The state can be dynamically allocated now. Remove the CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE. Fixes: 46284c6ceb5e ("iommu/vt-d: Support cpumask for IOMMU perfmon") Reported-by: Ammy Yi Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230328182028.1366416-1-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230329134721.469447-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 2 ++ drivers/iommu/intel/perfmon.c | 68 ++++++++++++++++++++++------------- include/linux/cpuhotplug.h | 1 - 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d6df3b865812..694ab9b7d3e9 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -641,6 +641,8 @@ struct iommu_pmu { DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; unsigned char irq_name[16]; + struct hlist_node cpuhp_node; + int cpu; }; #define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED) diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index e17d9743a0d8..cf43e798eca4 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -773,19 +773,34 @@ static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) iommu->perf_irq = 0; } -static int iommu_pmu_cpu_online(unsigned int cpu) +static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { + struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); + if (cpumask_empty(&iommu_pmu_cpu_mask)) cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask); + if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask)) + iommu_pmu->cpu = cpu; + return 0; } -static int iommu_pmu_cpu_offline(unsigned int cpu) +static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) { - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - int target; + struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); + int target = cpumask_first(&iommu_pmu_cpu_mask); + + /* + * The iommu_pmu_cpu_mask has been updated when offline the CPU + * for the first iommu_pmu. Migrate the other iommu_pmu to the + * new target. + */ + if (target < nr_cpu_ids && target != iommu_pmu->cpu) { + perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); + iommu_pmu->cpu = target; + return 0; + } if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask)) return 0; @@ -795,45 +810,50 @@ static int iommu_pmu_cpu_offline(unsigned int cpu) if (target < nr_cpu_ids) cpumask_set_cpu(target, &iommu_pmu_cpu_mask); else - target = -1; + return 0; - rcu_read_lock(); - - for_each_iommu(iommu, drhd) { - if (!iommu->pmu) - continue; - perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target); - } - rcu_read_unlock(); + perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); + iommu_pmu->cpu = target; return 0; } static int nr_iommu_pmu; +static enum cpuhp_state iommu_cpuhp_slot; static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) { int ret; - if (nr_iommu_pmu++) - return 0; + if (!nr_iommu_pmu) { + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "driver/iommu/intel/perfmon:online", + iommu_pmu_cpu_online, + iommu_pmu_cpu_offline); + if (ret < 0) + return ret; + iommu_cpuhp_slot = ret; + } - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, - "driver/iommu/intel/perfmon:online", - iommu_pmu_cpu_online, - iommu_pmu_cpu_offline); - if (ret) - nr_iommu_pmu = 0; + ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); + if (ret) { + if (!nr_iommu_pmu) + cpuhp_remove_multi_state(iommu_cpuhp_slot); + return ret; + } + nr_iommu_pmu++; - return ret; + return 0; } static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) { + cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); + if (--nr_iommu_pmu) return; - cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE); + cpuhp_remove_multi_state(iommu_cpuhp_slot); } void iommu_pmu_register(struct intel_iommu *iommu) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c6fab004104a..5b2f8147d1ae 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -218,7 +218,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, - CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, From 44d807320000db0d0013372ad39b53e12d52f758 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 30 Mar 2023 09:25:32 +0800 Subject: [PATCH 089/173] net: qrtr: Fix a refcount bug in qrtr_recvmsg() Syzbot reported a bug as following: refcount_t: addition on 0; use-after-free. ... RIP: 0010:refcount_warn_saturate+0x17c/0x1f0 lib/refcount.c:25 ... Call Trace: __refcount_add include/linux/refcount.h:199 [inline] __refcount_inc include/linux/refcount.h:250 [inline] refcount_inc include/linux/refcount.h:267 [inline] kref_get include/linux/kref.h:45 [inline] qrtr_node_acquire net/qrtr/af_qrtr.c:202 [inline] qrtr_node_lookup net/qrtr/af_qrtr.c:398 [inline] qrtr_send_resume_tx net/qrtr/af_qrtr.c:1003 [inline] qrtr_recvmsg+0x85f/0x990 net/qrtr/af_qrtr.c:1070 sock_recvmsg_nosec net/socket.c:1017 [inline] sock_recvmsg+0xe2/0x160 net/socket.c:1038 qrtr_ns_worker+0x170/0x1700 net/qrtr/ns.c:688 process_one_work+0x991/0x15c0 kernel/workqueue.c:2390 worker_thread+0x669/0x1090 kernel/workqueue.c:2537 It occurs in the concurrent scenario of qrtr_recvmsg() and qrtr_endpoint_unregister() as following: cpu0 cpu1 qrtr_recvmsg qrtr_endpoint_unregister qrtr_send_resume_tx qrtr_node_release qrtr_node_lookup mutex_lock(&qrtr_node_lock) spin_lock_irqsave(&qrtr_nodes_lock, ) refcount_dec_and_test(&node->ref) [node->ref == 0] radix_tree_lookup [node != NULL] __qrtr_node_release qrtr_node_acquire spin_lock_irqsave(&qrtr_nodes_lock, ) kref_get(&node->ref) [WARNING] ... mutex_unlock(&qrtr_node_lock) Use qrtr_node_lock to protect qrtr_node_lookup() implementation, this is actually improving the protection of node reference. Fixes: 0a7e0d0ef054 ("net: qrtr: Migrate node lookup tree to spinlock") Reported-by: syzbot+a7492efaa5d61b51db23@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=a7492efaa5d61b51db23 Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- net/qrtr/af_qrtr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 5c2fb992803b..3a70255c8d02 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -393,10 +393,12 @@ static struct qrtr_node *qrtr_node_lookup(unsigned int nid) struct qrtr_node *node; unsigned long flags; + mutex_lock(&qrtr_node_lock); spin_lock_irqsave(&qrtr_nodes_lock, flags); node = radix_tree_lookup(&qrtr_nodes, nid); node = qrtr_node_acquire(node); spin_unlock_irqrestore(&qrtr_nodes_lock, flags); + mutex_unlock(&qrtr_node_lock); return node; } From 653a180957a85c3fc30320cc7e84f5dc913a64f8 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:02 +0800 Subject: [PATCH 090/173] net: phylink: add phylink_expects_phy() method Provide phylink_expects_phy() to allow MAC drivers to check if it is expecting a PHY to attach to. Since fixed-linked setups do not need to attach to a PHY. Provides a boolean value as to if the MAC should expect a PHY. Returns true if a PHY is expected. Reviewed-by: Russell King (Oracle) Signed-off-by: Michael Sit Wei Hong Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 19 +++++++++++++++++++ include/linux/phylink.h | 1 + 2 files changed, 20 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 1a2f074685fa..30c166b33468 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1586,6 +1586,25 @@ void phylink_destroy(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_destroy); +/** + * phylink_expects_phy() - Determine if phylink expects a phy to be attached + * @pl: a pointer to a &struct phylink returned from phylink_create() + * + * When using fixed-link mode, or in-band mode with 1000base-X or 2500base-X, + * no PHY is needed. + * + * Returns true if phylink will be expecting a PHY. + */ +bool phylink_expects_phy(struct phylink *pl) +{ + if (pl->cfg_link_an_mode == MLO_AN_FIXED || + (pl->cfg_link_an_mode == MLO_AN_INBAND && + phy_interface_mode_is_8023z(pl->link_config.interface))) + return false; + return true; +} +EXPORT_SYMBOL_GPL(phylink_expects_phy); + static void phylink_phy_change(struct phy_device *phydev, bool up) { struct phylink *pl = phydev->phylink; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index c492c26202b5..637698ed5cb6 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -574,6 +574,7 @@ struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); void phylink_destroy(struct phylink *); +bool phylink_expects_phy(struct phylink *pl); int phylink_connect_phy(struct phylink *, struct phy_device *); int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags); From fe2cfbc9680356a3d9f8adde8a38e715831e32f5 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:03 +0800 Subject: [PATCH 091/173] net: stmmac: check if MAC needs to attach to a PHY After the introduction of the fixed-link support, the MAC driver no longer attempt to scan for a PHY to attach to. This causes the non fixed-link setups to stop working. Using the phylink_expects_phy() to check and determine if the MAC should expect and attach a PHY. Fixes: ab21cf920928 ("net: stmmac: make mdio register skips PHY scanning for fixed-link") Signed-off-by: Michael Sit Wei Hong Signed-off-by: Lai Peter Jun Ann Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 17310ade88dd..d41a5f92aee7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1135,6 +1135,7 @@ static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); struct fwnode_handle *fwnode; + bool phy_needed; int ret; fwnode = of_fwnode_handle(priv->plat->phylink_node); @@ -1144,10 +1145,11 @@ static int stmmac_init_phy(struct net_device *dev) if (fwnode) ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); + phy_needed = phylink_expects_phy(priv->phylink); /* Some DT bindings do not set-up the PHY handle. Let's try to * manually parse it */ - if (!fwnode || ret) { + if (!fwnode || phy_needed || ret) { int addr = priv->plat->phy_addr; struct phy_device *phydev; From 6fc21a6ed5953b1dd3a41ce7be1ea57f5ef8c081 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:04 +0800 Subject: [PATCH 092/173] net: stmmac: remove redundant fixup to support fixed-link mode Currently, intel_speed_mode_2500() will fix-up xpcs_an_inband to 1 if the underlying controller has a max speed of 1000Mbps. The value has been initialized and modified if it is a fixed-linked setup earlier. This patch removes the fix-up to allow for fixed-linked setup support. In stmmac_phy_setup(), ovr_an_inband is set based on the value of xpcs_an_inband. Which in turn will return an error in phylink_parse_mode() where MLO_AN_FIXED and ovr_an_inband are both set. Fixes: c82386310d95 ("stmmac: intel: prepare to support 1000BASE-X phy interface setting") Signed-off-by: Michael Sit Wei Hong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 13aa919633b4..ab9f876b6df7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -251,7 +251,6 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data) priv->plat->mdio_bus_data->xpcs_an_inband = false; } else { priv->plat->max_speed = 1000; - priv->plat->mdio_bus_data->xpcs_an_inband = true; } } From 154e07c164859fc90bf4e8143f2f6c1af9f3a35e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 30 Mar 2023 11:54:42 +0200 Subject: [PATCH 093/173] l2tp: generate correct module alias strings Commit 65b32f801bfb ("uapi: move IPPROTO_L2TP to in.h") moved the definition of IPPROTO_L2TP from a define to an enum, but since __stringify doesn't work properly with enums, we ended up breaking the modalias strings for the l2tp modules: $ modinfo l2tp_ip l2tp_ip6 | grep alias alias: net-pf-2-proto-IPPROTO_L2TP alias: net-pf-2-proto-2-type-IPPROTO_L2TP alias: net-pf-10-proto-IPPROTO_L2TP alias: net-pf-10-proto-2-type-IPPROTO_L2TP Use the resolved number directly in MODULE_ALIAS_*() macros (as we already do with SOCK_DGRAM) to fix the alias strings: $ modinfo l2tp_ip l2tp_ip6 | grep alias alias: net-pf-2-proto-115 alias: net-pf-2-proto-115-type-2 alias: net-pf-10-proto-115 alias: net-pf-10-proto-115-type-2 Moreover, fix the ordering of the parameters passed to MODULE_ALIAS_NET_PF_PROTO_TYPE() by switching proto and type. Fixes: 65b32f801bfb ("uapi: move IPPROTO_L2TP to in.h") Link: https://lore.kernel.org/lkml/ZCQt7hmodtUaBlCP@righiandr-XPS-13-7390 Signed-off-by: Guillaume Nault Signed-off-by: Andrea Righi Reviewed-by: Wojciech Drewek Tested-by: Wojciech Drewek Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 8 ++++---- net/l2tp/l2tp_ip6.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4db5a554bdbd..41a74fc84ca1 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -677,8 +677,8 @@ MODULE_AUTHOR("James Chapman "); MODULE_DESCRIPTION("L2TP over IP"); MODULE_VERSION("1.0"); -/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like - * enums +/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol, + * because __stringify doesn't like enums */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); -MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2); +MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 2478aa60145f..5137ea1861ce 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -806,8 +806,8 @@ MODULE_AUTHOR("Chris Elston "); MODULE_DESCRIPTION("L2TP IP encapsulation for IPv6"); MODULE_VERSION("1.0"); -/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like - * enums +/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol, + * because __stringify doesn't like enums */ -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP); -MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 115, 2); +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, 115); From c5b959eeb7f9e40673b97c08c71cbfff5f5923f2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 31 Mar 2023 09:48:56 +0200 Subject: [PATCH 094/173] net: netcp: MAX_SKB_FRAGS is now 'int' The type of MAX_SKB_FRAGS has changed recently, so the debug printk needs to be updated: drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_create_interface': drivers/net/ethernet/ti/netcp_core.c:2084:30: error: format '%ld' expects argument of type 'long int', but argument 3 has type 'int' [-Werror=format=] 2084 | dev_err(dev, "tx-pool size too small, must be at least %ld\n", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 1bb596a9d8a2..dfdbcdeb991f 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2081,7 +2081,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, netcp->tx_pool_region_id = temp[1]; if (netcp->tx_pool_size < MAX_SKB_FRAGS) { - dev_err(dev, "tx-pool size too small, must be at least %ld\n", + dev_err(dev, "tx-pool size too small, must be at least %d\n", MAX_SKB_FRAGS); ret = -ENODEV; goto quit; From 362f0b6678ad1377c322a7dd237ea6785efc7342 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 31 Mar 2023 08:35:15 +0200 Subject: [PATCH 095/173] net: wwan: t7xx: do not compile with -Werror When playing with various compilers or their versions, some choke on the t7xx code. For example (with gcc 13): In file included from ./arch/s390/include/generated/asm/rwonce.h:1, from ../include/linux/compiler.h:247, from ../include/linux/build_bug.h:5, from ../include/linux/bits.h:22, from ../drivers/net/wwan/t7xx/t7xx_state_monitor.c:17: In function 'preempt_count', inlined from 't7xx_fsm_append_event' at ../drivers/net/wwan/t7xx/t7xx_state_monitor.c:439:43: ../include/asm-generic/rwonce.h:44:26: error: array subscript 0 is outside array bounds of 'const volatile int[0]' [-Werror=array-bounds=] There is no reason for any code in the kernel to be built with -Werror by default. Note that we have generic CONFIG_WERROR. So if anyone wants -Werror, they can enable that. Signed-off-by: Jiri Slaby (SUSE) Link: https://lore.kernel.org/all/20230330232717.1f8bf5ea@kernel.org/ Cc: Chandrashekar Devegowda Cc: Intel Corporation Cc: Chiranjeevi Rapolu Cc: Liu Haijun Cc: M Chetan Kumar Cc: Ricardo Martinez Cc: Loic Poulain Cc: Sergey Ryazanov Cc: Johannes Berg Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/wwan/t7xx/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wwan/t7xx/Makefile b/drivers/net/wwan/t7xx/Makefile index 268ff9e87e5b..2652cd00504e 100644 --- a/drivers/net/wwan/t7xx/Makefile +++ b/drivers/net/wwan/t7xx/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y += -Werror - obj-${CONFIG_MTK_T7XX} := mtk_t7xx.o mtk_t7xx-y:= t7xx_pci.o \ t7xx_pcie_mac.o \ From ffa5395a7901e83a68d88207c4592962906641bd Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Fri, 31 Mar 2023 10:56:41 +0300 Subject: [PATCH 096/173] vsock/vmci: convert VMCI error code to -ENOMEM on send This adds conversion of VMCI specific error code to general -ENOMEM. It is needed, because af_vsock.c passes error value returned from transport to the user, which does not expect to get VMCI_ERROR_* values. Fixes: c43170b7e157 ("vsock: return errors other than -ENOMEM to socket") Signed-off-by: Arseniy Krasnov Reviewed-by: Vishnu Dasa Reviewed-by: Stefano Garzarella Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 36eb16a40745..95cc4d79ba29 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1842,7 +1842,13 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); + ssize_t err; + + err = vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); + if (err < 0) + err = -ENOMEM; + + return err; } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) From cb2239c198ad9fbd5aced22cf93e45562da781eb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 30 Mar 2023 09:13:16 +0200 Subject: [PATCH 097/173] fs: drop peer group ids under namespace lock When cleaning up peer group ids in the failure path we need to make sure to hold on to the namespace lock. Otherwise another thread might just turn the mount from a shared into a non-shared mount concurrently. Link: https://lore.kernel.org/lkml/00000000000088694505f8132d77@google.com Fixes: 2a1867219c7b ("fs: add mount_setattr()") Reported-by: syzbot+8ac3859139c685c4f597@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # 5.12+ Message-Id: <20230330-vfs-mount_setattr-propagation-fix-v1-1-37548d91533b@kernel.org> Signed-off-by: Christian Brauner --- fs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index bc0f15257b49..6836e937ee61 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4183,9 +4183,9 @@ out: unlock_mount_hash(); if (kattr->propagation) { - namespace_unlock(); if (err) cleanup_group_ids(mnt, NULL); + namespace_unlock(); } return err; From 9fdc1605c504204e0fdec7892b29c916579e06f3 Mon Sep 17 00:00:00 2001 From: Andy Chi Date: Fri, 31 Mar 2023 16:32:41 +0800 Subject: [PATCH 098/173] ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook There is a HP ProBook which using ALC236 codec and need the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mute LED and micmute LED work. Signed-off-by: Andy Chi Cc: Link: https://lore.kernel.org/r/20230331083242.58416-1-andy.chi@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a2706fd87b14..bd4e1a3a55b4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9443,6 +9443,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8b66, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b87, "HP", ALC236_FIXUP_HP_GPIO_LED), From e4efa515d58f1363d8a27e548f9c5769d3121e03 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Mar 2023 13:22:52 +0100 Subject: [PATCH 099/173] wifi: brcmfmac: Fix SDIO suspend/resume regression After commit 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card unless WOWL is used"), the wifi adapter by default is turned off on suspend and then re-probed on resume. In at least 2 model x86/acpi tablets with brcmfmac43430a1 wifi adapters, the newly added re-probe on resume fails like this: brcmfmac: brcmf_sdio_bus_rxctl: resumed on timeout ieee80211 phy1: brcmf_bus_started: failed: -110 ieee80211 phy1: brcmf_attach: dongle is not responding: err=-110 brcmfmac: brcmf_sdio_firmware_callback: brcmf_attach failed It seems this specific brcmfmac model does not like being reprobed without it actually being turned off first. And the adapter is not being turned off during suspend because of commit f0992ace680c ("brcmfmac: prohibit ACPI power management for brcmfmac driver"). Now that the driver is being reprobed on resume, the disabling of ACPI pm is no longer necessary, except when WOWL is used (in which case there is no-reprobe). Move the dis-/en-abling of ACPI pm to brcmf_sdio_wowl_config(), this fixes the brcmfmac43430a1 suspend/resume regression and should help save some power when suspended. This change means that the code now also may re-enable ACPI pm when WOWL gets disabled. ACPI pm should only be re-enabled if it was enabled by the ACPI core originally. Add a brcmf_sdiod_acpi_save_power_manageable() to save the original state for this. This has been tested on the following devices: Asus T100TA brcmfmac43241b4-sdio Acer Iconia One 7 B1-750 brcmfmac43340-sdio Chuwi Hi8 brcmfmac43430a0-sdio Chuwi Hi8 brcmfmac43430a1-sdio (the Asus T100TA is the device for which the prohibiting of ACPI pm was originally added) Fixes: 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card unless WOWL is used") Cc: Ulf Hansson Signed-off-by: Hans de Goede Reviewed-by: Ulf Hansson Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230320122252.240070-1-hdegoede@redhat.com --- .../broadcom/brcm80211/brcmfmac/bcmsdh.c | 36 +++++++++++++------ .../broadcom/brcm80211/brcmfmac/sdio.h | 2 ++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index b7c918f241c9..65d4799a5658 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -994,15 +994,34 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids); -static void brcmf_sdiod_acpi_set_power_manageable(struct device *dev, - int val) +static void brcmf_sdiod_acpi_save_power_manageable(struct brcmf_sdio_dev *sdiodev) { #if IS_ENABLED(CONFIG_ACPI) struct acpi_device *adev; - adev = ACPI_COMPANION(dev); + adev = ACPI_COMPANION(&sdiodev->func1->dev); if (adev) - adev->flags.power_manageable = 0; + sdiodev->func1_power_manageable = adev->flags.power_manageable; + + adev = ACPI_COMPANION(&sdiodev->func2->dev); + if (adev) + sdiodev->func2_power_manageable = adev->flags.power_manageable; +#endif +} + +static void brcmf_sdiod_acpi_set_power_manageable(struct brcmf_sdio_dev *sdiodev, + int enable) +{ +#if IS_ENABLED(CONFIG_ACPI) + struct acpi_device *adev; + + adev = ACPI_COMPANION(&sdiodev->func1->dev); + if (adev) + adev->flags.power_manageable = enable ? sdiodev->func1_power_manageable : 0; + + adev = ACPI_COMPANION(&sdiodev->func2->dev); + if (adev) + adev->flags.power_manageable = enable ? sdiodev->func2_power_manageable : 0; #endif } @@ -1012,7 +1031,6 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, int err; struct brcmf_sdio_dev *sdiodev; struct brcmf_bus *bus_if; - struct device *dev; brcmf_dbg(SDIO, "Enter\n"); brcmf_dbg(SDIO, "Class=%x\n", func->class); @@ -1020,14 +1038,9 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, brcmf_dbg(SDIO, "sdio device ID: 0x%04x\n", func->device); brcmf_dbg(SDIO, "Function#: %d\n", func->num); - dev = &func->dev; - /* Set MMC_QUIRK_LENIENT_FN0 for this card */ func->card->quirks |= MMC_QUIRK_LENIENT_FN0; - /* prohibit ACPI power management for this device */ - brcmf_sdiod_acpi_set_power_manageable(dev, 0); - /* Consume func num 1 but dont do anything with it. */ if (func->num == 1) return 0; @@ -1059,6 +1072,7 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func, dev_set_drvdata(&sdiodev->func1->dev, bus_if); sdiodev->dev = &sdiodev->func1->dev; + brcmf_sdiod_acpi_save_power_manageable(sdiodev); brcmf_sdiod_change_state(sdiodev, BRCMF_SDIOD_DOWN); brcmf_dbg(SDIO, "F2 found, calling brcmf_sdiod_probe...\n"); @@ -1124,6 +1138,8 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled) if (sdiodev->settings->bus.sdio.oob_irq_supported || pm_caps & MMC_PM_WAKE_SDIO_IRQ) { + /* Stop ACPI from turning off the device when wowl is enabled */ + brcmf_sdiod_acpi_set_power_manageable(sdiodev, !enabled); sdiodev->wowl_enabled = enabled; brcmf_dbg(SDIO, "Configuring WOWL, enabled=%d\n", enabled); return; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h index b76d34d36bde..0d18ed15b403 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h @@ -188,6 +188,8 @@ struct brcmf_sdio_dev { char nvram_name[BRCMF_FW_NAME_LEN]; char clm_name[BRCMF_FW_NAME_LEN]; bool wowl_enabled; + bool func1_power_manageable; + bool func2_power_manageable; enum brcmf_sdiod_state state; struct brcmf_sdiod_freezer *freezer; const struct firmware *clm_fw; From 2ceb76f734e37833824b7fab6af17c999eb48d2b Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 22 Mar 2023 17:37:17 +0100 Subject: [PATCH 100/173] wifi: mt76: mt7921: Fix use-after-free in fw features query. Stop referencing 'features' memory after release_firmware is called. Fixes this crash: RIP: 0010:mt7921_check_offload_capability+0x17d mt7921_pci_probe+0xca/0x4b0 ... Signed-off-by: Ben Greear Signed-off-by: Lorenzo Bianconi Acked-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/51fd8f76494348aa9ecbf0abc471ebe47a983dfd.1679502607.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7921/init.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c index 80c71acfe159..cc94531185da 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c @@ -171,12 +171,12 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band) u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) { - struct mt7921_fw_features *features = NULL; const struct mt76_connac2_fw_trailer *hdr; struct mt7921_realease_info *rel_info; const struct firmware *fw; int ret, i, offset = 0; const u8 *data, *end; + u8 offload_caps = 0; ret = request_firmware(&fw, fw_wm, dev); if (ret) @@ -208,7 +208,10 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) data += sizeof(*rel_info); if (rel_info->tag == MT7921_FW_TAG_FEATURE) { + struct mt7921_fw_features *features; + features = (struct mt7921_fw_features *)data; + offload_caps = features->data; break; } @@ -218,7 +221,7 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm) out: release_firmware(fw); - return features ? features->data : 0; + return offload_caps; } EXPORT_SYMBOL_GPL(mt7921_check_offload_capability); From eb85df0a5643612285f61f38122564498d0c49f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 28 Mar 2023 12:01:17 +0200 Subject: [PATCH 101/173] wifi: mt76: mt7921: fix fw used for offload check for mt7922 Fix the firmware version used for offload capability check used by 0x0616 devices. This path enables offload capabilities for 0x0616 devices. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217245 Fixes: 034ae28b56f1 ("wifi: mt76: mt7921: introduce remain_on_channel support") Cc: stable@vger.kernel.org Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/632d8f0c9781c9902d7160e2c080aa7e9232d50d.1679997487.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index cb72ded37256..5c23c827abe4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -20,7 +20,7 @@ static const struct pci_device_id mt7921_pci_device_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0608), .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, { PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0616), - .driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM }, + .driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM }, { }, }; From f1594bc676579133a3cd906d7d27733289edfb86 Mon Sep 17 00:00:00 2001 From: Anh Tuan Phan Date: Fri, 24 Mar 2023 09:14:15 +0700 Subject: [PATCH 102/173] selftests mount: Fix mount_setattr_test builds failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling selftests with target mount_setattr I encountered some errors with the below messages: mount_setattr_test.c: In function ‘mount_setattr_thread’: mount_setattr_test.c:343:16: error: variable ‘attr’ has initializer but incomplete type 343 | struct mount_attr attr = { | ^~~~~~~~~~ These errors might be because of linux/mount.h is not included. This patch resolves that issue. Signed-off-by: Anh Tuan Phan Acked-by: Christian Brauner Signed-off-by: Shuah Khan --- tools/testing/selftests/mount_setattr/mount_setattr_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 582669ca38e9..c6a8c732b802 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "../kselftest_harness.h" From 52882b9c7a761b2b4e44717d6fbd1ed94c601b7f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 4 May 2022 17:48:07 +1000 Subject: [PATCH 103/173] KVM: PPC: Make KVM_CAP_IRQFD_RESAMPLE platform dependent When introduced, IRQFD resampling worked on POWER8 with XICS. However KVM on POWER9 has never implemented it - the compatibility mode code ("XICS-on-XIVE") misses the kvm_notify_acked_irq() call and the native XIVE mode does not handle INTx in KVM at all. This moved the capability support advertising to platforms and stops advertising it on XIVE, i.e. POWER9 and later. Signed-off-by: Alexey Kardashevskiy Acked-by: Anup Patel Acked-by: Nicholas Piggin Message-Id: <20220504074807.3616813-1-aik@ozlabs.ru> Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/arm.c | 1 + arch/powerpc/kvm/powerpc.c | 6 ++++++ arch/s390/kvm/kvm-s390.c | 1 + arch/x86/kvm/x86.c | 1 + virt/kvm/kvm_main.c | 1 - 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3bd732eaf087..3f6a5efdbcf0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -220,6 +220,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: case KVM_CAP_ARM_SYSTEM_SUSPEND: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c5405fc5538..d23e25e8432d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -576,6 +576,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; #endif +#ifdef CONFIG_HAVE_KVM_IRQFD + case KVM_CAP_IRQFD_RESAMPLE: + r = !xive_enabled(); + break; +#endif + case KVM_CAP_PPC_ALLOC_HTAB: r = hv_enabled; break; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 39b36562c043..1eeb9ae57879 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -573,6 +573,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d6f98b7635f..3d852ce84920 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4432,6 +4432,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VAPIC: case KVM_CAP_ENABLE_CAP: case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d255964ec331..b1679d08a216 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4479,7 +4479,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #endif #ifdef CONFIG_HAVE_KVM_IRQFD case KVM_CAP_IRQFD: - case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: From 52f91e51944808d83dfe2d5582601b5e84e472cc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 Mar 2023 19:31:48 +0200 Subject: [PATCH 104/173] platform/x86: gigabyte-wmi: add support for X570S AORUS ELITE Add "X570S AORUS ELITE" to known working boards Reported-by: Brandon Nielsen Link: https://lore.kernel.org/r/20230331014902.7864-1-nielsenb@jetfuse.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 5e5b17c50eb6..2a426040f749 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -161,6 +161,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570S AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"), { } }; From e352d685fde427a8fc9beb2ba30888f5d6f2e5e6 Mon Sep 17 00:00:00 2001 From: weiliang1503 Date: Thu, 30 Mar 2023 19:49:43 +0800 Subject: [PATCH 105/173] platform/x86: asus-nb-wmi: Add quirk_asus_tablet_mode to other ROG Flow X13 models Make quirk_asus_tablet_mode apply on other ROG Flow X13 devices, which only affects the GV301Q model before. Signed-off-by: weiliang1503 Link: https://lore.kernel.org/r/20230330114943.15057-1-weiliang1503@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-nb-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index cb15acdf14a3..e2c9a68d12df 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -464,7 +464,8 @@ static const struct dmi_system_id asus_quirks[] = { .ident = "ASUS ROG FLOW X13", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "GV301Q"), + /* Match GV301** */ + DMI_MATCH(DMI_PRODUCT_NAME, "GV301"), }, .driver_data = &quirk_asus_tablet_mode, }, From e3271a5917d1501089b1a224d702aa053e2877f4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 30 Mar 2023 21:46:44 +0200 Subject: [PATCH 106/173] platform/x86: ideapad-laptop: Stop sending KEY_TOUCHPAD_TOGGLE Commit 5829f8a897e4 ("platform/x86: ideapad-laptop: Send KEY_TOUCHPAD_TOGGLE on some models") made ideapad-laptop send KEY_TOUCHPAD_TOGGLE when we receive an ACPI notify with VPC event bit 5 set and the touchpad-state has not been changed by the EC itself already. This was done under the assumption that this would be good to do to make the touchpad-toggle hotkey work on newer models where the EC does not toggle the touchpad on/off itself (because it is not routed through the PS/2 controller, but uses I2C). But it turns out that at least some models, e.g. the Yoga 7-15ITL5 the EC triggers an ACPI notify with VPC event bit 5 set on resume, which would now cause a spurious KEY_TOUCHPAD_TOGGLE on resume to which the desktop environment responds by disabling the touchpad in software, breaking the touchpad (until manually re-enabled) on resume. It was never confirmed that sending KEY_TOUCHPAD_TOGGLE actually improves things on new models and at least some new models like the Yoga 7-15ITL5 don't have a touchpad on/off toggle hotkey at all, while still sending ACPI notify events with VPC event bit 5 set. So it seems best to revert the change to send KEY_TOUCHPAD_TOGGLE when receiving an ACPI notify events with VPC event bit 5 and the touchpad state as reported by the EC has not changed. Note this is not a full revert the code to cache the last EC touchpad state is kept to avoid sending spurious KEY_TOUCHPAD_ON / _OFF events on resume. Fixes: 5829f8a897e4 ("platform/x86: ideapad-laptop: Send KEY_TOUCHPAD_TOGGLE on some models") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217234 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230330194644.64628-1-hdegoede@redhat.com --- drivers/platform/x86/ideapad-laptop.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 0eb5bfdd823a..959ec3c5f376 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1170,7 +1170,6 @@ static const struct key_entry ideapad_keymap[] = { { KE_KEY, 65, { KEY_PROG4 } }, { KE_KEY, 66, { KEY_TOUCHPAD_OFF } }, { KE_KEY, 67, { KEY_TOUCHPAD_ON } }, - { KE_KEY, 68, { KEY_TOUCHPAD_TOGGLE } }, { KE_KEY, 128, { KEY_ESC } }, /* @@ -1526,18 +1525,16 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ if (priv->features.ctrl_ps2_aux_port) i8042_command(¶m, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE); - if (send_events) { - /* - * On older models the EC controls the touchpad and toggles it - * on/off itself, in this case we report KEY_TOUCHPAD_ON/_OFF. - * If the EC did not toggle, report KEY_TOUCHPAD_TOGGLE. - */ - if (value != priv->r_touchpad_val) { - ideapad_input_report(priv, value ? 67 : 66); - sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); - } else { - ideapad_input_report(priv, 68); - } + /* + * On older models the EC controls the touchpad and toggles it on/off + * itself, in this case we report KEY_TOUCHPAD_ON/_OFF. Some models do + * an acpi-notify with VPC bit 5 set on resume, so this function get + * called with send_events=true on every resume. Therefor if the EC did + * not toggle, do nothing to avoid sending spurious KEY_TOUCHPAD_TOGGLE. + */ + if (send_events && value != priv->r_touchpad_val) { + ideapad_input_report(priv, value ? 67 : 66); + sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad"); } priv->r_touchpad_val = value; From 36d4d213c6d4fffae2645a601e8ae996de4c3645 Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Fri, 31 Mar 2023 10:23:17 -0600 Subject: [PATCH 107/173] ALSA: hda/realtek: Add quirk for Clevo X370SNW Fixes speaker output and headset detection on Clevo X370SNW. Signed-off-by: Jeremy Soller Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20230331162317.14992-1-tcrawford@system76.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bd4e1a3a55b4..26187f5d56b5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2624,6 +2624,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), + SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), From 804d8e0a6e54427268790472781e03bc243f4ee3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 31 Mar 2023 16:31:19 -0400 Subject: [PATCH 108/173] NFSD: Avoid calling OPDESC() with ops->opnum == OP_ILLEGAL OPDESC() simply indexes into nfsd4_ops[] by the op's operation number, without range checking that value. It assumes callers are careful to avoid calling it with an out-of-bounds opnum value. nfsd4_decode_compound() is not so careful, and can invoke OPDESC() with opnum set to OP_ILLEGAL, which is 10044 -- well beyond the end of nfsd4_ops[]. Reported-by: Jeff Layton Fixes: f4f9ef4a1b0a ("nfsd4: opdesc will be useful outside nfs4proc.c") Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 97edb32be77f..67bbd2d6334c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2476,10 +2476,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; + op->opdesc = NULL; if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) return false; if (nfsd4_opnum_in_range(argp, op)) { + op->opdesc = OPDESC(op); op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); if (op->status != nfs_ok) trace_nfsd_compound_decode_err(argp->rqstp, @@ -2490,7 +2492,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } - op->opdesc = OPDESC(op); + /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: From 15a8b55dbb1ba154d82627547c5761cac884d810 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 27 Mar 2023 06:21:37 -0400 Subject: [PATCH 109/173] nfsd: call op_release, even when op_func returns an error For ops with "trivial" replies, nfsd4_encode_operation will shortcut most of the encoding work and skip to just marshalling up the status. One of the things it skips is calling op_release. This could cause a memory leak in the layoutget codepath if there is an error at an inopportune time. Have the compound processing engine always call op_release, even when op_func sets an error in op->status. With this change, we also need nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL on error to avoid a double free. Reported-by: Zhi Li Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403 Fixes: 34b1744c91cc ("nfsd4: define ->op_release for compound ops") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/blocklayout.c | 1 + fs/nfsd/nfs4xdr.c | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 04697f8dc37d..01d7fd108cf3 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, out_free_dev: kfree(dev); + gdp->gd_device = NULL; return ret; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 67bbd2d6334c..7799835c2196 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5400,10 +5400,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) __be32 *p; p = xdr_reserve_space(xdr, 8); - if (!p) { - WARN_ON_ONCE(1); - return; - } + if (!p) + goto release; *p++ = cpu_to_be32(op->opnum); post_err_offset = xdr->buf->len; @@ -5418,8 +5416,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) op->status = encoder(resp, op->status, &op->u); if (op->status) trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status); - if (opdesc && opdesc->op_release) - opdesc->op_release(&op->u); xdr_commit_encode(xdr); /* nfsd4_check_resp_size guarantees enough room for error status */ @@ -5460,6 +5456,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) } status: *p = op->status; +release: + if (opdesc && opdesc->op_release) + opdesc->op_release(&op->u); } /* From 7b50567bdcad8925ca1e075feb7171c12015afd1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Feb 2023 17:13:12 +0100 Subject: [PATCH 110/173] media: i2c: imx290: fix conditional function defintions The runtime suspend/resume functions are only referenced from the dev_pm_ops, but they use the old SET_RUNTIME_PM_OPS() helper that requires a __maybe_unused annotation to avoid a warning: drivers/media/i2c/imx290.c:1082:12: error: unused function 'imx290_runtime_resume' [-Werror,-Wunused-function] static int imx290_runtime_resume(struct device *dev) ^ drivers/media/i2c/imx290.c:1090:12: error: unused function 'imx290_runtime_suspend' [-Werror,-Wunused-function] static int imx290_runtime_suspend(struct device *dev) ^ Convert this to the new RUNTIME_PM_OPS() helper that so this is not required. To improve this further, also use the pm_ptr() helper that lets the dev_pm_ops get dropped entirely when CONFIG_PM is disabled. A related mistake happened in the of_match_ptr() macro here, which like SET_RUNTIME_PM_OPS() requires the match table to be marked as __maybe_unused, though I could not reproduce building this without CONFIG_OF. Remove the of_match_ptr() here as there is no point in dropping the match table in configurations without CONFIG_OF. Fixes: 02852c01f654 ("media: i2c: imx290: Initialize runtime PM before subdev") Signed-off-by: Arnd Bergmann Reported-by: Guenter Roeck Reported-by: Sudip Mukherjee Reviewed-by: Manivannan Sadhasivam Reviewed-by: Laurent Pinchart Signed-off-by: Linus Torvalds --- drivers/media/i2c/imx290.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c index 49d6c8bdec41..48ae2e0adf9e 100644 --- a/drivers/media/i2c/imx290.c +++ b/drivers/media/i2c/imx290.c @@ -1098,7 +1098,7 @@ static int imx290_runtime_suspend(struct device *dev) } static const struct dev_pm_ops imx290_pm_ops = { - SET_RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL) + RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL) }; /* ---------------------------------------------------------------------------- @@ -1362,8 +1362,8 @@ static struct i2c_driver imx290_i2c_driver = { .remove = imx290_remove, .driver = { .name = "imx290", - .pm = &imx290_pm_ops, - .of_match_table = of_match_ptr(imx290_of_match), + .pm = pm_ptr(&imx290_pm_ops), + .of_match_table = imx290_of_match, }, }; From 7f67aa097e875c87fba024e850cf405342300059 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 31 Mar 2023 00:39:38 +0200 Subject: [PATCH 111/173] drm/nouveau/disp: Support more modes by checking with lower bpc This allows us to advertise more modes especially on HDR displays. Fixes using 4K@60 modes on my TV and main display both using a HDMI to DP adapter. Also fixes similar issues for users running into this. Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230330223938.4025569-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 32 +++++++++++++++++++++++++ drivers/gpu/drm/nouveau/nouveau_dp.c | 8 ++++--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index ed9d374147b8..5bb777ff1313 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -363,6 +363,35 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder, return 0; } +static void +nv50_outp_atomic_fix_depth(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state) +{ + struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct drm_display_mode *mode = &asyh->state.adjusted_mode; + unsigned int max_rate, mode_rate; + + switch (nv_encoder->dcb->type) { + case DCB_OUTPUT_DP: + max_rate = nv_encoder->dp.link_nr * nv_encoder->dp.link_bw; + + /* we don't support more than 10 anyway */ + asyh->or.bpc = min_t(u8, asyh->or.bpc, 10); + + /* reduce the bpc until it works out */ + while (asyh->or.bpc > 6) { + mode_rate = DIV_ROUND_UP(mode->clock * asyh->or.bpc * 3, 8); + if (mode_rate <= max_rate) + break; + + asyh->or.bpc -= 2; + } + break; + default: + break; + } +} + static int nv50_outp_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, @@ -381,6 +410,9 @@ nv50_outp_atomic_check(struct drm_encoder *encoder, if (crtc_state->mode_changed || crtc_state->connectors_changed) asyh->or.bpc = connector->display_info.bpc; + /* We might have to reduce the bpc */ + nv50_outp_atomic_fix_depth(encoder, crtc_state); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c index e00876f92aee..d49b4875fc3c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dp.c +++ b/drivers/gpu/drm/nouveau/nouveau_dp.c @@ -263,8 +263,6 @@ nouveau_dp_irq(struct work_struct *work) } /* TODO: - * - Use the minimum possible BPC here, once we add support for the max bpc - * property. * - Validate against the DP caps advertised by the GPU (we don't check these * yet) */ @@ -276,7 +274,11 @@ nv50_dp_mode_valid(struct drm_connector *connector, { const unsigned int min_clock = 25000; unsigned int max_rate, mode_rate, ds_max_dotclock, clock = mode->clock; - const u8 bpp = connector->display_info.bpc * 3; + /* Check with the minmum bpc always, so we can advertise better modes. + * In particlar not doing this causes modes to be dropped on HDR + * displays as we might check with a bpc of 16 even. + */ + const u8 bpp = 6 * 3; if (mode->flags & DRM_MODE_FLAG_INTERLACE && !outp->caps.dp_interlace) return MODE_NO_INTERLACE; From adef41b03b35839e5677aace628d02597f04a616 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 31 Mar 2023 21:16:22 -0700 Subject: [PATCH 112/173] Revert "net: netcp: MAX_SKB_FRAGS is now 'int'" This reverts commit c5b959eeb7f9e40673b97c08c71cbfff5f5923f2. Reverted change is required after commit 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS") which does not exist in this tree, yet. It's only present in -next trees at the time of writing. Reported-by: Nathan Chancellor Link: https://lore.kernel.org/all/20230331214444.GA1426512@dev-arch.thelio-3990X/ Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/netcp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index dfdbcdeb991f..1bb596a9d8a2 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -2081,7 +2081,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device, netcp->tx_pool_region_id = temp[1]; if (netcp->tx_pool_size < MAX_SKB_FRAGS) { - dev_err(dev, "tx-pool size too small, must be at least %d\n", + dev_err(dev, "tx-pool size too small, must be at least %ld\n", MAX_SKB_FRAGS); ret = -ENODEV; goto quit; From 7d63b67125382ff0ffdfca434acbc94a38bd092b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Mar 2023 17:45:02 +0000 Subject: [PATCH 113/173] icmp: guard against too small mtu syzbot was able to trigger a panic [1] in icmp_glue_bits(), or more exactly in skb_copy_and_csum_bits() There is no repro yet, but I think the issue is that syzbot manages to lower device mtu to a small value, fooling __icmp_send() __icmp_send() must make sure there is enough room for the packet to include at least the headers. We might in the future refactor skb_copy_and_csum_bits() and its callers to no longer crash when something bad happens. [1] kernel BUG at net/core/skbuff.c:3343 ! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 15766 Comm: syz-executor.0 Not tainted 6.3.0-rc4-syzkaller-00039-gffe78bbd5121 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 RIP: 0010:skb_copy_and_csum_bits+0x798/0x860 net/core/skbuff.c:3343 Code: f0 c1 c8 08 41 89 c6 e9 73 ff ff ff e8 61 48 d4 f9 e9 41 fd ff ff 48 8b 7c 24 48 e8 52 48 d4 f9 e9 c3 fc ff ff e8 c8 27 84 f9 <0f> 0b 48 89 44 24 28 e8 3c 48 d4 f9 48 8b 44 24 28 e9 9d fb ff ff RSP: 0018:ffffc90000007620 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000000001e8 RCX: 0000000000000100 RDX: ffff8880276f6280 RSI: ffffffff87fdd138 RDI: 0000000000000005 RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000 R10: 00000000000001e8 R11: 0000000000000001 R12: 000000000000003c R13: 0000000000000000 R14: ffff888028244868 R15: 0000000000000b0e FS: 00007fbc81f1c700(0000) GS:ffff88802ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2df43000 CR3: 00000000744db000 CR4: 0000000000150ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: icmp_glue_bits+0x7b/0x210 net/ipv4/icmp.c:353 __ip_append_data+0x1d1b/0x39f0 net/ipv4/ip_output.c:1161 ip_append_data net/ipv4/ip_output.c:1343 [inline] ip_append_data+0x115/0x1a0 net/ipv4/ip_output.c:1322 icmp_push_reply+0xa8/0x440 net/ipv4/icmp.c:370 __icmp_send+0xb80/0x1430 net/ipv4/icmp.c:765 ipv4_send_dest_unreach net/ipv4/route.c:1239 [inline] ipv4_link_failure+0x5a9/0x9e0 net/ipv4/route.c:1246 dst_link_failure include/net/dst.h:423 [inline] arp_error_report+0xcb/0x1c0 net/ipv4/arp.c:296 neigh_invalidate+0x20d/0x560 net/core/neighbour.c:1079 neigh_timer_handler+0xc77/0xff0 net/core/neighbour.c:1166 call_timer_fn+0x1a0/0x580 kernel/time/timer.c:1700 expire_timers+0x29b/0x4b0 kernel/time/timer.c:1751 __run_timers kernel/time/timer.c:2022 [inline] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+d373d60fddbdc915e666@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230330174502.1915328-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8cebb476b3ab..b8607763d113 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -749,6 +749,11 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, room = 576; room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); + /* Guard against tiny mtu. We need to include at least one + * IP network header for this message to make any sense. + */ + if (room <= (int)sizeof(struct iphdr)) + goto ende; icmp_param.data_len = skb_in->len - icmp_param.offset; if (icmp_param.data_len > room) From f785f5ee968f7045268b8be6b0abc850c4a4277c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 31 Mar 2023 16:22:17 +0200 Subject: [PATCH 114/173] ALSA: hda/hdmi: Preserve the previous PCM device upon re-enablement When a DRM driver turns on or off the screen with the audio capability, it notifies the ELD to HD-audio HDMI codec driver via component ops. HDMI codec driver, in turn, attaches or detaches the PCM stream for the given port on the fly. The problem is that, since the recent code change, the HDMI driver always treats the PCM stream assignment dynamically; this ended up the confusion of the PCM device appearance. e.g. when a screen goes once off and on again, it may appear on a different PCM device before the screen-off. Although the application should treat such a change, it doesn't seem working gracefully with the current pipewire (maybe PulseAudio, too). As a workaround, this patch changes the HDMI codec driver behavior slightly to be more consistent. Now it remembers the previous PCM slot for the given port and try to assign to it. That is, if a port is re-enabled, the driver tries to use the same PCM slot that was assigned to that port previously. If it conflicts, a new slot is searched and used like before, instead. Note that multiple monitor connections are the only typical case where the PCM slot preservation is effective. As long as only a single monitor is connected, the behavior isn't changed, and the first PCM slot is still assigned always. Fixes: ef6f5494faf6 ("ALSA: hda/hdmi: Use only dynamic PCM device allocation") Reviewed-by: Jaroslav Kysela Link: https://bugzilla.kernel.org/show_bug.cgi?id=217259 Link: https://lore.kernel.org/r/20230331142217.19791-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 9ea633fe9339..4ffa3a59f419 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -81,6 +81,7 @@ struct hdmi_spec_per_pin { struct delayed_work work; struct hdmi_pcm *pcm; /* pointer to spec->pcm_rec[n] dynamically*/ int pcm_idx; /* which pcm is attached. -1 means no pcm is attached */ + int prev_pcm_idx; /* previously assigned pcm index */ int repoll_count; bool setup; /* the stream has been set up by prepare callback */ bool silent_stream; @@ -1380,9 +1381,17 @@ static void hdmi_attach_hda_pcm(struct hdmi_spec *spec, /* pcm already be attached to the pin */ if (per_pin->pcm) return; + /* try the previously used slot at first */ + idx = per_pin->prev_pcm_idx; + if (idx >= 0) { + if (!test_bit(idx, &spec->pcm_bitmap)) + goto found; + per_pin->prev_pcm_idx = -1; /* no longer valid, clear it */ + } idx = hdmi_find_pcm_slot(spec, per_pin); if (idx == -EBUSY) return; + found: per_pin->pcm_idx = idx; per_pin->pcm = get_hdmi_pcm(spec, idx); set_bit(idx, &spec->pcm_bitmap); @@ -1398,6 +1407,7 @@ static void hdmi_detach_hda_pcm(struct hdmi_spec *spec, return; idx = per_pin->pcm_idx; per_pin->pcm_idx = -1; + per_pin->prev_pcm_idx = idx; /* remember the previous index */ per_pin->pcm = NULL; if (idx >= 0 && idx < spec->pcm_used) clear_bit(idx, &spec->pcm_bitmap); @@ -1924,6 +1934,7 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid) per_pin->pcm = NULL; per_pin->pcm_idx = -1; + per_pin->prev_pcm_idx = -1; per_pin->pin_nid = pin_nid; per_pin->pin_nid_idx = spec->num_nids; per_pin->dev_id = i; From 275b471e3d2daf1472ae8fa70dc1b50c9e0b9e75 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 Mar 2023 19:21:44 -0700 Subject: [PATCH 115/173] net: don't let netpoll invoke NAPI if in xmit context Commit 0db3dc73f7a3 ("[NETPOLL]: tx lock deadlock fix") narrowed down the region under netif_tx_trylock() inside netpoll_send_skb(). (At that point in time netif_tx_trylock() would lock all queues of the device.) Taking the tx lock was problematic because driver's cleanup method may take the same lock. So the change made us hold the xmit lock only around xmit, and expected the driver to take care of locking within ->ndo_poll_controller(). Unfortunately this only works if netpoll isn't itself called with the xmit lock already held. Netpoll code is careful and uses trylock(). The drivers, however, may be using plain lock(). Printing while holding the xmit lock is going to result in rare deadlocks. Luckily we record the xmit lock owners, so we can scan all the queues, the same way we scan NAPI owners. If any of the xmit locks is held by the local CPU we better not attempt any polling. It would be nice if we could narrow down the check to only the NAPIs and the queue we're trying to use. I don't see a way to do that now. Reported-by: Roman Gushchin Fixes: 0db3dc73f7a3 ("[NETPOLL]: tx lock deadlock fix") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/netpoll.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a089b704b986..e6a739b1afa9 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -137,6 +137,20 @@ static void queue_process(struct work_struct *work) } } +static int netif_local_xmit_active(struct net_device *dev) +{ + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + + if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) + return 1; + } + + return 0; +} + static void poll_one_napi(struct napi_struct *napi) { int work; @@ -183,7 +197,10 @@ void netpoll_poll_dev(struct net_device *dev) if (!ni || down_trylock(&ni->dev_lock)) return; - if (!netif_running(dev)) { + /* Some drivers will take the same locks in poll and xmit, + * we can't poll if local CPU is already in xmit. + */ + if (!netif_running(dev) || netif_local_xmit_active(dev)) { up(&ni->dev_lock); return; } From 089b91a0155c4de1209a07ff2a7dd299ff3ece47 Mon Sep 17 00:00:00 2001 From: Gustav Ekelund Date: Fri, 31 Mar 2023 10:40:13 +0200 Subject: [PATCH 116/173] net: dsa: mv88e6xxx: Reset mv88e6393x force WD event bit The force watchdog event bit is not cleared during SW reset in the mv88e6393x switch. This is a different behavior compared to mv886390 which clears the force WD event bit as advertised. This causes a force WD event to be handled over and over again as the SW reset following the event never clears the force WD event bit. Explicitly clear the watchdog event register to 0 in irq_action when handling an event to prevent the switch from sending continuous interrupts. Marvell aren't aware of any other stuck bits apart from the force WD bit. Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family" Signed-off-by: Gustav Ekelund Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- drivers/net/dsa/mv88e6xxx/global2.c | 20 ++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/global2.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0de7b3611202..7108f745fbf0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5601,7 +5601,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = { * .port_set_upstream_port method. */ .set_egress_port = mv88e6393x_set_egress_port, - .watchdog_ops = &mv88e6390_watchdog_ops, + .watchdog_ops = &mv88e6393x_watchdog_ops, .mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6352_g1_reset, diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index ed3b2f88e783..a7af3cebae97 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -943,6 +943,26 @@ const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = { .irq_free = mv88e6390_watchdog_free, }; +static int mv88e6393x_watchdog_action(struct mv88e6xxx_chip *chip, int irq) +{ + mv88e6390_watchdog_action(chip, irq); + + /* Fix for clearing the force WD event bit. + * Unreleased erratum on mv88e6393x. + */ + mv88e6xxx_g2_write(chip, MV88E6390_G2_WDOG_CTL, + MV88E6390_G2_WDOG_CTL_UPDATE | + MV88E6390_G2_WDOG_CTL_PTR_EVENT); + + return IRQ_HANDLED; +} + +const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops = { + .irq_action = mv88e6393x_watchdog_action, + .irq_setup = mv88e6390_watchdog_setup, + .irq_free = mv88e6390_watchdog_free, +}; + static irqreturn_t mv88e6xxx_g2_watchdog_thread_fn(int irq, void *dev_id) { struct mv88e6xxx_chip *chip = dev_id; diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index e973114d6890..7e091965582b 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -369,6 +369,7 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; +extern const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops; extern const struct mv88e6xxx_avb_ops mv88e6165_avb_ops; extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops; From e669ce46740a9815953bb4452a6bc5a7fdc21a50 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 31 Mar 2023 14:49:59 +0200 Subject: [PATCH 117/173] net: ethernet: mtk_eth_soc: fix remaining throughput regression Based on further tests, it seems that the QDMA shaper is not able to perform shaping close to the MAC link rate without throughput loss. This cannot be compensated by increasing the shaping rate, so it seems to be an internal limit. Fix the remaining throughput regression by detecting that condition and limiting shaping to ports with lower link speed. This patch intentionally ignores link speed gain from TRGMII, because even on such links, shaping to 1000 Mbit/s incurs some throughput degradation. Fixes: f63959c7eec3 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Tested-By: Frank Wunderlich Reported-by: Frank Wunderlich Signed-off-by: Felix Fietkau Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 282f9435d5ff..e14050e17862 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -753,6 +753,7 @@ static void mtk_mac_link_up(struct phylink_config *config, MAC_MCR_FORCE_RX_FC); /* Configure speed */ + mac->speed = speed; switch (speed) { case SPEED_2500: case SPEED_1000: @@ -3235,6 +3236,9 @@ found: if (dp->index >= MTK_QDMA_NUM_QUEUES) return NOTIFY_DONE; + if (mac->speed > 0 && mac->speed <= s.base.speed) + s.base.speed = 0; + mtk_set_queue_speed(eth, dp->index + 3, s.base.speed); return NOTIFY_DONE; From 2584024b23552c00d95b50255e47bd18d306d31a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Apr 2023 19:09:57 -0400 Subject: [PATCH 118/173] sctp: check send stream number after wait_for_sndbuf This patch fixes a corner case where the asoc out stream count may change after wait_for_sndbuf. When the main thread in the client starts a connection, if its out stream count is set to N while the in stream count in the server is set to N - 2, another thread in the client keeps sending the msgs with stream number N - 1, and waits for sndbuf before processing INIT_ACK. However, after processing INIT_ACK, the out stream count in the client is shrunk to N - 2, the same to the in stream count in the server. The crash occurs when the thread waiting for sndbuf is awake and sends the msg in a non-existing stream(N - 1), the call trace is as below: KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f] Call Trace: sctp_cmd_send_msg net/sctp/sm_sideeffect.c:1114 [inline] sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1777 [inline] sctp_side_effects net/sctp/sm_sideeffect.c:1199 [inline] sctp_do_sm+0x197d/0x5310 net/sctp/sm_sideeffect.c:1170 sctp_primitive_SEND+0x9f/0xc0 net/sctp/primitive.c:163 sctp_sendmsg_to_asoc+0x10eb/0x1a30 net/sctp/socket.c:1868 sctp_sendmsg+0x8d4/0x1d90 net/sctp/socket.c:2026 inet_sendmsg+0x9d/0xe0 net/ipv4/af_inet.c:825 sock_sendmsg_nosec net/socket.c:722 [inline] sock_sendmsg+0xde/0x190 net/socket.c:745 The fix is to add an unlikely check for the send stream number after the thread wakes up from the wait_for_sndbuf. Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: syzbot+47c24ca20a2fa01f082e@syzkaller.appspotmail.com Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b91616f819de..218e0982c370 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1830,6 +1830,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc, err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); if (err) goto err; + if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) { + err = -EINVAL; + goto err; + } } if (sctp_state(asoc, CLOSED)) { From f95b8ea79c47c0ad3d18f45ad538f9970e414d1f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 7 Feb 2023 11:22:54 +0100 Subject: [PATCH 119/173] Revert "venus: firmware: Correct non-pix start and end addresses" This reverts commit a837e5161cff, which broke probing of the venus driver, at least on the SC7180 SoC HP X2 Chromebook: qcom-venus aa00000.video-codec: Adding to iommu group 11 qcom-venus aa00000.video-codec: non legacy binding qcom-venus aa00000.video-codec: failed to reset venus core qcom-venus: probe of aa00000.video-codec failed with error -110 Matthias Kaehlcke also reported that the same change caused a regression in SC7180 and sc7280, that prevents AOSS from entering sleep mode during system suspend. So let's revert this commit for now to fix both issues. Fixes: a837e5161cff ("venus: firmware: Correct non-pix start and end addresses") Reported-by: Matthias Kaehlcke Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Torvalds --- drivers/media/platform/qcom/venus/firmware.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c index 61ff20a7e935..cfb11c551167 100644 --- a/drivers/media/platform/qcom/venus/firmware.c +++ b/drivers/media/platform/qcom/venus/firmware.c @@ -38,8 +38,8 @@ static void venus_reset_cpu(struct venus_core *core) writel(fw_size, wrapper_base + WRAPPER_FW_END_ADDR); writel(0, wrapper_base + WRAPPER_CPA_START_ADDR); writel(fw_size, wrapper_base + WRAPPER_CPA_END_ADDR); - writel(0, wrapper_base + WRAPPER_NONPIX_START_ADDR); - writel(0, wrapper_base + WRAPPER_NONPIX_END_ADDR); + writel(fw_size, wrapper_base + WRAPPER_NONPIX_START_ADDR); + writel(fw_size, wrapper_base + WRAPPER_NONPIX_END_ADDR); if (IS_V6(core)) { /* Bring XTSS out of reset */ From 7e364e56293bb98cae1b55fd835f5991c4e96e7d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Apr 2023 14:29:29 -0700 Subject: [PATCH 120/173] Linux 6.3-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index da2586d4c728..ef4e96b9cd5b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 764a2ab9eb56e1200083e771aab16186836edf1d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 21 May 2021 11:38:11 +0200 Subject: [PATCH 121/173] drm/panfrost: Fix the panfrost_mmu_map_fault_addr() error path Make sure all bo->base.pages entries are either NULL or pointing to a valid page before calling drm_gem_shmem_put_pages(). Reported-by: Tomeu Vizoso Cc: Fixes: 187d2929206e ("drm/panfrost: Add support for GPU heap allocations") Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20210521093811.1018992-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 666a5e53fe19..e961fa27702c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -504,6 +504,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, if (IS_ERR(pages[i])) { mutex_unlock(&bo->base.pages_lock); ret = PTR_ERR(pages[i]); + pages[i] = NULL; goto err_pages; } } From f1b17f429f066f920a6a1056332e66f8a5b92256 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 30 Jan 2023 12:06:31 +0000 Subject: [PATCH 122/173] drm/i915/ttm: fix sparse warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sparse complains with: drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1066:21: sparse: expected restricted vm_fault_t [assigned] [usertype] ret drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1066:21: sparse: got int Fixes: 516198d317d8 ("drm/i915: audit bo->resource usage v3") Reported-by: kernel test robot Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230130101230.25347-1-matthew.auld@intel.com Reviewed-by: Nirmoy Das Acked-by: Christian König Signed-off-by: Christian König (cherry picked from commit fde789e8339c60c8c58e5a71fa819fcfe52d839e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 7420276827a5..4758f21c91e1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1067,11 +1067,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) .interruptible = true, .no_wait_gpu = true, /* should be idle already */ }; + int err; GEM_BUG_ON(!bo->ttm || !(bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)); - ret = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx); - if (ret) { + err = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx); + if (err) { dma_resv_unlock(bo->base.resv); return VM_FAULT_SIGBUS; } From c74237496fbc799257b091179dd01a3200f7314d Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 13 Mar 2023 13:55:56 -0700 Subject: [PATCH 123/173] drm/i915/huc: Cancel HuC delayed load timer on reset. In the rare case where we do a full GT reset after starting the HuC load and before it completes (which basically boils down to i915 hanging during init), we need to cancel the delayed load fence, as it will be re-initialized in the post-reset recovery. Fixes: 27536e03271d ("drm/i915/huc: track delayed HuC load with a fence") Signed-off-by: Daniele Ceraolo Spurio Cc: Alan Previn Reviewed-by: Alan Previn Link: https://patchwork.freedesktop.org/patch/msgid/20230313205556.1174503-1-daniele.ceraolospurio@intel.com (cherry picked from commit cdf7911f7dbcb37228409a63bf75630776c45a15) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 7 +++++++ drivers/gpu/drm/i915/gt/uc/intel_huc.h | 7 +------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index 410905da8e97..0c103ca160d1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -235,6 +235,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc) i915_sw_fence_fini(&huc->delayed_load.fence); } +int intel_huc_sanitize(struct intel_huc *huc) +{ + delayed_huc_load_complete(huc); + intel_uc_fw_sanitize(&huc->fw); + return 0; +} + static bool vcs_supported(struct intel_gt *gt) { intel_engine_mask_t mask = gt->info.engine_mask; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h index 52db03620c60..db555b3c1f56 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h @@ -41,6 +41,7 @@ struct intel_huc { } delayed_load; }; +int intel_huc_sanitize(struct intel_huc *huc); void intel_huc_init_early(struct intel_huc *huc); int intel_huc_init(struct intel_huc *huc); void intel_huc_fini(struct intel_huc *huc); @@ -54,12 +55,6 @@ bool intel_huc_is_authenticated(struct intel_huc *huc); void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus); -static inline int intel_huc_sanitize(struct intel_huc *huc) -{ - intel_uc_fw_sanitize(&huc->fw); - return 0; -} - static inline bool intel_huc_is_supported(struct intel_huc *huc) { return intel_uc_fw_is_supported(&huc->fw); From 95d939bb97ff9be101ae4ceeb322535589da2190 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Mon, 27 Mar 2023 09:42:17 +0300 Subject: [PATCH 124/173] drm/i915: Use compressed bpp when calculating m/n value for DP MST DSC For obvious reasons, we use compressed bpp instead of pipe bpp for DSC DP SST case. Lets be consistent and use compressed bpp instead of pipe bpp, also in DP MST DSC case. Signed-off-by: Stanislav Lisovskiy Reviewed-by: Vinod Govindapillai Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path") Link: https://patchwork.freedesktop.org/patch/msgid/20230327064217.24033-1-stanislav.lisovskiy@intel.com (cherry picked from commit ea1deabc6f11575eb3375b454457eaa3c9837abc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 2106b3de225a..7c9b328bc2d7 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -232,7 +232,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder, return slots; } - intel_link_compute_m_n(crtc_state->pipe_bpp, + intel_link_compute_m_n(crtc_state->dsc.compressed_bpp, crtc_state->lane_count, adjusted_mode->crtc_clock, crtc_state->port_clock, From dc30c011469165d57af9adac5baff7d767d20e5c Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 28 Mar 2023 17:36:27 +0800 Subject: [PATCH 125/173] drm/i915: fix race condition UAF in i915_perf_add_config_ioctl Userspace can guess the id value and try to race oa_config object creation with config remove, resulting in a use-after-free if we dereference the object after unlocking the metrics_lock. For that reason, unlocking the metrics_lock must be done after we are done dereferencing the object. Signed-off-by: Min Li Fixes: f89823c21224 ("drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface") Cc: # v4.14+ Reviewed-by: Andi Shyti Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230328093627.5067-1-lm0963hack@gmail.com [tursulin: Manually added stable tag.] (cherry picked from commit 49f6f6483b652108bcb73accd0204a464b922395) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 283a4a3c6862..004074936300 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4638,13 +4638,13 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, err = oa_config->id; goto sysfs_err; } - - mutex_unlock(&perf->metrics_lock); + id = oa_config->id; drm_dbg(&perf->i915->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + mutex_unlock(&perf->metrics_lock); - return oa_config->id; + return id; sysfs_err: mutex_unlock(&perf->metrics_lock); From dc3421560a67361442f33ec962fc6dd48895a0df Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 20 Mar 2023 15:14:23 +0000 Subject: [PATCH 126/173] drm/i915: Fix context runtime accounting When considering whether to mark one context as stopped and another as started we need to look at whether the previous and new _contexts_ are different and not just requests. Otherwise the software tracked context start time was incorrectly updated to the most recent lite-restore time- stamp, which was in some cases resulting in active time going backward, until the context switch (typically the heartbeat pulse) would synchronise with the hardware tracked context runtime. Easiest use case to observe this behaviour was with a full screen clients with close to 100% engine load. Signed-off-by: Tvrtko Ursulin Fixes: bb6287cb1886 ("drm/i915: Track context current active time") Cc: # v5.19+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230320151423.1708436-1-tvrtko.ursulin@linux.intel.com [tursulin: Fix spelling in commit msg.] (cherry picked from commit b3e70051879c665acdd3a1ab50d0ed58d6a8001f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 1bbe6708d0a7..750326434677 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2018,6 +2018,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * inspecting the queue to see if we need to resumbit. */ if (*prev != *execlists->active) { /* elide lite-restores */ + struct intel_context *prev_ce = NULL, *active_ce = NULL; + /* * Note the inherent discrepancy between the HW runtime, * recorded as part of the context switch, and the CPU @@ -2029,9 +2031,15 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) * and correct overselves later when updating from HW. */ if (*prev) - lrc_runtime_stop((*prev)->context); + prev_ce = (*prev)->context; if (*execlists->active) - lrc_runtime_start((*execlists->active)->context); + active_ce = (*execlists->active)->context; + if (prev_ce != active_ce) { + if (prev_ce) + lrc_runtime_stop(prev_ce); + if (active_ce) + lrc_runtime_start(active_ce); + } new_timeslice(execlists); } From ad651d68cee75e9ac20002254c4e5d09ee67a84b Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Sun, 2 Apr 2023 12:44:37 +0100 Subject: [PATCH 127/173] net: sfp: add quirk enabling 2500Base-x for HG MXPD-483II The HG MXPD-483II 1310nm SFP module is meant to operate with 2500Base-X, however, in their EEPROM they incorrectly specify: Transceiver type : Ethernet: 1000BASE-LX ... BR, Nominal : 2600MBd Use sfp_quirk_2500basex for this module to allow 2500Base-X mode anyway. https://forum.banana-pi.org/t/bpi-r3-sfp-module-compatibility/14573/60 Reported-by: chowtom Tested-by: chowtom Signed-off-by: Daniel Golle Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index fb98db61e06c..8af10bb53e57 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -387,6 +387,10 @@ static const struct sfp_quirk sfp_quirks[] = { SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp), + // HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports + // 2600MBd in their EERPOM + SFP_QUIRK_M("HG GENUINE", "MXPD-483II", sfp_quirk_2500basex), + // Huawei MA5671A can operate at 2500base-X, but report 1.2GBd NRZ in // their EEPROM SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex, From 839349d13905927d8a567ca4d21d88c82028e31d Mon Sep 17 00:00:00 2001 From: Sricharan Ramabadhran Date: Mon, 3 Apr 2023 12:28:51 +0530 Subject: [PATCH 128/173] net: qrtr: Do not do DEL_SERVER broadcast after DEL_CLIENT On the remote side, when QRTR socket is removed, af_qrtr will call qrtr_port_remove() which broadcasts the DEL_CLIENT packet to all neighbours including local NS. NS upon receiving the DEL_CLIENT packet, will remove the lookups associated with the node:port and broadcasts the DEL_SERVER packet. But on the host side, due to the arrival of the DEL_CLIENT packet, the NS would've already deleted the server belonging to that port. So when the remote's NS again broadcasts the DEL_SERVER for that port, it throws below error message on the host: "failed while handling packet from 2:-2" So fix this error by not broadcasting the DEL_SERVER packet when the DEL_CLIENT packet gets processed." Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Reviewed-by: Manivannan Sadhasivam Signed-off-by: Ram Kumar Dharuman Signed-off-by: Sricharan Ramabadhran Signed-off-by: David S. Miller --- net/qrtr/ns.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 722936f7dd98..0f25a386138c 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -274,7 +274,7 @@ err: return NULL; } -static int server_del(struct qrtr_node *node, unsigned int port) +static int server_del(struct qrtr_node *node, unsigned int port, bool bcast) { struct qrtr_lookup *lookup; struct qrtr_server *srv; @@ -287,7 +287,7 @@ static int server_del(struct qrtr_node *node, unsigned int port) radix_tree_delete(&node->servers, port); /* Broadcast the removal of local servers */ - if (srv->node == qrtr_ns.local_node) + if (srv->node == qrtr_ns.local_node && bcast) service_announce_del(&qrtr_ns.bcast_sq, srv); /* Announce the service's disappearance to observers */ @@ -373,7 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) } slot = radix_tree_iter_resume(slot, &iter); rcu_read_unlock(); - server_del(node, srv->port); + server_del(node, srv->port, true); rcu_read_lock(); } rcu_read_unlock(); @@ -459,10 +459,13 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, kfree(lookup); } - /* Remove the server belonging to this port */ + /* Remove the server belonging to this port but don't broadcast + * DEL_SERVER. Neighbours would've already removed the server belonging + * to this port due to the DEL_CLIENT broadcast from qrtr_port_remove(). + */ node = node_get(node_id); if (node) - server_del(node, port); + server_del(node, port, false); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); @@ -567,7 +570,7 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, if (!node) return -ENOENT; - return server_del(node, port); + return server_del(node, port, true); } static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, From ea30388baebcce37fd594d425a65037ca35e59e8 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 3 Apr 2023 15:34:17 +0800 Subject: [PATCH 129/173] ipv6: Fix an uninit variable access bug in __ip6_make_skb() Syzbot reported a bug as following: ===================================================== BUG: KMSAN: uninit-value in arch_atomic64_inc arch/x86/include/asm/atomic64_64.h:88 [inline] BUG: KMSAN: uninit-value in arch_atomic_long_inc include/linux/atomic/atomic-long.h:161 [inline] BUG: KMSAN: uninit-value in atomic_long_inc include/linux/atomic/atomic-instrumented.h:1429 [inline] BUG: KMSAN: uninit-value in __ip6_make_skb+0x2f37/0x30f0 net/ipv6/ip6_output.c:1956 arch_atomic64_inc arch/x86/include/asm/atomic64_64.h:88 [inline] arch_atomic_long_inc include/linux/atomic/atomic-long.h:161 [inline] atomic_long_inc include/linux/atomic/atomic-instrumented.h:1429 [inline] __ip6_make_skb+0x2f37/0x30f0 net/ipv6/ip6_output.c:1956 ip6_finish_skb include/net/ipv6.h:1122 [inline] ip6_push_pending_frames+0x10e/0x550 net/ipv6/ip6_output.c:1987 rawv6_push_pending_frames+0xb12/0xb90 net/ipv6/raw.c:579 rawv6_sendmsg+0x297e/0x2e60 net/ipv6/raw.c:922 inet_sendmsg+0x101/0x180 net/ipv4/af_inet.c:827 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0xa8e/0xe70 net/socket.c:2476 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2530 __sys_sendmsg net/socket.c:2559 [inline] __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x367/0x540 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook mm/slab.h:766 [inline] slab_alloc_node mm/slub.c:3452 [inline] __kmem_cache_alloc_node+0x71f/0xce0 mm/slub.c:3491 __do_kmalloc_node mm/slab_common.c:967 [inline] __kmalloc_node_track_caller+0x114/0x3b0 mm/slab_common.c:988 kmalloc_reserve net/core/skbuff.c:492 [inline] __alloc_skb+0x3af/0x8f0 net/core/skbuff.c:565 alloc_skb include/linux/skbuff.h:1270 [inline] __ip6_append_data+0x51c1/0x6bb0 net/ipv6/ip6_output.c:1684 ip6_append_data+0x411/0x580 net/ipv6/ip6_output.c:1854 rawv6_sendmsg+0x2882/0x2e60 net/ipv6/raw.c:915 inet_sendmsg+0x101/0x180 net/ipv4/af_inet.c:827 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0xa8e/0xe70 net/socket.c:2476 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2530 __sys_sendmsg net/socket.c:2559 [inline] __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x367/0x540 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd It is because icmp6hdr does not in skb linear region under the scenario of SOCK_RAW socket. Access icmp6_hdr(skb)->icmp6_type directly will trigger the uninit variable access bug. Use a local variable icmp6_type to carry the correct value in different scenarios. Fixes: 14878f75abd5 ("[IPV6]: Add ICMPMsgStats MIB (RFC 4293) [rev 2]") Reported-by: syzbot+8257f4dcef79de670baf@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=3d605ec1d0a7f2a269a1a6936ac7f2b85975ee9c Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c314fdde0097..95a55c6630ad 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1965,8 +1965,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + u8 icmp6_type; - ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl) + icmp6_type = fl6->fl6_icmp_type; + else + icmp6_type = icmp6_hdr(skb)->icmp6_type; + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } From a3c4c053014585dcf20f4df954791b74d8a8afcd Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 31 Mar 2023 23:33:19 +0200 Subject: [PATCH 130/173] platform/x86: think-lmi: Fix memory leak when showing current settings When retriving a item string with tlmi_setting(), the result has to be freed using kfree(). In current_value_show() however, malformed item strings are not freed, causing a memory leak. Fix this by eliminating the early return responsible for this. Reported-by: Mirsad Goran Todorovac Link: https://lore.kernel.org/platform-driver-x86/01e920bc-5882-ba0c-dd15-868bf0eca0b8@alu.unizg.hr/T/#t Tested-by: Mirsad Goran Todorovac Fixes: 0fdf10e5fc96 ("platform/x86: think-lmi: Split current_value to reflect only the value") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20230331213319.41040-1-W_Armin@gmx.de Tested-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index c816646eb661..6034df6d577d 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -930,10 +930,12 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a /* validate and split from `item,value` -> `value` */ value = strpbrk(item, ","); if (!value || value == item || !strlen(value + 1)) - return -EINVAL; + ret = -EINVAL; + else + ret = sysfs_emit(buf, "%s\n", value + 1); - ret = sysfs_emit(buf, "%s\n", value + 1); kfree(item); + return ret; } From e7d796fccdc8d17c2d21817ebe4c7bf5bbfe5433 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 2 Apr 2023 21:31:19 -0400 Subject: [PATCH 131/173] platform/x86: think-lmi: Fix memory leaks when parsing ThinkStation WMI strings My previous commit introduced a memory leak where the item allocated from tlmi_setting was not freed. This commit also renames it to avoid confusion with the similarly name variable in the same function. Fixes: 8a02d70679fc ("platform/x86: think-lmi: Add possible_values for ThinkStation") Reported-by: Mirsad Todorovac Link: https://lore.kernel.org/lkml/df26ff45-8933-f2b3-25f4-6ee51ccda7d8@gmx.de/T/ Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230403013120.2105-1-mpearson-lenovo@squebb.ca Tested-by: Mario Limonciello Tested-by: Mirsad Goran Todorovac Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 6034df6d577d..87f832142d8d 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -1459,10 +1459,10 @@ static int tlmi_analyze(void) * name string. * Try and pull that out if it's available. */ - char *item, *optstart, *optend; + char *optitem, *optstart, *optend; - if (!tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID)) { - optstart = strstr(item, "[Optional:"); + if (!tlmi_setting(setting->index, &optitem, LENOVO_BIOS_SETTING_GUID)) { + optstart = strstr(optitem, "[Optional:"); if (optstart) { optstart += strlen("[Optional:"); optend = strstr(optstart, "]"); @@ -1471,6 +1471,7 @@ static int tlmi_analyze(void) kstrndup(optstart, optend - optstart, GFP_KERNEL); } + kfree(optitem); } } /* From 7065655216d4d034d71164641f3bec0b189ad6fa Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Sun, 2 Apr 2023 21:31:20 -0400 Subject: [PATCH 132/173] platform/x86: think-lmi: Clean up display of current_value on Thinkstation On ThinkStations on retrieving the attribute value the BIOS appends the possible values to the string. Clean up the display in the current_value_show function so the options part is not displayed. Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms") Reported by Mario Limoncello Link: https://github.com/fwupd/fwupd/issues/5077#issuecomment-1488730526 Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230403013120.2105-2-mpearson-lenovo@squebb.ca Tested-by: Mario Limonciello Tested-by: Mirsad Goran Todorovac Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/think-lmi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 87f832142d8d..78dc82bda4dd 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -920,7 +920,7 @@ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *at static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj); - char *item, *value; + char *item, *value, *p; int ret; ret = tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID); @@ -931,9 +931,12 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a value = strpbrk(item, ","); if (!value || value == item || !strlen(value + 1)) ret = -EINVAL; - else + else { + /* On Workstations remove the Options part after the value */ + p = strchrnul(value, ';'); + *p = '\0'; ret = sysfs_emit(buf, "%s\n", value + 1); - + } kfree(item); return ret; From 9a469c6dfab38326f99f105386db84230be09ee3 Mon Sep 17 00:00:00 2001 From: Benjamin Asbach Date: Sat, 1 Apr 2023 01:24:47 +0200 Subject: [PATCH 133/173] platform/x86: thinkpad_acpi: Add missing T14s Gen1 type to s2idle quirk list From the commit message adding the first s2idle quirks: > Lenovo laptops that contain NVME SSDs across a variety of generations have > trouble resuming from suspend to idle when the IOMMU translation layer is > active for the NVME storage device. > > This generally manifests as a large resume delay or page faults. These > delays and page faults occur as a result of a Lenovo BIOS specific SMI > that runs during the D3->D0 transition on NVME devices. Add the DMI ids for another variant of the T14s Gen1, which also needs the s2idle quirk. Link: https://lore.kernel.org/all/20220503183420.348-1-mario.limonciello@amd.com/ Link: https://bbs.archlinux.org/viewtopic.php?pid=2084655#p2084655 Signed-off-by: Benjamin Asbach Tested-by: Benjamin Asbach Link: https://lore.kernel.org/r/20230331232447.37204-1-asbachb.kernel@impl.it Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 32c10457399e..7191ff2625b1 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4478,6 +4478,14 @@ static const struct dmi_system_id fwbug_list[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20UH"), } }, + { + .ident = "T14s Gen1 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"), + } + }, { .ident = "P14s Gen1 AMD", .driver_data = &quirk_s2idle_bug, From cf5fa3ca0552f1b7ba8490de40700bbfb6979b17 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 29 Mar 2023 19:20:38 +0300 Subject: [PATCH 134/173] wifi: ath11k: reduce the MHI timeout to 20s Currently ath11k breaks after hibernation, the reason being that ath11k expects that the wireless device will have power during suspend and the firmware will continue running. But of course during hibernation the power from the device is cut off and firmware is not running when resuming, so ath11k will fail. (The reason why ath11k needs the firmware running is the interaction between mac80211 and MHI stack, it's a long story and more info in the bugzilla report.) In SUSE kernels the watchdog timeout is reduced from the default 120 to 60 seconds: CONFIG_DPM_WATCHDOG_TIMEOUT=60 But as the ath11k MHI timeout is 90 seconds the kernel will crash before will ath11k will recover in resume callback. To avoid the crash reduce the MHI timeout to just 20 seconds. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.9 Link: https://bugzilla.kernel.org/show_bug.cgi?id=214649 Signed-off-by: Kalle Valo Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230329162038.8637-1-kvalo@kernel.org --- drivers/net/wireless/ath/ath11k/mhi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index 86995e8dc913..a62ee05c5409 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -16,7 +16,7 @@ #include "pci.h" #include "pcic.h" -#define MHI_TIMEOUT_DEFAULT_MS 90000 +#define MHI_TIMEOUT_DEFAULT_MS 20000 #define RDDM_DUMP_SIZE 0x420000 static struct mhi_channel_config ath11k_mhi_channels_qca6390[] = { From e6db67fa871dee37d22701daba806bfcd4d9df49 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Mar 2023 11:12:59 +0200 Subject: [PATCH 135/173] wifi: mt76: ignore key disable commands This helps avoid cleartext leakage of already queued or powersave buffered packets, when a reassoc triggers the key deletion. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230330091259.61378-1-nbd@nbd.name --- .../net/wireless/mediatek/mt76/mt7603/main.c | 10 +-- .../net/wireless/mediatek/mt76/mt7615/mac.c | 70 ++++++------------- .../net/wireless/mediatek/mt76/mt7615/main.c | 15 ++-- .../wireless/mediatek/mt76/mt7615/mt7615.h | 6 +- .../net/wireless/mediatek/mt76/mt76x02_util.c | 18 ++--- .../net/wireless/mediatek/mt76/mt7915/main.c | 13 ++-- .../net/wireless/mediatek/mt76/mt7921/main.c | 13 ++-- .../net/wireless/mediatek/mt76/mt7996/main.c | 13 ++-- 8 files changed, 62 insertions(+), 96 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index ca50feb0b3a9..1b1358c6bb46 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -512,15 +512,15 @@ mt7603_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) return -EOPNOTSUPP; - if (cmd == SET_KEY) { - key->hw_key_idx = wcid->idx; - wcid->hw_key_idx = idx; - } else { + if (cmd != SET_KEY) { if (idx == wcid->hw_key_idx) wcid->hw_key_idx = -1; - key = NULL; + return 0; } + + key->hw_key_idx = wcid->idx; + wcid->hw_key_idx = idx; mt76_wcid_key_setup(&dev->mt76, wcid, key); return mt7603_wtbl_set_key(dev, wcid->idx, key); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index a95602473359..51a968a6afdc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -1193,8 +1193,7 @@ EXPORT_SYMBOL_GPL(mt7615_mac_enable_rtscts); static int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, struct ieee80211_key_conf *key, - enum mt76_cipher_type cipher, u16 cipher_mask, - enum set_key_cmd cmd) + enum mt76_cipher_type cipher, u16 cipher_mask) { u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx) + 30 * 4; u8 data[32] = {}; @@ -1203,27 +1202,18 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, return -EINVAL; mt76_rr_copy(dev, addr, data, sizeof(data)); - if (cmd == SET_KEY) { - if (cipher == MT_CIPHER_TKIP) { - /* Rx/Tx MIC keys are swapped */ - memcpy(data, key->key, 16); - memcpy(data + 16, key->key + 24, 8); - memcpy(data + 24, key->key + 16, 8); - } else { - if (cipher_mask == BIT(cipher)) - memcpy(data, key->key, key->keylen); - else if (cipher != MT_CIPHER_BIP_CMAC_128) - memcpy(data, key->key, 16); - if (cipher == MT_CIPHER_BIP_CMAC_128) - memcpy(data + 16, key->key, 16); - } + if (cipher == MT_CIPHER_TKIP) { + /* Rx/Tx MIC keys are swapped */ + memcpy(data, key->key, 16); + memcpy(data + 16, key->key + 24, 8); + memcpy(data + 24, key->key + 16, 8); } else { + if (cipher_mask == BIT(cipher)) + memcpy(data, key->key, key->keylen); + else if (cipher != MT_CIPHER_BIP_CMAC_128) + memcpy(data, key->key, 16); if (cipher == MT_CIPHER_BIP_CMAC_128) - memset(data + 16, 0, 16); - else if (cipher_mask) - memset(data, 0, 16); - if (!cipher_mask) - memset(data, 0, sizeof(data)); + memcpy(data + 16, key->key, 16); } mt76_wr_copy(dev, addr, data, sizeof(data)); @@ -1234,7 +1224,7 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, static int mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid, enum mt76_cipher_type cipher, u16 cipher_mask, - int keyidx, enum set_key_cmd cmd) + int keyidx) { u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx), w0, w1; @@ -1253,9 +1243,7 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid, else w0 &= ~MT_WTBL_W0_RX_IK_VALID; - if (cmd == SET_KEY && - (cipher != MT_CIPHER_BIP_CMAC_128 || - cipher_mask == BIT(cipher))) { + if (cipher != MT_CIPHER_BIP_CMAC_128 || cipher_mask == BIT(cipher)) { w0 &= ~MT_WTBL_W0_KEY_IDX; w0 |= FIELD_PREP(MT_WTBL_W0_KEY_IDX, keyidx); } @@ -1272,19 +1260,10 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid, static void mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid, - enum mt76_cipher_type cipher, u16 cipher_mask, - enum set_key_cmd cmd) + enum mt76_cipher_type cipher, u16 cipher_mask) { u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx); - if (!cipher_mask) { - mt76_clear(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE); - return; - } - - if (cmd != SET_KEY) - return; - if (cipher == MT_CIPHER_BIP_CMAC_128 && cipher_mask & ~BIT(MT_CIPHER_BIP_CMAC_128)) return; @@ -1295,8 +1274,7 @@ mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid, int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd) + struct ieee80211_key_conf *key) { enum mt76_cipher_type cipher; u16 cipher_mask = wcid->cipher; @@ -1306,19 +1284,14 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, if (cipher == MT_CIPHER_NONE) return -EOPNOTSUPP; - if (cmd == SET_KEY) - cipher_mask |= BIT(cipher); - else - cipher_mask &= ~BIT(cipher); - - mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask, cmd); - err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask, - cmd); + cipher_mask |= BIT(cipher); + mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask); + err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask); if (err < 0) return err; err = mt7615_mac_wtbl_update_pk(dev, wcid, cipher, cipher_mask, - key->keyidx, cmd); + key->keyidx); if (err < 0) return err; @@ -1329,13 +1302,12 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd) + struct ieee80211_key_conf *key) { int err; spin_lock_bh(&dev->mt76.lock); - err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd); + err = __mt7615_mac_wtbl_set_key(dev, wcid, key); spin_unlock_bh(&dev->mt76.lock); return err; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index ab4c1b4478aa..dadb13f2ca09 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -391,18 +391,17 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (cmd == SET_KEY) *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); if (mt76_is_mmio(&dev->mt76)) - err = mt7615_mac_wtbl_set_key(dev, wcid, key, cmd); + err = mt7615_mac_wtbl_set_key(dev, wcid, key); else - err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd); + err = __mt7615_mac_wtbl_set_key(dev, wcid, key); out: mt7615_mutex_release(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index 43591b4c1d9a..9e58f6924493 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -490,11 +490,9 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi, void mt7615_mac_set_timing(struct mt7615_phy *phy); int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd); + struct ieee80211_key_conf *key); int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, - enum set_key_cmd cmd); + struct ieee80211_key_conf *key); void mt7615_mac_reset_work(struct work_struct *work); u32 mt7615_mac_get_sta_tid_sn(struct mt7615_dev *dev, int wcid, u8 tid); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 7451a63206a5..dcbb5c605dfe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -454,20 +454,20 @@ int mt76x02_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, msta = sta ? (struct mt76x02_sta *)sta->drv_priv : NULL; wcid = msta ? &msta->wcid : &mvif->group_wcid; - if (cmd == SET_KEY) { - key->hw_key_idx = wcid->idx; - wcid->hw_key_idx = idx; - if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) { - key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX; - wcid->sw_iv = true; - } - } else { + if (cmd != SET_KEY) { if (idx == wcid->hw_key_idx) { wcid->hw_key_idx = -1; wcid->sw_iv = false; } - key = NULL; + return 0; + } + + key->hw_key_idx = wcid->idx; + wcid->hw_key_idx = idx; + if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) { + key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX; + wcid->sw_iv = true; } mt76_wcid_key_setup(&dev->mt76, wcid, key); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 3bbccbdfc5eb..784191ec4802 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -410,16 +410,15 @@ static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, mt7915_mcu_add_bss_info(phy, vif, true); } - if (cmd == SET_KEY) + if (cmd == SET_KEY) { *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + } else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip, key, MCU_EXT_CMD(STA_REC_UPDATE), &msta->wcid, cmd); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 75eaf86c6a78..42933a6b7334 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -569,16 +569,15 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, mt7921_mutex_acquire(dev); - if (cmd == SET_KEY) + if (cmd == SET_KEY) { *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + } else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip, key, MCU_UNI_CMD(STA_REC_UPDATE), &msta->wcid, cmd); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 3e4da0350d96..1ba22d147949 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -351,16 +351,15 @@ static int mt7996_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, mt7996_mcu_add_bss_info(phy, vif, true); } - if (cmd == SET_KEY) + if (cmd == SET_KEY) { *wcid_keyidx = idx; - else if (idx == *wcid_keyidx) - *wcid_keyidx = -1; - else + } else { + if (idx == *wcid_keyidx) + *wcid_keyidx = -1; goto out; + } - mt76_wcid_key_setup(&dev->mt76, wcid, - cmd == SET_KEY ? key : NULL); - + mt76_wcid_key_setup(&dev->mt76, wcid, key); err = mt7996_mcu_add_key(&dev->mt76, vif, &msta->bip, key, MCU_WMWA_UNI_CMD(STA_REC_UPDATE), &msta->wcid, cmd); From 4ccf11c4e8a8e051499d53a12f502196c97a758e Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Tue, 21 Mar 2023 19:04:43 +0800 Subject: [PATCH 136/173] tracing/synthetic: Fix races on freeing last_cmd Currently, the "last_cmd" variable can be accessed by multiple processes asynchronously when multiple users manipulate synthetic_events node at the same time, it could lead to use-after-free or double-free. This patch add "lastcmd_mutex" to prevent "last_cmd" from being accessed asynchronously. ================================================================ It's easy to reproduce in the KASAN environment by running the two scripts below in different shells. script 1: while : do echo -n -e '\x88' > /sys/kernel/tracing/synthetic_events done script 2: while : do echo -n -e '\xb0' > /sys/kernel/tracing/synthetic_events done ================================================================ double-free scenario: process A process B ------------------- --------------- 1.kstrdup last_cmd 2.free last_cmd 3.free last_cmd(double-free) ================================================================ use-after-free scenario: process A process B ------------------- --------------- 1.kstrdup last_cmd 2.free last_cmd 3.tracing_log_err(use-after-free) ================================================================ Appendix 1. KASAN report double-free: BUG: KASAN: double-free in kfree+0xdc/0x1d4 Free of addr ***** by task sh/4879 Call trace: ... kfree+0xdc/0x1d4 create_or_delete_synth_event+0x60/0x1e8 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Allocated by task 4879: ... kstrdup+0x5c/0x98 create_or_delete_synth_event+0x6c/0x1e8 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Freed by task 5464: ... kfree+0xdc/0x1d4 create_or_delete_synth_event+0x60/0x1e8 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... ================================================================ Appendix 2. KASAN report use-after-free: BUG: KASAN: use-after-free in strlen+0x5c/0x7c Read of size 1 at addr ***** by task sh/5483 sh: CPU: 7 PID: 5483 Comm: sh ... __asan_report_load1_noabort+0x34/0x44 strlen+0x5c/0x7c tracing_log_err+0x60/0x444 create_or_delete_synth_event+0xc4/0x204 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Allocated by task 5483: ... kstrdup+0x5c/0x98 create_or_delete_synth_event+0x80/0x204 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Freed by task 5480: ... kfree+0xdc/0x1d4 create_or_delete_synth_event+0x74/0x204 trace_parse_run_command+0x2bc/0x4b8 synth_events_write+0x20/0x30 vfs_write+0x200/0x830 ... Link: https://lore.kernel.org/linux-trace-kernel/20230321110444.1587-1-Tze-nan.Wu@mediatek.com Fixes: 27c888da9867 ("tracing: Remove size restriction on synthetic event cmd error logging") Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Matthias Brugger Cc: AngeloGioacchino Del Regno Cc: "Tom Zanussi" Signed-off-by: Tze-nan Wu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 46d0abb32d0f..f0ff730125bf 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -44,14 +44,21 @@ enum { ERRORS }; static const char *err_text[] = { ERRORS }; +DEFINE_MUTEX(lastcmd_mutex); static char *last_cmd; static int errpos(const char *str) { - if (!str || !last_cmd) - return 0; + int ret = 0; - return err_pos(last_cmd, str); + mutex_lock(&lastcmd_mutex); + if (!str || !last_cmd) + goto out; + + ret = err_pos(last_cmd, str); + out: + mutex_unlock(&lastcmd_mutex); + return ret; } static void last_cmd_set(const char *str) @@ -59,18 +66,22 @@ static void last_cmd_set(const char *str) if (!str) return; + mutex_lock(&lastcmd_mutex); kfree(last_cmd); - last_cmd = kstrdup(str, GFP_KERNEL); + mutex_unlock(&lastcmd_mutex); } static void synth_err(u8 err_type, u16 err_pos) { + mutex_lock(&lastcmd_mutex); if (!last_cmd) - return; + goto out; tracing_log_err(NULL, "synthetic_events", last_cmd, err_text, err_type, err_pos); + out: + mutex_unlock(&lastcmd_mutex); } static int create_synth_event(const char *raw_command); From 6455b6163d8c680366663cdb8c679514d55fc30c Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Sat, 25 Mar 2023 10:12:47 +0800 Subject: [PATCH 137/173] ring-buffer: Fix race while reader and writer are on the same page When user reads file 'trace_pipe', kernel keeps printing following logs that warn at "cpu_buffer->reader_page->read > rb_page_size(reader)" in rb_get_reader_page(). It just looks like there's an infinite loop in tracing_read_pipe(). This problem occurs several times on arm64 platform when testing v5.10 and below. Call trace: rb_get_reader_page+0x248/0x1300 rb_buffer_peek+0x34/0x160 ring_buffer_peek+0xbc/0x224 peek_next_entry+0x98/0xbc __find_next_entry+0xc4/0x1c0 trace_find_next_entry_inc+0x30/0x94 tracing_read_pipe+0x198/0x304 vfs_read+0xb4/0x1e0 ksys_read+0x74/0x100 __arm64_sys_read+0x24/0x30 el0_svc_common.constprop.0+0x7c/0x1bc do_el0_svc+0x2c/0x94 el0_svc+0x20/0x30 el0_sync_handler+0xb0/0xb4 el0_sync+0x160/0x180 Then I dump the vmcore and look into the problematic per_cpu ring_buffer, I found that tail_page/commit_page/reader_page are on the same page while reader_page->read is obviously abnormal: tail_page == commit_page == reader_page == { .write = 0x100d20, .read = 0x8f9f4805, // Far greater than 0xd20, obviously abnormal!!! .entries = 0x10004c, .real_end = 0x0, .page = { .time_stamp = 0x857257416af0, .commit = 0xd20, // This page hasn't been full filled. // .data[0...0xd20] seems normal. } } The root cause is most likely the race that reader and writer are on the same page while reader saw an event that not fully committed by writer. To fix this, add memory barriers to make sure the reader can see the content of what is committed. Since commit a0fcaaed0c46 ("ring-buffer: Fix race between reset page and reading page") has added the read barrier in rb_get_reader_page(), here we just need to add the write barrier. Link: https://lore.kernel.org/linux-trace-kernel/20230325021247.2923907-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: 77ae365eca89 ("ring-buffer: make lockless") Suggested-by: Steven Rostedt (Google) Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c6f47b6cfd5f..76a2d91eecad 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3098,6 +3098,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) if (RB_WARN_ON(cpu_buffer, rb_is_reader_page(cpu_buffer->tail_page))) return; + /* + * No need for a memory barrier here, as the update + * of the tail_page did it for this page. + */ local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(&cpu_buffer->commit_page); @@ -3107,6 +3111,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { + /* Make sure the readers see the content of what is committed. */ + smp_wmb(); local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); RB_WARN_ON(cpu_buffer, @@ -4684,7 +4690,12 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) /* * Make sure we see any padding after the write update - * (see rb_reset_tail()) + * (see rb_reset_tail()). + * + * In addition, a writer may be writing on the reader page + * if the page has not been fully filled, so the read barrier + * is also needed to make sure we see the content of what is + * committed by the writer (see rb_set_commit_to_write()). */ smp_rmb(); From ea65b41807a26495ff2a73dd8b1bab2751940887 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 27 Mar 2023 18:36:46 +0100 Subject: [PATCH 138/173] ftrace: Mark get_lock_parent_ip() __always_inline If the compiler decides not to inline this function then preemption tracing will always show an IP inside the preemption disabling path and never the function actually calling preempt_{enable,disable}. Link: https://lore.kernel.org/linux-trace-kernel/20230327173647.1690849-1-john@metanate.com Cc: Masami Hiramatsu Cc: Mark Rutland Cc: stable@vger.kernel.org Fixes: f904f58263e1d ("sched/debug: Fix preempt_disable_ip recording for preempt_disable()") Signed-off-by: John Keeping Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366c730beaa3..402fc061de75 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -980,7 +980,7 @@ static inline void __ftrace_enabled_restore(int enabled) #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) -static inline unsigned long get_lock_parent_ip(void) +static __always_inline unsigned long get_lock_parent_ip(void) { unsigned long addr = CALLER_ADDR0; From b9f451a9029a16eb7913ace09b92493d00f2e564 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 29 Mar 2023 17:50:15 +0200 Subject: [PATCH 139/173] tracing/timerlat: Notify new max thread latency timerlat is not reporting a new tracing_max_latency for the thread latency. The reason is that it is not calling notify_new_max_latency() function after the new thread latency is sampled. Call notify_new_max_latency() after computing the thread latency. Link: https://lkml.kernel.org/r/16e18d61d69073d0192ace07bf61e405cca96e9c.1680104184.git.bristot@kernel.org Cc: stable@vger.kernel.org Fixes: dae181349f1e ("tracing/osnoise: Support a list of trace_array *tr") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 9176bb7a9bb4..e8116094bed8 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1738,6 +1738,8 @@ static int timerlat_main(void *data) trace_timerlat_sample(&s); + notify_new_max_latency(diff); + timerlat_dump_stack(time_to_us(diff)); tlat->tracing_thread = false; From d3cba7f02cd82118c32651c73374d8a5a459d9a6 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 29 Mar 2023 17:50:16 +0200 Subject: [PATCH 140/173] tracing/osnoise: Fix notify new tracing_max_latency osnoise/timerlat tracers are reporting new max latency on instances where the tracing is off, creating inconsistencies between the max reported values in the trace and in the tracing_max_latency. Thus only report new tracing_max_latency on active tracing instances. Link: https://lkml.kernel.org/r/ecd109fde4a0c24ab0f00ba1e9a144ac19a91322.1680104184.git.bristot@kernel.org Cc: stable@vger.kernel.org Fixes: dae181349f1e ("tracing/osnoise: Support a list of trace_array *tr") Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index e8116094bed8..4496975f2029 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1296,7 +1296,7 @@ static void notify_new_max_latency(u64 latency) rcu_read_lock(); list_for_each_entry_rcu(inst, &osnoise_instances, list) { tr = inst->tr; - if (tr->max_latency < latency) { + if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { tr->max_latency = latency; latency_fsnotify(tr); } From f82e7ca019dfad3b006fd3b772f7ac569672db55 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 9 Mar 2023 22:13:02 -0500 Subject: [PATCH 141/173] tracing: Error if a trace event has an array for a __field() A __field() in the TRACE_EVENT() macro is used to set up the fields of the trace event data. It is for single storage units (word, char, int, pointer, etc) and not for complex structures or arrays. Unfortunately, there's nothing preventing the build from accepting: __field(int, arr[5]); from building. It will turn into a array value. This use to work fine, as the offset and size use to be determined by the macro using the field name, but things have changed and the offset and size are now determined by the type. So the above would only be size 4, and the next field will be located 4 bytes from it (instead of 20). The proper way to declare static arrays is to use the __array() macro. Instead of __field(int, arr[5]) it should be __array(int, arr, 5). Add some macro tricks to the building of a trace event from the TRACE_EVENT() macro such that __field(int, arr[5]) will fail to build. A comment by the failure will explain why the build failed. Link: https://lore.kernel.org/lkml/20230306122549.236561-1-douglas.raillard@arm.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230309221302.642e82d9@gandalf.local.home Reported-by: Douglas RAILLARD Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) --- include/trace/stages/stage5_get_offsets.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index ac5c24d3beeb..e30a13be46ba 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -9,17 +9,30 @@ #undef __entry #define __entry entry +/* + * Fields should never declare an array: i.e. __field(int, arr[5]) + * If they do, it will cause issues in parsing and possibly corrupt the + * events. To prevent that from happening, test the sizeof() a fictitious + * type called "struct _test_no_array_##item" which will fail if "item" + * contains array elements (like "arr[5]"). + * + * If you hit this, use __array(int, arr, 5) instead. + */ #undef __field -#define __field(type, item) +#define __field(type, item) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_ext -#define __field_ext(type, item, filter_type) +#define __field_ext(type, item, filter_type) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_struct -#define __field_struct(type, item) +#define __field_struct(type, item) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) +#define __field_struct_ext(type, item, filter_type) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __array #define __array(type, item, len) From c6b486fb33680ad5a3a6390ce693c835caaae3f7 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Mon, 3 Apr 2023 14:33:21 +0530 Subject: [PATCH 142/173] net: ethernet: ti: am65-cpsw: Fix mdio cleanup in probe In the am65_cpsw_nuss_probe() function's cleanup path, the call to of_platform_device_destroy() for the common->mdio_dev device is invoked unconditionally. It is possible that either the MDIO node is not present in the device-tree, or the MDIO node is disabled in the device-tree. In both these cases, the MDIO device is not created, resulting in a NULL pointer dereference when the of_platform_device_destroy() function is invoked on the common->mdio_dev device on the cleanup path. Fix this by ensuring that the common->mdio_dev device exists, before attempting to invoke of_platform_device_destroy(). Fixes: a45cfcc69a25 ("net: ethernet: ti: am65-cpsw-nuss: use of_platform_device_create() for mdio") Signed-off-by: Siddharth Vadapalli Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230403090321.835877-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 4e3861c47708..bcea87b7151c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2926,7 +2926,8 @@ err_free_phylink: am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); err_of_clear: - of_platform_device_destroy(common->mdio_dev, NULL); + if (common->mdio_dev) + of_platform_device_destroy(common->mdio_dev, NULL); err_pm_clear: pm_runtime_put_sync(dev); pm_runtime_disable(dev); @@ -2956,7 +2957,8 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) am65_cpts_release(common->cpts); am65_cpsw_disable_serdes_phy(common); - of_platform_device_destroy(common->mdio_dev, NULL); + if (common->mdio_dev) + of_platform_device_destroy(common->mdio_dev, NULL); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); From e4395701330fc4aee530905039516fe770b81417 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 31 Mar 2023 12:32:24 -0300 Subject: [PATCH 143/173] iommufd: Check for uptr overflow syzkaller found that setting up a map with a user VA that wraps past zero can trigger WARN_ONs, particularly from pin_user_pages weirdly returning 0 due to invalid arguments. Prevent creating a pages with a uptr and size that would math overflow. WARNING: CPU: 0 PID: 518 at drivers/iommu/iommufd/pages.c:793 pfn_reader_user_pin+0x2e6/0x390 Modules linked in: CPU: 0 PID: 518 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:pfn_reader_user_pin+0x2e6/0x390 Code: b1 11 e9 25 fe ff ff e8 28 e4 0f ff 31 ff 48 89 de e8 2e e6 0f ff 48 85 db 74 0a e8 14 e4 0f ff e9 4d ff ff ff e8 0a e4 0f ff <0f> 0b bb f2 ff ff ff e9 3c ff ff ff e8 f9 e3 0f ff ba 01 00 00 00 RSP: 0018:ffffc90000f9fa30 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff821e2b72 RDX: 0000000000000000 RSI: ffff888014184680 RDI: 0000000000000002 RBP: ffffc90000f9fa78 R08: 00000000000000ff R09: 0000000079de6f4e R10: ffffc90000f9f790 R11: ffff888014185418 R12: ffffc90000f9fc60 R13: 0000000000000002 R14: ffff888007879800 R15: 0000000000000000 FS: 00007f4227555740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000043 CR3: 000000000e748005 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: pfn_reader_next+0x14a/0x7b0 ? interval_tree_double_span_iter_update+0x11a/0x140 pfn_reader_first+0x140/0x1b0 iopt_pages_rw_slow+0x71/0x280 ? __this_cpu_preempt_check+0x20/0x30 iopt_pages_rw_access+0x2b2/0x5b0 iommufd_access_rw+0x19f/0x2f0 iommufd_test+0xd11/0x16f0 ? write_comp_data+0x2f/0x90 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 ? __pfx_iommufd_fops_ioctl+0x10/0x10 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Cc: Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Link: https://lore.kernel.org/r/1-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index f8d92c9bb65b..400ec7c91ed7 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1142,6 +1142,7 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, bool writable) { struct iopt_pages *pages; + unsigned long end; /* * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP @@ -1150,6 +1151,9 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, if (length > SIZE_MAX - PAGE_SIZE || length == 0) return ERR_PTR(-EINVAL); + if (check_add_overflow((unsigned long)uptr, length, &end)) + return ERR_PTR(-EOVERFLOW); + pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT); if (!pages) return ERR_PTR(-ENOMEM); From 727c28c1cef2bc013d2c8bb6c50e410a3882a04e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 31 Mar 2023 12:32:25 -0300 Subject: [PATCH 144/173] iommufd: Fix unpinning of pages when an access is present syzkaller found that the calculation of batch_last_index should use 'start_index' since at input to this function the batch is either empty or it has already been adjusted to cross any accesses so it will start at the point we are unmapping from. Getting this wrong causes the unmap to run over the end of the pages which corrupts pages that were never mapped. In most cases this triggers the num pinned debugging: WARNING: CPU: 0 PID: 557 at drivers/iommu/iommufd/pages.c:294 __iopt_area_unfill_domain+0x152/0x560 Modules linked in: CPU: 0 PID: 557 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__iopt_area_unfill_domain+0x152/0x560 Code: d2 0f ff 44 8b 64 24 54 48 8b 44 24 48 31 ff 44 89 e6 48 89 44 24 38 e8 fc d3 0f ff 45 85 e4 0f 85 eb 01 00 00 e8 0e d2 0f ff <0f> 0b e8 07 d2 0f ff 48 8b 44 24 38 89 5c 24 58 89 18 8b 44 24 54 RSP: 0018:ffffc9000108baf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000ffffffff RCX: ffffffff821e3f85 RDX: 0000000000000000 RSI: ffff88800faf0000 RDI: 0000000000000002 RBP: ffffc9000108bd18 R08: 000000000003ca25 R09: 0000000000000014 R10: 000000000003ca00 R11: 0000000000000024 R12: 0000000000000004 R13: 0000000000000801 R14: 00000000000007ff R15: 0000000000000800 FS: 00007f3499ce1740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000243 CR3: 00000000179c2001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: iopt_area_unfill_domain+0x32/0x40 iopt_table_remove_domain+0x23f/0x4c0 iommufd_device_selftest_detach+0x3a/0x90 iommufd_selftest_destroy+0x55/0x70 iommufd_object_destroy_user+0xce/0x130 iommufd_destroy+0xa2/0xc0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Also add some useful WARN_ON sanity checks. Cc: Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage") Link: https://lore.kernel.org/r/2-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 400ec7c91ed7..b11aace83654 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1207,13 +1207,21 @@ iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, unsigned long start = max(start_index, *unmapped_end_index); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + batch->total_pfns) + WARN_ON(*unmapped_end_index - + batch->total_pfns != + start_index); batch_from_domain(batch, domain, area, start, last_index); - batch_last_index = start + batch->total_pfns - 1; + batch_last_index = start_index + batch->total_pfns - 1; } else { batch_last_index = last_index; } + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(batch_last_index > real_last_index); + /* * unmaps must always 'cut' at a place where the pfns are not * contiguous to pair with the maps that always install From 13a0d1ae7ee6b438f5537711a8c60cba00554943 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 31 Mar 2023 12:32:26 -0300 Subject: [PATCH 145/173] iommufd: Do not corrupt the pfn list when doing batch carry If batch->end is 0 then setting npfns[0] before computing the new value of pfns will fail to adjust the pfn and result in various page accounting corruptions. It should be ordered after. This seems to result in various kinds of page meta-data corruption related failures: WARNING: CPU: 1 PID: 527 at mm/gup.c:75 try_grab_folio+0x503/0x740 Modules linked in: CPU: 1 PID: 527 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:try_grab_folio+0x503/0x740 Code: e3 01 48 89 de e8 6d c1 dd ff 48 85 db 0f 84 7c fe ff ff e8 4f bf dd ff 49 8d 47 ff 48 89 45 d0 e9 73 fe ff ff e8 3d bf dd ff <0f> 0b 31 db e9 d0 fc ff ff e8 2f bf dd ff 48 8b 5d c8 31 ff 48 89 RSP: 0018:ffffc90000f37908 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 00000000fffffc02 RCX: ffffffff81504c26 RDX: 0000000000000000 RSI: ffff88800d030000 RDI: 0000000000000002 RBP: ffffc90000f37948 R08: 000000000003ca24 R09: 0000000000000008 R10: 000000000003ca00 R11: 0000000000000023 R12: ffffea000035d540 R13: 0000000000000001 R14: 0000000000000000 R15: ffffea000035d540 FS: 00007fecbf659740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200011c3 CR3: 000000000ef66006 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace: internal_get_user_pages_fast+0xd32/0x2200 pin_user_pages_fast+0x65/0x90 pfn_reader_user_pin+0x376/0x390 pfn_reader_next+0x14a/0x7b0 pfn_reader_first+0x140/0x1b0 iopt_area_fill_domain+0x74/0x210 iopt_table_add_domain+0x30e/0x6e0 iommufd_device_selftest_attach+0x7f/0x140 iommufd_test+0x10ff/0x16f0 iommufd_fops_ioctl+0x206/0x330 __x64_sys_ioctl+0x10e/0x160 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Cc: Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages") Link: https://lore.kernel.org/r/3-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com Reviewed-by: Kevin Tian Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index b11aace83654..3c47846cc5ef 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) batch->npfns[batch->end - 1] < keep_pfns); batch->total_pfns = keep_pfns; - batch->npfns[0] = keep_pfns; batch->pfns[0] = batch->pfns[batch->end - 1] + (batch->npfns[batch->end - 1] - keep_pfns); + batch->npfns[0] = keep_pfns; batch->end = 0; } From 218c597325f4faf7b7a6049233a30d7842b5b2dc Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Mon, 3 Apr 2023 14:11:20 +0200 Subject: [PATCH 146/173] net: stmmac: fix up RX flow hash indirection table when setting channels stmmac_reinit_queues() fails to fix up the RX hash. Even if the number of channels gets restricted, the output of `ethtool -x' indicates that all RX queues are used: $ ethtool -l enp0s29f2 Channel parameters for enp0s29f2: Pre-set maximums: RX: 8 TX: 8 Other: n/a Combined: n/a Current hardware settings: RX: 8 TX: 8 Other: n/a Combined: n/a $ ethtool -x enp0s29f2 RX flow hash indirection table for enp0s29f2 with 8 RX ring(s): 0: 0 1 2 3 4 5 6 7 8: 0 1 2 3 4 5 6 7 [...] $ ethtool -L enp0s29f2 rx 3 $ ethtool -x enp0s29f2 RX flow hash indirection table for enp0s29f2 with 3 RX ring(s): 0: 0 1 2 3 4 5 6 7 8: 0 1 2 3 4 5 6 7 [...] Fix this by setting the indirection table according to the number of specified queues. The result is now as expected: $ ethtool -L enp0s29f2 rx 3 $ ethtool -x enp0s29f2 RX flow hash indirection table for enp0s29f2 with 3 RX ring(s): 0: 0 1 2 0 1 2 0 1 8: 2 0 1 2 0 1 2 0 [...] Tested on Intel Elkhart Lake. Fixes: 0366f7e06a6b ("net: stmmac: add ethtool support for get/set channels") Signed-off-by: Corinna Vinschen Link: https://lore.kernel.org/r/20230403121120.489138-1-vinschen@redhat.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d41a5f92aee7..59cbf3597eb4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6950,7 +6950,7 @@ static void stmmac_napi_del(struct net_device *dev) int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) { struct stmmac_priv *priv = netdev_priv(dev); - int ret = 0; + int ret = 0, i; if (netif_running(dev)) stmmac_release(dev); @@ -6959,6 +6959,10 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt) priv->plat->rx_queues_to_use = rx_cnt; priv->plat->tx_queues_to_use = tx_cnt; + if (!netif_is_rxfh_configured(dev)) + for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++) + priv->rss.table[i] = ethtool_rxfh_indir_default(i, + rx_cnt); stmmac_napi_add(dev); From 5085e41f9e83a1bec51da1f20b54f2ec3a13a3fe Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Mar 2023 14:24:27 -0400 Subject: [PATCH 147/173] sunrpc: only free unix grouplist after RCU settles While the unix_gid object is rcu-freed, the group_info list that it contains is not. Ensure that we only put the group list reference once we are really freeing the unix_gid object. Reported-by: Zhi Li Link: https://bugzilla.redhat.com/show_bug.cgi?id=2183056 Signed-off-by: Jeff Layton Fixes: fd5d2f78261b ("SUNRPC: Make server side AUTH_UNIX use lockless lookups") Signed-off-by: Chuck Lever --- net/sunrpc/svcauth_unix.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 983c5891cb56..4246363cb095 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -416,14 +416,23 @@ static int unix_gid_hash(kuid_t uid) return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS); } +static void unix_gid_free(struct rcu_head *rcu) +{ + struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu); + struct cache_head *item = &ug->h; + + if (test_bit(CACHE_VALID, &item->flags) && + !test_bit(CACHE_NEGATIVE, &item->flags)) + put_group_info(ug->gi); + kfree(ug); +} + static void unix_gid_put(struct kref *kref) { struct cache_head *item = container_of(kref, struct cache_head, ref); struct unix_gid *ug = container_of(item, struct unix_gid, h); - if (test_bit(CACHE_VALID, &item->flags) && - !test_bit(CACHE_NEGATIVE, &item->flags)) - put_group_info(ug->gi); - kfree_rcu(ug, rcu); + + call_rcu(&ug->rcu, unix_gid_free); } static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew) From 8be8f170e8383fd1421e8b87950e90d7dd45be07 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Mar 2023 13:47:58 -0400 Subject: [PATCH 148/173] NFS: Remove "select RPCSEC_GSS_KRB5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_CRYPTO=n (e.g. arm/shmobile_defconfig): WARNING: unmet direct dependencies detected for RPCSEC_GSS_KRB5 Depends on [n]: NETWORK_FILESYSTEMS [=y] && SUNRPC [=y] && CRYPTO [=n] Selected by [y]: - NFS_V4 [=y] && NETWORK_FILESYSTEMS [=y] && NFS_FS [=y] As NFSv4 can work without crypto enabled, remove the RPCSEC_GSS_KRB5 dependency altogether. Trond says: > It is possible to use the NFSv4.1 client with just AUTH_SYS, and > in fact there are plenty of people out there using only that. The > fact that RFC5661 gets its knickers in a twist about RPCSEC_GSS > support is largely irrelevant to those people. > > The other issue is that ’select’ enforces the strict dependency > that if the NFS client is compiled into the kernel, then the > RPCSEC_GSS and kerberos code needs to be compiled in as well: they > cannot exist as modules. Fixes: e57d06527738 ("NFS & NFSD: Update GSS dependencies") Reported-by: kernel test robot Reported-by: Niklas Söderlund Suggested-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 450d6c3bc05e..c1c7ed2fd860 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -75,7 +75,6 @@ config NFS_V3_ACL config NFS_V4 tristate "NFS client support for NFS version 4" depends on NFS_FS - select RPCSEC_GSS_KRB5 select KEYS help This option enables support for version 4 of the NFS protocol From 7de82c2f36fb26aa78440bbf0efcf360b691d98b Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Sat, 1 Apr 2023 13:22:08 -0700 Subject: [PATCH 149/173] NFSD: callback request does not use correct credential for AUTH_SYS Currently callback request does not use the credential specified in CREATE_SESSION if the security flavor for the back channel is AUTH_SYS. Problem was discovered by pynfs 4.1 DELEG5 and DELEG7 test with error: DELEG5 st_delegation.testCBSecParms : FAILURE expected callback with uid, gid == 17, 19, got 0, 0 Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Fixes: 8276c902bbe9 ("SUNRPC: remove uid and gid from struct auth_cred") Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 2a815f5a52c4..4039ffcf90ba 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -946,8 +946,8 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r if (!kcred) return NULL; - kcred->uid = ses->se_cb_sec.uid; - kcred->gid = ses->se_cb_sec.gid; + kcred->fsuid = ses->se_cb_sec.uid; + kcred->fsgid = ses->se_cb_sec.gid; return kcred; } } From b4a01ace20f5c93c724abffc0a83ec84f514b98d Mon Sep 17 00:00:00 2001 From: Simei Su Date: Wed, 22 Mar 2023 10:24:15 +0800 Subject: [PATCH 150/173] ice: fix wrong fallback logic for FDIR When adding a FDIR filter, if ice_vc_fdir_set_irq_ctx returns failure, the inserted fdir entry will not be removed and if ice_vc_fdir_write_fltr returns failure, the fdir context info for irq handler will not be cleared which may lead to inconsistent or memory leak issue. This patch refines failure cases to resolve this issue. Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Signed-off-by: Simei Su Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 5fd75e75772e..4d007d8c2540 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -1871,7 +1871,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) v_ret = VIRTCHNL_STATUS_SUCCESS; stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id); - goto err_free_conf; + goto err_rem_entry; } ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun); @@ -1880,15 +1880,16 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n", vf->vf_id, ret); - goto err_rem_entry; + goto err_clr_irq; } exit: kfree(stat); return ret; -err_rem_entry: +err_clr_irq: ice_vc_fdir_clear_irq_ctx(vf); +err_rem_entry: ice_vc_fdir_remove_entry(vf, conf, conf->flow_id); err_free_conf: devm_kfree(dev, conf); From 83c911dc5e0e8e6eaa6431c06972a8f159bfe2fc Mon Sep 17 00:00:00 2001 From: Lingyu Liu Date: Tue, 28 Mar 2023 10:49:11 +0000 Subject: [PATCH 151/173] ice: Reset FDIR counter in FDIR init stage Reset the FDIR counters when FDIR inits. Without this patch, when VF initializes or resets, all the FDIR counters are not cleaned, which may cause unexpected behaviors for future FDIR rule create (e.g., rule conflict). Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Signed-off-by: Junfeng Guo Signed-off-by: Lingyu Liu Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_virtchnl_fdir.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 4d007d8c2540..daa6a1e894cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -541,6 +541,21 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf) } } +/** + * ice_vc_fdir_reset_cnt_all - reset all FDIR counters for this VF FDIR + * @fdir: pointer to the VF FDIR structure + */ +static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir) +{ + enum ice_fltr_ptype flow; + + for (flow = ICE_FLTR_PTYPE_NONF_NONE; + flow < ICE_FLTR_PTYPE_MAX; flow++) { + fdir->fdir_fltr_cnt[flow][0] = 0; + fdir->fdir_fltr_cnt[flow][1] = 0; + } +} + /** * ice_vc_fdir_has_prof_conflict * @vf: pointer to the VF structure @@ -1998,6 +2013,7 @@ void ice_vf_fdir_init(struct ice_vf *vf) spin_lock_init(&fdir->ctx_lock); fdir->ctx_irq.flags = 0; fdir->ctx_done.flags = 0; + ice_vc_fdir_reset_cnt_all(fdir); } /** From d564fa1ff19e893e2971d66e5c8f49dc1cdc8ffc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 9 Jan 2023 15:11:52 +0200 Subject: [PATCH 152/173] asm-generic/io.h: suppress endianness warnings for readq() and writeq() Commit c1d55d50139b ("asm-generic/io.h: Fix sparse warnings on big-endian architectures") missed fixing the 64-bit accessors. Arnd explains in the attached link why the casts are necessary, even if __raw_readq() and __raw_writeq() do not take endian-specific types. Link: https://lore.kernel.org/lkml/9105d6fc-880b-4734-857d-e3d30b87ccf6@app.fastmail.com/ Suggested-by: Arnd Bergmann Signed-off-by: Vladimir Oltean Reviewed-by: Jonathan Cameron Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 4c44a29b5e8e..d78c3056c98f 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -236,7 +236,7 @@ static inline u64 readq(const volatile void __iomem *addr) log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); __io_br(); - val = __le64_to_cpu(__raw_readq(addr)); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); __io_ar(val); log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; @@ -287,7 +287,7 @@ static inline void writeq(u64 value, volatile void __iomem *addr) { log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __io_bw(); - __raw_writeq(__cpu_to_le64(value), addr); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); __io_aw(); log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } From 05d3855b4d21ef3c2df26be1cbba9d2c68915fcb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 9 Jan 2023 15:11:53 +0200 Subject: [PATCH 153/173] asm-generic/io.h: suppress endianness warnings for relaxed accessors Copy the forced type casts from the normal MMIO accessors to suppress the sparse warnings that point out __raw_readl() returns a native endian word (just like readl()). Signed-off-by: Vladimir Oltean Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index d78c3056c98f..587e7e9b9a37 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -319,7 +319,7 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr) u16 val; log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); - val = __le16_to_cpu(__raw_readw(addr)); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } @@ -332,7 +332,7 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr) u32 val; log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); - val = __le32_to_cpu(__raw_readl(addr)); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } @@ -345,7 +345,7 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) u64 val; log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); - val = __le64_to_cpu(__raw_readq(addr)); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } @@ -366,7 +366,7 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) static inline void writew_relaxed(u16 value, volatile void __iomem *addr) { log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); - __raw_writew(cpu_to_le16(value), addr); + __raw_writew((u16 __force)cpu_to_le16(value), addr); log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -376,7 +376,7 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr) static inline void writel_relaxed(u32 value, volatile void __iomem *addr) { log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); - __raw_writel(__cpu_to_le32(value), addr); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -386,7 +386,7 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr) static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) { log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); - __raw_writeq(__cpu_to_le64(value), addr); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif From 656e9007ef5862746cdf7ac16267c8e06e7b0989 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Mar 2023 09:53:31 +0100 Subject: [PATCH 154/173] asm-generic: avoid __generic_cmpxchg_local warnings Code that passes a 32-bit constant into cmpxchg() produces a harmless sparse warning because of the truncation in the branch that is not taken: fs/erofs/zdata.c: note: in included file (through /home/arnd/arm-soc/arch/arm/include/asm/cmpxchg.h, /home/arnd/arm-soc/arch/arm/include/asm/atomic.h, /home/arnd/arm-soc/include/linux/atomic.h, ...): include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe) include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe) include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe) include/asm-generic/cmpxchg-local.h:30:42: warning: cast truncates bits from constant value (5f0edead becomes ad) include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe) include/asm-generic/cmpxchg-local.h:34:44: warning: cast truncates bits from constant value (5f0edead becomes dead) This was reported as a regression to Matt's recent __generic_cmpxchg_local patch, though this patch only added more warnings on top of the ones that were already there. Rewording the truncation to use an explicit bitmask instead of a cast to a smaller type avoids the warning but otherwise leaves the code unchanged. I had another look at why the cast is even needed for atomic_cmpxchg(), and as Matt describes the problem here is that atomic_t contains a signed 'int', but cmpxchg() takes an 'unsigned long' argument, and converting between the two leads to a 64-bit sign-extension of negative 32-bit atomics. I checked the other implementations of arch_cmpxchg() and did not find any others that run into the same problem as __generic_cmpxchg_local(), but it's easy to be on the safe side here and always convert the signed int into an unsigned int when calling arch_cmpxchg(), as this will work even when any of the arch_cmpxchg() implementations run into the same problem. Fixes: 624654152284 ("locking/atomic: cmpxchg: Make __generic_cmpxchg_local compare against zero-extended 'old' value") Reviewed-by: Matt Evans Signed-off-by: Arnd Bergmann --- include/asm-generic/atomic.h | 4 ++-- include/asm-generic/cmpxchg-local.h | 12 ++++++------ include/asm-generic/cmpxchg.h | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 04b8be9f1a77..e271d6708c87 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -130,7 +130,7 @@ ATOMIC_OP(xor, ^) #define arch_atomic_read(v) READ_ONCE((v)->counter) #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (v))) -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (old), (new))) +#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (u32)(v))) +#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (u32)(old), (u32)(new))) #endif /* __ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index c3e7315b7c1d..3df9f59a544e 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h @@ -26,16 +26,16 @@ static inline unsigned long __generic_cmpxchg_local(volatile void *ptr, raw_local_irq_save(flags); switch (size) { case 1: prev = *(u8 *)ptr; - if (prev == (u8)old) - *(u8 *)ptr = (u8)new; + if (prev == (old & 0xffu)) + *(u8 *)ptr = (new & 0xffu); break; case 2: prev = *(u16 *)ptr; - if (prev == (u16)old) - *(u16 *)ptr = (u16)new; + if (prev == (old & 0xffffu)) + *(u16 *)ptr = (new & 0xffffu); break; case 4: prev = *(u32 *)ptr; - if (prev == (u32)old) - *(u32 *)ptr = (u32)new; + if (prev == (old & 0xffffffffffu)) + *(u32 *)ptr = (new & 0xffffffffu); break; case 8: prev = *(u64 *)ptr; if (prev == old) diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h index dca4419922a9..848de25fc4bf 100644 --- a/include/asm-generic/cmpxchg.h +++ b/include/asm-generic/cmpxchg.h @@ -32,7 +32,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u8 *)ptr; - *(volatile u8 *)ptr = x; + *(volatile u8 *)ptr = (x & 0xffu); local_irq_restore(flags); return ret; #endif /* __xchg_u8 */ @@ -43,7 +43,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u16 *)ptr; - *(volatile u16 *)ptr = x; + *(volatile u16 *)ptr = (x & 0xffffu); local_irq_restore(flags); return ret; #endif /* __xchg_u16 */ @@ -54,7 +54,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u32 *)ptr; - *(volatile u32 *)ptr = x; + *(volatile u32 *)ptr = (x & 0xffffffffu); local_irq_restore(flags); return ret; #endif /* __xchg_u32 */ From fb5015bc8b733323b58f015b88e4f316010ec856 Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Fri, 31 Mar 2023 10:31:16 +0100 Subject: [PATCH 155/173] docs: kvm: x86: Fix broken field list Add a missing ":" to fix a broken field list. Signed-off-by: Takahiro Itazuri Fixes: ba7bb663f554 ("KVM: x86: Provide per VM capability for disabling PMU virtualization") Message-Id: <20230331093116.99820-1-itazur@amazon.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 62de0768d6aa..a5c803f39832 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8296,11 +8296,11 @@ ENOSYS for the others. 8.35 KVM_CAP_PMU_CAPABILITY --------------------------- -:Capability KVM_CAP_PMU_CAPABILITY +:Capability: KVM_CAP_PMU_CAPABILITY :Architectures: x86 :Type: vm :Parameters: arg[0] is bitmask of PMU virtualization capabilities. -:Returns 0 on success, -EINVAL when arg[0] contains invalid bits +:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits This capability alters PMU virtualization in KVM. From 0a78cf7264d29abeca098eae0b188a10aabc8a32 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 3 Apr 2023 12:49:58 -0700 Subject: [PATCH 156/173] raw: Fix NULL deref in raw_get_next(). Dae R. Jeong reported a NULL deref in raw_get_next() [0]. It seems that the repro was running these sequences in parallel so that one thread was iterating on a socket that was being freed in another netns. unshare(0x40060200) r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00') socket$inet_icmp_raw(0x2, 0x3, 0x1) pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f) After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW sockets. However, we should use spinlock for slow paths to avoid the NULL deref. Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object is not reused during iteration in the grace period. In fact, the lockless readers do not check the nulls marker with get_nulls_value(). So, SOCK_RAW should use hlist instead of hlist_nulls. Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(), let's convert hlist_nulls to hlist and use sk_for_each_rcu() for fast paths and sk_for_each() and spinlock for /proc/net/raw. [0]: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225 seq_read+0x224/0x320 fs/seq_file.c:162 pde_read fs/proc/inode.c:316 [inline] proc_reg_read+0x23f/0x330 fs/proc/inode.c:328 vfs_read+0x31e/0xd30 fs/read_write.c:468 ksys_pread64 fs/read_write.c:665 [inline] __do_sys_pread64 fs/read_write.c:675 [inline] __se_sys_pread64 fs/read_write.c:672 [inline] __x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x478d29 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011 RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29 RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000 R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740 R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU") Reported-by: syzbot Reported-by: Dae R. Jeong Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/raw.h | 4 ++-- net/ipv4/raw.c | 36 +++++++++++++++++++----------------- net/ipv4/raw_diag.c | 10 ++++------ net/ipv6/raw.c | 10 ++++------ 4 files changed, 29 insertions(+), 31 deletions(-) diff --git a/include/net/raw.h b/include/net/raw.h index 2c004c20ed99..3af5289fdead 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -37,7 +37,7 @@ int raw_rcv(struct sock *, struct sk_buff *); struct raw_hashinfo { spinlock_t lock; - struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; + struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; }; static inline u32 raw_hashfunc(const struct net *net, u32 proto) @@ -51,7 +51,7 @@ static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo) spin_lock_init(&hashinfo->lock); for (i = 0; i < RAW_HTABLE_SIZE; i++) - INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i); + INIT_HLIST_HEAD(&hashinfo->ht[i]); } #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 94df935ee0c5..8088a5011e7d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -91,12 +91,12 @@ EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; - struct hlist_nulls_head *hlist; + struct hlist_head *hlist; hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)]; spin_lock(&h->lock); - __sk_nulls_add_node_rcu(sk, hlist); + sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); spin_unlock(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -110,7 +110,7 @@ void raw_unhash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; spin_lock(&h->lock); - if (__sk_nulls_del_node_init_rcu(sk)) + if (sk_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(&h->lock); } @@ -163,16 +163,15 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) static int raw_v4_input(struct net *net, struct sk_buff *skb, const struct iphdr *iph, int hash) { - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int sdif = inet_sdif(skb); + struct hlist_head *hlist; int dif = inet_iif(skb); int delivered = 0; struct sock *sk; hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (!raw_v4_match(net, sk, iph->protocol, iph->saddr, iph->daddr, dif, sdif)) continue; @@ -264,10 +263,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); + struct hlist_head *hlist; const struct iphdr *iph; struct sock *sk; int hash; @@ -276,7 +274,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { iph = (const struct iphdr *)skb->data; if (!raw_v4_match(net, sk, iph->protocol, iph->daddr, iph->saddr, dif, sdif)) @@ -950,14 +948,13 @@ static struct sock *raw_get_first(struct seq_file *seq, int bucket) { struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { hlist = &h->ht[state->bucket]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each(sk, hlist) { if (sock_net(sk) == seq_file_net(seq)) return sk; } @@ -970,7 +967,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) struct raw_iter_state *state = raw_seq_private(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); } while (sk && sock_net(sk) != seq_file_net(seq)); if (!sk) @@ -989,9 +986,12 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(&h->lock) { - rcu_read_lock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_lock(&h->lock); + return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); @@ -1010,9 +1010,11 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(&h->lock) { - rcu_read_unlock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_seq_stop); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 999321834b94..da3591a66a16 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -57,8 +57,7 @@ static bool raw_lookup(struct net *net, struct sock *sk, static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) { struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int slot; @@ -68,7 +67,7 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 rcu_read_lock(); for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (raw_lookup(net, sk, r)) { /* * Grab it and keep until we fill @@ -142,9 +141,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int num, s_num, slot, s_slot; + struct hlist_head *hlist; struct sock *sk = NULL; struct nlattr *bc; @@ -161,7 +159,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, num = 0; hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index bac9ba747bde..a327aa481df4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -141,10 +141,9 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; const struct in6_addr *saddr; const struct in6_addr *daddr; + struct hlist_head *hlist; struct sock *sk; bool delivered = false; __u8 hash; @@ -155,7 +154,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { int filtered; if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, @@ -333,15 +332,14 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int hash; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; From ab5fb73ffa01072b4d8031cc05801fa1cb653bee Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 3 Apr 2023 12:49:59 -0700 Subject: [PATCH 157/173] ping: Fix potentail NULL deref for /proc/net/icmp. After commit dbca1596bbb0 ("ping: convert to RCU lookups, get rid of rwlock"), we use RCU for ping sockets, but we should use spinlock for /proc/net/icmp to avoid a potential NULL deref mentioned in the previous patch. Let's go back to using spinlock there. Note we can convert ping sockets to use hlist instead of hlist_nulls because we do not use SLAB_TYPESAFE_BY_RCU for ping sockets. Fixes: dbca1596bbb0 ("ping: convert to RCU lookups, get rid of rwlock") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/ipv4/ping.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 409ec2a1f95b..5178a3f3cb53 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1089,13 +1089,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) - __acquires(RCU) + __acquires(ping_table.lock) { struct ping_iter_state *state = seq->private; state->bucket = 0; state->family = family; - rcu_read_lock(); + spin_lock(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1121,9 +1121,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(ping_table.lock) { - rcu_read_unlock(); + spin_unlock(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_seq_stop); From e847c7675e19ef344913724dc68f83df31ad6a17 Mon Sep 17 00:00:00 2001 From: Andy Roulin Date: Mon, 3 Apr 2023 14:20:53 -0700 Subject: [PATCH 158/173] ethtool: reset #lanes when lanes is omitted If the number of lanes was forced and then subsequently the user omits this parameter, the ksettings->lanes is reset. The driver should then reset the number of lanes to the device's default for the specified speed. However, although the ksettings->lanes is set to 0, the mod variable is not set to true to indicate the driver and userspace should be notified of the changes. The consequence is that the same ethtool operation will produce different results based on the initial state. If the initial state is: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 2 Duplex: Full Auto-negotiation: on then executing 'ethtool -s swp1 speed 50000 autoneg off' will yield: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 2 Duplex: Full Auto-negotiation: off While if the initial state is: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 1 Duplex: Full Auto-negotiation: off executing the same 'ethtool -s swp1 speed 50000 autoneg off' results in: $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 1 Duplex: Full Auto-negotiation: off This patch fixes this behavior. Omitting lanes will always results in the driver choosing the default lane width for the chosen speed. In this scenario, regardless of the initial state, the end state will be, e.g., $ ethtool swp1 | grep -A 3 'Speed: ' Speed: 500000Mb/s Lanes: 2 Duplex: Full Auto-negotiation: off Fixes: 012ce4dd3102 ("ethtool: Extend link modes settings uAPI with lanes") Signed-off-by: Andy Roulin Reviewed-by: Danielle Ratson Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/ac238d6b-8726-8156-3810-6471291dbc7f@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/linkmodes.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index fab66c169b9f..20165e07ef90 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -270,11 +270,12 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, "lanes configuration not supported by device"); return -EOPNOTSUPP; } - } else if (!lsettings->autoneg) { - /* If autoneg is off and lanes parameter is not passed from user, - * set the lanes parameter to 0. + } else if (!lsettings->autoneg && ksettings->lanes) { + /* If autoneg is off and lanes parameter is not passed from user but + * it was defined previously then set the lanes parameter to 0. */ ksettings->lanes = 0; + *mod = true; } ret = ethnl_update_bitset(ksettings->link_modes.advertising, From a1865f2e7d10dde00d35a2122b38d2e469ae67ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Apr 2023 21:46:43 +0000 Subject: [PATCH 159/173] netlink: annotate lockless accesses to nlk->max_recvmsg_len syzbot reported a data-race in data-race in netlink_recvmsg() [1] Indeed, netlink_recvmsg() can be run concurrently, and netlink_dump() also needs protection. [1] BUG: KCSAN: data-race in netlink_recvmsg / netlink_recvmsg read to 0xffff888141840b38 of 8 bytes by task 23057 on cpu 0: netlink_recvmsg+0xea/0x730 net/netlink/af_netlink.c:1988 sock_recvmsg_nosec net/socket.c:1017 [inline] sock_recvmsg net/socket.c:1038 [inline] __sys_recvfrom+0x1ee/0x2e0 net/socket.c:2194 __do_sys_recvfrom net/socket.c:2212 [inline] __se_sys_recvfrom net/socket.c:2208 [inline] __x64_sys_recvfrom+0x78/0x90 net/socket.c:2208 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd write to 0xffff888141840b38 of 8 bytes by task 23037 on cpu 1: netlink_recvmsg+0x114/0x730 net/netlink/af_netlink.c:1989 sock_recvmsg_nosec net/socket.c:1017 [inline] sock_recvmsg net/socket.c:1038 [inline] ____sys_recvmsg+0x156/0x310 net/socket.c:2720 ___sys_recvmsg net/socket.c:2762 [inline] do_recvmmsg+0x2e5/0x710 net/socket.c:2856 __sys_recvmmsg net/socket.c:2935 [inline] __do_sys_recvmmsg net/socket.c:2958 [inline] __se_sys_recvmmsg net/socket.c:2951 [inline] __x64_sys_recvmmsg+0xe2/0x160 net/socket.c:2951 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x0000000000000000 -> 0x0000000000001000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 23037 Comm: syz-executor.2 Not tainted 6.3.0-rc4-syzkaller-00195-g5a57b48fdfcb #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 Fixes: 9063e21fb026 ("netlink: autosize skb lengthes") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230403214643.768555-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c64277659753..f365dfdd672d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1952,7 +1952,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); - size_t copied; + size_t copied, max_recvmsg_len; struct sk_buff *skb, *data_skb; int err, ret; @@ -1985,9 +1985,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, #endif /* Record the max length of recvmsg() calls for future allocations */ - nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); - nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - SKB_WITH_OVERHEAD(32768)); + max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); + max_recvmsg_len = min_t(size_t, max_recvmsg_len, + SKB_WITH_OVERHEAD(32768)); + WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); copied = data_skb->len; if (len < copied) { @@ -2236,6 +2237,7 @@ static int netlink_dump(struct sock *sk) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; int alloc_min_size; @@ -2258,8 +2260,9 @@ static int netlink_dump(struct sock *sk) cb = &nlk->cb; alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (alloc_min_size < nlk->max_recvmsg_len) { - alloc_size = nlk->max_recvmsg_len; + max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len); + if (alloc_min_size < max_recvmsg_len) { + alloc_size = max_recvmsg_len; skb = alloc_skb(alloc_size, (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN | __GFP_NORETRY); From 3ce9345580974863c060fa32971537996a7b2d57 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Mon, 3 Apr 2023 10:28:09 -0700 Subject: [PATCH 160/173] gve: Secure enough bytes in the first TX desc for all TCP pkts Non-GSO TCP packets whose SKBs' linear portion did not include the entire TCP header were not populating the first Tx descriptor with as many bytes as the vNIC expected. This change ensures that all TCP packets populate the first descriptor with the correct number of bytes. Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC") Signed-off-by: Shailend Chand Link: https://lore.kernel.org/r/20230403172809.2939306-1-shailend@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve.h | 2 ++ drivers/net/ethernet/google/gve/gve_tx.c | 12 +++++------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 64eb0442c82f..005cb9dfe078 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -47,6 +47,8 @@ #define GVE_RX_BUFFER_SIZE_DQO 2048 +#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ struct gve_rx_desc_queue { struct gve_rx_desc *desc_ring; /* the descriptor ring */ diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 4888bf05fbed..5e11b8236754 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -284,8 +284,8 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx, int bytes; int hlen; - hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + - tcp_hdrlen(skb) : skb_headlen(skb); + hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + tcp_hdrlen(skb) : + min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len); pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen); @@ -454,13 +454,11 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st pkt_desc = &tx->desc[idx]; l4_hdr_offset = skb_checksum_start_offset(skb); - /* If the skb is gso, then we want the tcp header in the first segment - * otherwise we want the linear portion of the skb (which will contain - * the checksum because skb->csum_start and skb->csum_offset are given - * relative to skb->head) in the first segment. + /* If the skb is gso, then we want the tcp header alone in the first segment + * otherwise we want the minimum required by the gVNIC spec. */ hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : - skb_headlen(skb); + min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len); info->skb = skb; /* We don't want to split the header, so if necessary, pad to the end From 9d52727f8043cfda241ae96896628d92fa9c50bb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 4 Apr 2023 22:21:14 -0400 Subject: [PATCH 161/173] tracing: Have tracing_snapshot_instance_cond() write errors to the appropriate instance If a trace instance has a failure with its snapshot code, the error message is to be written to that instance's buffer. But currently, the message is written to the top level buffer. Worse yet, it may also disable the top level buffer and not the instance that had the issue. Link: https://lkml.kernel.org/r/20230405022341.688730321@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ross Zwisler Fixes: 2824f50332486 ("tracing: Make the snapshot trigger work with instances") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 937e9676dfd4..ed1d1093f5e9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1149,22 +1149,22 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, unsigned long flags; if (in_nmi()) { - internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); - internal_trace_puts("*** snapshot is being ignored ***\n"); + trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); + trace_array_puts(tr, "*** snapshot is being ignored ***\n"); return; } if (!tr->allocated_snapshot) { - internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n"); - internal_trace_puts("*** stopping trace here! ***\n"); - tracing_off(); + trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); + trace_array_puts(tr, "*** stopping trace here! ***\n"); + tracer_tracing_off(tr); return; } /* Note, snapshot can not be used when the tracer uses it */ if (tracer->use_max_tr) { - internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n"); - internal_trace_puts("*** Can not use snapshot (sorry) ***\n"); + trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); + trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } From e94891641c21f607e4d6887bcd3beff882fcc483 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 4 Apr 2023 22:21:15 -0400 Subject: [PATCH 162/173] tracing: Fix ftrace_boot_snapshot command line logic The kernel command line ftrace_boot_snapshot by itself is supposed to trigger a snapshot at the end of boot up of the main top level trace buffer. A ftrace_boot_snapshot=foo will do the same for an instance called foo that was created by trace_instance=foo,... The logic was broken where if ftrace_boot_snapshot was by itself, it would trigger a snapshot for all instances that had tracing enabled, regardless if it asked for a snapshot or not. When a snapshot is requested for a buffer, the buffer's tr->allocated_snapshot is set to true. Use that to know if a trace buffer wants a snapshot at boot up or not. Since the top level buffer is part of the ftrace_trace_arrays list, there's no reason to treat it differently than the other buffers. Just iterate the list if ftrace_boot_snapshot was specified. Link: https://lkml.kernel.org/r/20230405022341.895334039@goodmis.org Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Andrew Morton Cc: Ross Zwisler Fixes: 9c1c251d670bc ("tracing: Allow boot instances to have snapshot buffers") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ed1d1093f5e9..4686473b8497 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10393,19 +10393,20 @@ out: void __init ftrace_boot_snapshot(void) { +#ifdef CONFIG_TRACER_MAX_TRACE struct trace_array *tr; - if (snapshot_at_boot) { - tracing_snapshot(); - internal_trace_puts("** Boot snapshot taken **\n"); - } + if (!snapshot_at_boot) + return; list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr == &global_trace) + if (!tr->allocated_snapshot) continue; - trace_array_puts(tr, "** Boot snapshot taken **\n"); + tracing_snapshot_instance(tr); + trace_array_puts(tr, "** Boot snapshot taken **\n"); } +#endif } void __init early_trace_init(void) From 774e7cb50359eff7e966f4e2e80c9486014d3a23 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Fri, 31 Mar 2023 13:36:02 +0200 Subject: [PATCH 163/173] accel/ivpu: Add dma fence to command buffers only Currently job->done_fence is added to every BO handle within a job. If job handle (command buffer) is shared between multiple submits, KMD will add the fence in each of them. Then bo_wait_ioctl() executed on command buffer will exit only when all jobs containing that handle are done. This creates deadlock scenario for user mode driver in case when job handle is added as dependency of another job, because bo_wait_ioctl() of first job will wait until second job finishes, and second job can not finish before first one. Having fences added only to job buffer handle allows user space to execute bo_wait_ioctl() on the job even if it's handle is submitted with other job. Fixes: cd7272215c44 ("accel/ivpu: Add command buffer submission logic") Signed-off-by: Karol Wachowski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230331113603.2802515-2-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 910301fae435..3c6f1e16cf2f 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -461,26 +461,22 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; - ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, - &acquire_ctx); + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx); if (ret) { ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); return ret; } - for (i = 0; i < buf_count; i++) { - ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); - if (ret) { - ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); - goto unlock_reservations; - } + ret = dma_resv_reserve_fences(bo->base.resv, 1); + if (ret) { + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); + goto unlock_reservations; } - for (i = 0; i < buf_count; i++) - dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(bo->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); unlock_reservations: - drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx); wmb(); /* Flush write combining buffers */ From 0ec8671837a61d841462179686c5819d951d3b10 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 31 Mar 2023 13:36:03 +0200 Subject: [PATCH 164/173] accel/ivpu: Fix S3 system suspend when not idle Wait for VPU to be idle in ivpu_pm_suspend_cb() before powering off the device, so jobs are not lost and TDRs are not triggered after resume. Fixes: 852be13f3bd3 ("accel/ivpu: Add PM support") Signed-off-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20230331113603.2802515-3-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_pm.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 7df72fa8100f..bde42d6383da 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -140,32 +140,28 @@ int ivpu_pm_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct ivpu_device *vdev = to_ivpu_device(drm); - int ret; + unsigned long timeout; ivpu_dbg(vdev, PM, "Suspend..\n"); - ret = ivpu_suspend(vdev); - if (ret && vdev->pm->suspend_reschedule_counter) { - ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n", - vdev->pm->suspend_reschedule_counter); - pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend); - vdev->pm->suspend_reschedule_counter--; - return -EBUSY; - } else if (!vdev->pm->suspend_reschedule_counter) { - ivpu_warn(vdev, "Failed to enter idle, force suspend\n"); - ivpu_pm_prepare_cold_boot(vdev); - } else { - ivpu_pm_prepare_warm_boot(vdev); + timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr); + while (!ivpu_hw_is_idle(vdev)) { + cond_resched(); + if (time_after_eq(jiffies, timeout)) { + ivpu_err(vdev, "Failed to enter idle on system suspend\n"); + return -EBUSY; + } } - vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; + ivpu_suspend(vdev); + ivpu_pm_prepare_warm_boot(vdev); pci_save_state(to_pci_dev(dev)); pci_set_power_state(to_pci_dev(dev), PCI_D3hot); ivpu_dbg(vdev, PM, "Suspend done.\n"); - return ret; + return 0; } int ivpu_pm_resume_cb(struct device *dev) From b45193cb4df556fe6251b285a5ce44046dd36b4a Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 4 Apr 2023 09:31:28 +0200 Subject: [PATCH 165/173] can: j1939: j1939_tp_tx_dat_new(): fix out-of-bounds memory access In the j1939_tp_tx_dat_new() function, an out-of-bounds memory access could occur during the memcpy() operation if the size of skb->cb is larger than the size of struct j1939_sk_buff_cb. This is because the memcpy() operation uses the size of skb->cb, leading to a read beyond the struct j1939_sk_buff_cb. Updated the memcpy() operation to use the size of struct j1939_sk_buff_cb instead of the size of skb->cb. This ensures that the memcpy() operation only reads the memory within the bounds of struct j1939_sk_buff_cb, preventing out-of-bounds memory access. Additionally, add a BUILD_BUG_ON() to check that the size of skb->cb is greater than or equal to the size of struct j1939_sk_buff_cb. This ensures that the skb->cb buffer is large enough to hold the j1939_sk_buff_cb structure. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Reported-by: Shuangpeng Bai Tested-by: Shuangpeng Bai Signed-off-by: Oleksij Rempel Link: https://groups.google.com/g/syzkaller/c/G_LL-C3plRs/m/-8xCi6dCAgAJ Link: https://lore.kernel.org/all/20230404073128.3173900-1-o.rempel@pengutronix.de Cc: stable@vger.kernel.org [mkl: rephrase commit message] Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index fb92c3609e17..fe3df23a2595 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -604,7 +604,10 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv, /* reserve CAN header */ skb_reserve(skb, offsetof(struct can_frame, data)); - memcpy(skb->cb, re_skcb, sizeof(skb->cb)); + /* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */ + BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb)); + + memcpy(skb->cb, re_skcb, sizeof(*re_skcb)); skcb = j1939_skb_to_cb(skb); if (swap_src_dst) j1939_skbcb_swap(skcb); From 0145462fc802cd447ef5d029758043c7f15b4b1e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 30 Mar 2023 19:02:48 +0200 Subject: [PATCH 166/173] can: isotp: isotp_recvmsg(): use sock_recv_cmsgs() to get SOCK_RXQ_OVFL infos isotp.c was still using sock_recv_timestamp() which does not provide control messages to detect dropped PDUs in the receive path. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230330170248.62342-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 9bc344851704..47c2ebad10ed 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1120,7 +1120,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (ret < 0) goto out_err; - sock_recv_timestamp(msg, sk, skb); + sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(ISOTP_MIN_NAMELEN); From 79e19fa79cb5d5f1b3bf3e3ae24989ccb93c7b7b Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Fri, 31 Mar 2023 14:55:11 +0200 Subject: [PATCH 167/173] can: isotp: isotp_ops: fix poll() to not report false EPOLLOUT events When using select()/poll()/epoll() with a non-blocking ISOTP socket to wait for when non-blocking write is possible, a false EPOLLOUT event is sometimes returned. This can happen at least after sending a message which must be split to multiple CAN frames. The reason is that isotp_sendmsg() returns -EAGAIN when tx.state is not equal to ISOTP_IDLE and this behavior is not reflected in datagram_poll(), which is used in isotp_ops. This is fixed by introducing ISOTP-specific poll function, which suppresses the EPOLLOUT events in that case. v2: https://lore.kernel.org/all/20230302092812.320643-1-michal.sojka@cvut.cz v1: https://lore.kernel.org/all/20230224010659.48420-1-michal.sojka@cvut.cz https://lore.kernel.org/all/b53a04a2-ba1f-3858-84c1-d3eb3301ae15@hartkopp.net Signed-off-by: Michal Sojka Reported-by: Jakub Jira Tested-by: Oliver Hartkopp Acked-by: Oliver Hartkopp Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://lore.kernel.org/all/20230331125511.372783-1-michal.sojka@cvut.cz Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 47c2ebad10ed..281b7766c54e 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1608,6 +1608,21 @@ static int isotp_init(struct sock *sk) return 0; } +static __poll_t isotp_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + struct sock *sk = sock->sk; + struct isotp_sock *so = isotp_sk(sk); + + __poll_t mask = datagram_poll(file, sock, wait); + poll_wait(file, &so->wait, wait); + + /* Check for false positives due to TX state */ + if ((mask & EPOLLWRNORM) && (so->tx.state != ISOTP_IDLE)) + mask &= ~(EPOLLOUT | EPOLLWRNORM); + + return mask; +} + static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { @@ -1623,7 +1638,7 @@ static const struct proto_ops isotp_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = isotp_getname, - .poll = datagram_poll, + .poll = isotp_poll, .ioctl = isotp_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, From 051737439eaee5bdd03d3c2ef5510d54a478fd05 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 31 Mar 2023 15:19:35 +0200 Subject: [PATCH 168/173] can: isotp: fix race between isotp_sendsmg() and isotp_release() As discussed with Dae R. Jeong and Hillf Danton here [1] the sendmsg() function in isotp.c might get into a race condition when restoring the former tx.state from the old_state. Remove the old_state concept and implement proper locking for the ISOTP_IDLE transitions in isotp_sendmsg(), inspired by a simplification idea from Hillf Danton. Introduce a new tx.state ISOTP_SHUTDOWN and use the same locking mechanism from isotp_release() which resolves a potential race between isotp_sendsmg() and isotp_release(). [1] https://lore.kernel.org/linux-can/ZB%2F93xJxq%2FBUqAgG@dragonet v1: https://lore.kernel.org/all/20230331102114.15164-1-socketcan@hartkopp.net v2: https://lore.kernel.org/all/20230331123600.3550-1-socketcan@hartkopp.net take care of signal interrupts for wait_event_interruptible() in isotp_release() v3: https://lore.kernel.org/all/20230331130654.9886-1-socketcan@hartkopp.net take care of signal interrupts for wait_event_interruptible() in isotp_sendmsg() in the wait_tx_done case v4: https://lore.kernel.org/all/20230331131935.21465-1-socketcan@hartkopp.net take care of signal interrupts for wait_event_interruptible() in isotp_sendmsg() in ALL cases Cc: Dae R. Jeong Cc: Hillf Danton Signed-off-by: Oliver Hartkopp Fixes: 4f027cba8216 ("can: isotp: split tx timer into transmission and timeout") Link: https://lore.kernel.org/all/20230331131935.21465-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org [mkl: rephrase commit message] Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 55 ++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 281b7766c54e..5761d4ab839d 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -119,7 +119,8 @@ enum { ISOTP_WAIT_FIRST_FC, ISOTP_WAIT_FC, ISOTP_WAIT_DATA, - ISOTP_SENDING + ISOTP_SENDING, + ISOTP_SHUTDOWN, }; struct tpcon { @@ -880,8 +881,8 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) txtimer); struct sock *sk = &so->sk; - /* don't handle timeouts in IDLE state */ - if (so->tx.state == ISOTP_IDLE) + /* don't handle timeouts in IDLE or SHUTDOWN state */ + if (so->tx.state == ISOTP_IDLE || so->tx.state == ISOTP_SHUTDOWN) return HRTIMER_NORESTART; /* we did not get any flow control or echo frame in time */ @@ -918,7 +919,6 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); - u32 old_state = so->tx.state; struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf; @@ -928,23 +928,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) int off; int err; - if (!so->bound) + if (!so->bound || so->tx.state == ISOTP_SHUTDOWN) return -EADDRNOTAVAIL; +wait_free_buffer: /* we do not support multiple buffers - for now */ - if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE || - wq_has_sleeper(&so->wait)) { - if (msg->msg_flags & MSG_DONTWAIT) { - err = -EAGAIN; - goto err_out; - } + if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT)) + return -EAGAIN; - /* wait for complete transmission of current pdu */ - err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); - if (err) - goto err_out; + /* wait for complete transmission of current pdu */ + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; - so->tx.state = ISOTP_SENDING; + if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) { + if (so->tx.state == ISOTP_SHUTDOWN) + return -EADDRNOTAVAIL; + + goto wait_free_buffer; } if (!size || size > MAX_MSG_LENGTH) { @@ -1074,7 +1075,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (wait_tx_done) { /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_event_drop; if (sk->sk_err) return -sk->sk_err; @@ -1082,13 +1085,15 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return size; +err_event_drop: + /* got signal: force tx state machine to be idle */ + so->tx.state = ISOTP_IDLE; + hrtimer_cancel(&so->txfrtimer); + hrtimer_cancel(&so->txtimer); err_out_drop: /* drop this PDU and unlock a potential wait queue */ - old_state = ISOTP_IDLE; -err_out: - so->tx.state = old_state; - if (so->tx.state == ISOTP_IDLE) - wake_up_interruptible(&so->wait); + so->tx.state = ISOTP_IDLE; + wake_up_interruptible(&so->wait); return err; } @@ -1150,10 +1155,12 @@ static int isotp_release(struct socket *sock) net = sock_net(sk); /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + while (wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE) == 0 && + cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SHUTDOWN) != ISOTP_IDLE) + ; /* force state machines to be idle also when a signal occurred */ - so->tx.state = ISOTP_IDLE; + so->tx.state = ISOTP_SHUTDOWN; so->rx.state = ISOTP_IDLE; spin_lock(&isotp_notifier_lock); From 3357c6e429643231e60447b52ffbb7ac895aca22 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 4 Apr 2023 19:45:04 -0400 Subject: [PATCH 169/173] tracing: Free error logs of tracing instances When a tracing instance is removed, the error messages that hold errors that occurred in the instance needs to be freed. The following reports a memory leak: # cd /sys/kernel/tracing # mkdir instances/foo # echo 'hist:keys=x' > instances/foo/events/sched/sched_switch/trigger # cat instances/foo/error_log [ 117.404795] hist:sched:sched_switch: error: Couldn't find field Command: hist:keys=x ^ # rmdir instances/foo Then check for memory leaks: # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88810d8ec700 (size 192): comm "bash", pid 869, jiffies 4294950577 (age 215.752s) hex dump (first 32 bytes): 60 dd 68 61 81 88 ff ff 60 dd 68 61 81 88 ff ff `.ha....`.ha.... a0 30 8c 83 ff ff ff ff 26 00 0a 00 00 00 00 00 .0......&....... backtrace: [<00000000dae26536>] kmalloc_trace+0x2a/0xa0 [<00000000b2938940>] tracing_log_err+0x277/0x2e0 [<000000004a0e1b07>] parse_atom+0x966/0xb40 [<0000000023b24337>] parse_expr+0x5f3/0xdb0 [<00000000594ad074>] event_hist_trigger_parse+0x27f8/0x3560 [<00000000293a9645>] trigger_process_regex+0x135/0x1a0 [<000000005c22b4f2>] event_trigger_write+0x87/0xf0 [<000000002cadc509>] vfs_write+0x162/0x670 [<0000000059c3b9be>] ksys_write+0xca/0x170 [<00000000f1cddc00>] do_syscall_64+0x3e/0xc0 [<00000000868ac68c>] entry_SYSCALL_64_after_hwframe+0x72/0xdc unreferenced object 0xffff888170c35a00 (size 32): comm "bash", pid 869, jiffies 4294950577 (age 215.752s) hex dump (first 32 bytes): 0a 20 20 43 6f 6d 6d 61 6e 64 3a 20 68 69 73 74 . Command: hist 3a 6b 65 79 73 3d 78 0a 00 00 00 00 00 00 00 00 :keys=x......... backtrace: [<000000006a747de5>] __kmalloc+0x4d/0x160 [<000000000039df5f>] tracing_log_err+0x29b/0x2e0 [<000000004a0e1b07>] parse_atom+0x966/0xb40 [<0000000023b24337>] parse_expr+0x5f3/0xdb0 [<00000000594ad074>] event_hist_trigger_parse+0x27f8/0x3560 [<00000000293a9645>] trigger_process_regex+0x135/0x1a0 [<000000005c22b4f2>] event_trigger_write+0x87/0xf0 [<000000002cadc509>] vfs_write+0x162/0x670 [<0000000059c3b9be>] ksys_write+0xca/0x170 [<00000000f1cddc00>] do_syscall_64+0x3e/0xc0 [<00000000868ac68c>] entry_SYSCALL_64_after_hwframe+0x72/0xdc The problem is that the error log needs to be freed when the instance is removed. Link: https://lore.kernel.org/lkml/76134d9f-a5ba-6a0d-37b3-28310b4a1e91@alu.unizg.hr/ Link: https://lore.kernel.org/linux-trace-kernel/20230404194504.5790b95f@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Mark Rutland Cc: Thorsten Leemhuis Cc: Ulf Hansson Cc: Eric Biggers Fixes: 2f754e771b1a6 ("tracing: Have the error logs show up in the proper instances") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Todorovac Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4686473b8497..36a6037823cd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9516,6 +9516,7 @@ static int __remove_instance(struct trace_array *tr) tracefs_remove(tr->dir); free_percpu(tr->last_func_repeats); free_trace_buffers(tr); + clear_tracing_err_log(tr); for (i = 0; i < tr->nr_topts; i++) { kfree(tr->topts[i].topts); From abc33494ddd5c1e4bd0e17c4abe361038fb6693f Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 4 Apr 2023 15:22:07 +1000 Subject: [PATCH 170/173] net: fec: make use of MDIO C45 quirk Not all fec MDIO bus drivers support C45 mode transactions. The older fec hardware block in many ColdFire SoCs does not appear to support them, at least according to most of the different ColdFire SoC reference manuals. The bits used to generate C45 access on the iMX parts, in the OP field of the MMFR register, are documented as generating non-compliant MII frames (it is not documented as to exactly how they are non-compliant). Commit 8d03ad1ab0b0 ("net: fec: Separate C22 and C45 transactions") means the fec driver will always register c45 MDIO read and write methods. During probe these will always be accessed now generating non-compliant MII accesses on ColdFire based devices. Add a quirk define, FEC_QUIRK_HAS_MDIO_C45, that can be used to distinguish silicon that supports MDIO C45 framing or not. Add this to all the existing iMX quirks, so they will be behave as they do now (*). (*) it seems that some iMX parts may not support C45 transactions either. The iMX25 and iMX50 Reference Manuals contain similar wording to the ColdFire Reference Manuals on this. Fixes: 8d03ad1ab0b0 ("net: fec: Separate C22 and C45 transactions") Signed-off-by: Greg Ungerer Reviewed-by: Wei Fang Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230404052207.3064861-1-gerg@linux-m68k.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec.h | 5 ++++ drivers/net/ethernet/freescale/fec_main.c | 32 ++++++++++++++--------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 5ba1e0d71c68..9939ccafb556 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -507,6 +507,11 @@ struct bufdesc_ex { /* i.MX6Q adds pm_qos support */ #define FEC_QUIRK_HAS_PMQOS BIT(23) +/* Not all FEC hardware block MDIOs support accesses in C45 mode. + * Older blocks in the ColdFire parts do not support it. + */ +#define FEC_QUIRK_HAS_MDIO_C45 BIT(24) + struct bufdesc_prop { int qid; /* Address of Rx and Tx buffers */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f3b16a6673e2..160c1b3525f5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -100,18 +100,19 @@ struct fec_devinfo { static const struct fec_devinfo fec_imx25_info = { .quirks = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | - FEC_QUIRK_HAS_FRREG, + FEC_QUIRK_HAS_FRREG | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx27_info = { - .quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, + .quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx28_info = { .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_FRREG | FEC_QUIRK_CLEAR_SETUP_MII | - FEC_QUIRK_NO_HARD_RESET, + FEC_QUIRK_NO_HARD_RESET | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx6q_info = { @@ -119,11 +120,12 @@ static const struct fec_devinfo fec_imx6q_info = { FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 | FEC_QUIRK_HAS_RACC | FEC_QUIRK_CLEAR_SETUP_MII | - FEC_QUIRK_HAS_PMQOS, + FEC_QUIRK_HAS_PMQOS | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_mvf600_info = { - .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC, + .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx6x_info = { @@ -132,7 +134,8 @@ static const struct fec_devinfo fec_imx6x_info = { FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | - FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES, + FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx6ul_info = { @@ -140,7 +143,8 @@ static const struct fec_devinfo fec_imx6ul_info = { FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | - FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII, + FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx8mq_info = { @@ -150,7 +154,8 @@ static const struct fec_devinfo fec_imx8mq_info = { FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | - FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2, + FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2 | + FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_imx8qm_info = { @@ -160,14 +165,15 @@ static const struct fec_devinfo fec_imx8qm_info = { FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | - FEC_QUIRK_DELAYED_CLKS_SUPPORT, + FEC_QUIRK_DELAYED_CLKS_SUPPORT | FEC_QUIRK_HAS_MDIO_C45, }; static const struct fec_devinfo fec_s32v234_info = { .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM | FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB | - FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE, + FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | + FEC_QUIRK_HAS_MDIO_C45, }; static struct platform_device_id fec_devtype[] = { @@ -2434,8 +2440,10 @@ static int fec_enet_mii_init(struct platform_device *pdev) fep->mii_bus->name = "fec_enet_mii_bus"; fep->mii_bus->read = fec_enet_mdio_read_c22; fep->mii_bus->write = fec_enet_mdio_write_c22; - fep->mii_bus->read_c45 = fec_enet_mdio_read_c45; - fep->mii_bus->write_c45 = fec_enet_mdio_write_c45; + if (fep->quirks & FEC_QUIRK_HAS_MDIO_C45) { + fep->mii_bus->read_c45 = fec_enet_mdio_read_c45; + fep->mii_bus->write_c45 = fec_enet_mdio_write_c45; + } snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, fep->dev_id + 1); fep->mii_bus->priv = fep; From 38e058cc7d245dc8034426415bee8fec16ace1bd Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 4 Apr 2023 15:24:11 +0800 Subject: [PATCH 171/173] selftests: net: rps_default_mask.sh: delete veth link specifically When deleting the netns and recreating a new one while re-adding the veth interface, there is a small window of time during which the old veth interface has not yet been removed. This can cause the new addition to fail. To resolve this issue, we can either wait for a short while to ensure that the old veth interface is deleted, or we can specifically remove the veth interface. Before this patch: # ./rps_default_mask.sh empty rps_default_mask [ ok ] changing rps_default_mask dont affect existing devices [ ok ] changing rps_default_mask dont affect existing netns [ ok ] changing rps_default_mask affect newly created devices [ ok ] changing rps_default_mask don't affect newly child netns[II][ ok ] rps_default_mask is 0 by default in child netns [ ok ] RTNETLINK answers: File exists changing rps_default_mask in child ns don't affect the main one[ ok ] cat: /sys/class/net/vethC11an1/queues/rx-0/rps_cpus: No such file or directory changing rps_default_mask in child ns affects new childns devices./rps_default_mask.sh: line 36: [: -eq: unary operator expected [fail] expected 1 found changing rps_default_mask in child ns don't affect existing devices[ ok ] After this patch: # ./rps_default_mask.sh empty rps_default_mask [ ok ] changing rps_default_mask dont affect existing devices [ ok ] changing rps_default_mask dont affect existing netns [ ok ] changing rps_default_mask affect newly created devices [ ok ] changing rps_default_mask don't affect newly child netns[II][ ok ] rps_default_mask is 0 by default in child netns [ ok ] changing rps_default_mask in child ns don't affect the main one[ ok ] changing rps_default_mask in child ns affects new childns devices[ ok ] changing rps_default_mask in child ns don't affect existing devices[ ok ] Fixes: 3a7d84eae03b ("self-tests: more rps self tests") Signed-off-by: Hangbin Liu Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20230404072411.879476-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rps_default_mask.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 0fd0d2db3abc..a26c5624429f 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -60,6 +60,7 @@ ip link set dev $VETH up ip -n $NETNS link set dev $VETH up chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +ip link del dev $VETH ip netns del $NETNS setup From 24e3fce00c0b557491ff596c0682a29dee6fe848 Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Tue, 4 Apr 2023 12:48:23 +0800 Subject: [PATCH 172/173] net: stmmac: Add queue reset into stmmac_xdp_open() function Queue reset was moved out from __init_dma_rx_desc_rings() and __init_dma_tx_desc_rings() functions. Thus, the driver fails to transmit and receive packet after XDP prog setup. This commit adds the missing queue reset into stmmac_xdp_open() function. Fixes: f9ec5723c3db ("net: ethernet: stmicro: stmmac: move queue reset to dedicated functions") Cc: # 6.0+ Signed-off-by: Song Yoong Siang Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230404044823.3226144-1-yoong.siang.song@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 59cbf3597eb4..38abc898f149 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6624,6 +6624,8 @@ int stmmac_xdp_open(struct net_device *dev) goto init_error; } + stmmac_reset_queues_param(priv); + /* DMA CSR Channel configuration */ for (chan = 0; chan < dma_csr_ch; chan++) { stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan); From 8fbc10b995a506e173f1080dfa2764f232a65e02 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 6 Apr 2023 10:45:41 +0800 Subject: [PATCH 173/173] net: stmmac: check fwnode for phy device before scanning for phy Some DT devices already have phy device configured in the DT/ACPI. Current implementation scans for a phy unconditionally even though there is a phy listed in the DT/ACPI and already attached. We should check the fwnode if there is any phy device listed in fwnode and decide whether to scan for a phy to attach to. Fixes: fe2cfbc96803 ("net: stmmac: check if MAC needs to attach to a PHY") Reported-by: Martin Blumenstingl Link: https://lore.kernel.org/lkml/20230403212434.296975-1-martin.blumenstingl@googlemail.com/ Tested-by: Guenter Roeck Tested-by: Shahab Vahedi Tested-by: Marek Szyprowski Tested-by: Martin Blumenstingl Suggested-by: Russell King (Oracle) Signed-off-by: Michael Sit Wei Hong Link: https://lore.kernel.org/r/20230406024541.3556305-1-michael.wei.hong.sit@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 38abc898f149..d7fcab057032 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1134,22 +1134,26 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + struct fwnode_handle *phy_fwnode; struct fwnode_handle *fwnode; - bool phy_needed; int ret; + if (!phylink_expects_phy(priv->phylink)) + return 0; + fwnode = of_fwnode_handle(priv->plat->phylink_node); if (!fwnode) fwnode = dev_fwnode(priv->device); if (fwnode) - ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); + phy_fwnode = fwnode_get_phy_node(fwnode); + else + phy_fwnode = NULL; - phy_needed = phylink_expects_phy(priv->phylink); /* Some DT bindings do not set-up the PHY handle. Let's try to * manually parse it */ - if (!fwnode || phy_needed || ret) { + if (!phy_fwnode || IS_ERR(phy_fwnode)) { int addr = priv->plat->phy_addr; struct phy_device *phydev; @@ -1165,6 +1169,9 @@ static int stmmac_init_phy(struct net_device *dev) } ret = phylink_connect_phy(priv->phylink, phydev); + } else { + fwnode_handle_put(phy_fwnode); + ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); } if (!priv->plat->pmt) {