From ec738ca127d07ecac6afae36e2880341ec89150e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 8 Feb 2023 17:02:30 +0100
Subject: [PATCH 001/173] mtd: spi-nor: fix memory leak when using
 debugfs_lookup()

When calling debugfs_lookup() the result must have dput() called on it,
otherwise the memory will leak over time.  To solve this, remove the
lookup and create the directory on the first device found, and then
remove it when the module is unloaded.

Cc: Tudor Ambarus <tudor.ambarus@microchip.com>
Cc: Pratyush Yadav <pratyush@kernel.org>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: linux-mtd@lists.infradead.org
Reviewed-by: Michael Walle <michael@walle.cc>
Link: https://lore.kernel.org/r/20230208160230.2179905-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/spi-nor/core.c    | 14 +++++++++++++-
 drivers/mtd/spi-nor/core.h    |  2 ++
 drivers/mtd/spi-nor/debugfs.c | 11 ++++++++---
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index 0a78045ca1d9..522d375aeccf 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -3343,7 +3343,19 @@ static struct spi_mem_driver spi_nor_driver = {
 	.remove = spi_nor_remove,
 	.shutdown = spi_nor_shutdown,
 };
-module_spi_mem_driver(spi_nor_driver);
+
+static int __init spi_nor_module_init(void)
+{
+	return spi_mem_driver_register(&spi_nor_driver);
+}
+module_init(spi_nor_module_init);
+
+static void __exit spi_nor_module_exit(void)
+{
+	spi_mem_driver_unregister(&spi_nor_driver);
+	spi_nor_debugfs_shutdown();
+}
+module_exit(spi_nor_module_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index 25423225c29d..e0cc42a4a0c8 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -711,8 +711,10 @@ static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
 
 #ifdef CONFIG_DEBUG_FS
 void spi_nor_debugfs_register(struct spi_nor *nor);
+void spi_nor_debugfs_shutdown(void);
 #else
 static inline void spi_nor_debugfs_register(struct spi_nor *nor) {}
+static inline void spi_nor_debugfs_shutdown(void) {}
 #endif
 
 #endif /* __LINUX_MTD_SPI_NOR_INTERNAL_H */
diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c
index 845b78c7ecc7..fc7ad203df12 100644
--- a/drivers/mtd/spi-nor/debugfs.c
+++ b/drivers/mtd/spi-nor/debugfs.c
@@ -226,13 +226,13 @@ static void spi_nor_debugfs_unregister(void *data)
 	nor->debugfs_root = NULL;
 }
 
+static struct dentry *rootdir;
+
 void spi_nor_debugfs_register(struct spi_nor *nor)
 {
-	struct dentry *rootdir, *d;
+	struct dentry *d;
 	int ret;
 
-	/* Create rootdir once. Will never be deleted again. */
-	rootdir = debugfs_lookup(SPI_NOR_DEBUGFS_ROOT, NULL);
 	if (!rootdir)
 		rootdir = debugfs_create_dir(SPI_NOR_DEBUGFS_ROOT, NULL);
 
@@ -247,3 +247,8 @@ void spi_nor_debugfs_register(struct spi_nor *nor)
 	debugfs_create_file("capabilities", 0444, d, nor,
 			    &spi_nor_capabilities_fops);
 }
+
+void spi_nor_debugfs_shutdown(void)
+{
+	debugfs_remove(rootdir);
+}

From c0ad453e94e5c404efbcf668648d07eaa1a71ed7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= <arinc.unal@arinc9.com>
Date: Sat, 18 Feb 2023 09:51:06 +0300
Subject: [PATCH 002/173] pinctrl: mediatek: add missing options to
 PINCTRL_MT7981
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are options missing from PINCTRL_MT7981 whilst being on every other
pin controller. Add them.

Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Acked-by: Daniel Golle <daniel@makrotopia.org>
Link: https://lore.kernel.org/r/20230218065108.8958-1-arinc.unal@arinc9.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mediatek/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig
index f20c28334bcb..67818ba14d4e 100644
--- a/drivers/pinctrl/mediatek/Kconfig
+++ b/drivers/pinctrl/mediatek/Kconfig
@@ -130,6 +130,8 @@ config PINCTRL_MT7622
 config PINCTRL_MT7981
 	bool "Mediatek MT7981 pin control"
 	depends on OF
+	depends on ARM64 || COMPILE_TEST
+	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_MOORE
 
 config PINCTRL_MT7986

From 6de67ca4dab7d855ef9598cd894cd7dfa4077f96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= <arinc.unal@arinc9.com>
Date: Sat, 18 Feb 2023 09:51:07 +0300
Subject: [PATCH 003/173] pinctrl: mediatek: fix naming inconsistency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some options include "MediaTek", some "Mediatek". Rename all to "MediaTek"
to address the naming inconsistency.

Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Reviewed-by: Daniel Golle <daniel@makrotopia.org>
Link: https://lore.kernel.org/r/20230218065108.8958-2-arinc.unal@arinc9.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mediatek/Kconfig | 42 ++++++++++++++++----------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/pinctrl/mediatek/Kconfig b/drivers/pinctrl/mediatek/Kconfig
index 67818ba14d4e..a71874fed3d6 100644
--- a/drivers/pinctrl/mediatek/Kconfig
+++ b/drivers/pinctrl/mediatek/Kconfig
@@ -45,35 +45,35 @@ config PINCTRL_MTK_PARIS
 
 # For ARMv7 SoCs
 config PINCTRL_MT2701
-	bool "Mediatek MT2701 pin control"
+	bool "MediaTek MT2701 pin control"
 	depends on MACH_MT7623 || MACH_MT2701 || COMPILE_TEST
 	depends on OF
 	default MACH_MT2701
 	select PINCTRL_MTK
 
 config PINCTRL_MT7623
-	bool "Mediatek MT7623 pin control with generic binding"
+	bool "MediaTek MT7623 pin control with generic binding"
 	depends on MACH_MT7623 || COMPILE_TEST
 	depends on OF
 	default MACH_MT7623
 	select PINCTRL_MTK_MOORE
 
 config PINCTRL_MT7629
-	bool "Mediatek MT7629 pin control"
+	bool "MediaTek MT7629 pin control"
 	depends on MACH_MT7629 || COMPILE_TEST
 	depends on OF
 	default MACH_MT7629
 	select PINCTRL_MTK_MOORE
 
 config PINCTRL_MT8135
-	bool "Mediatek MT8135 pin control"
+	bool "MediaTek MT8135 pin control"
 	depends on MACH_MT8135 || COMPILE_TEST
 	depends on OF
 	default MACH_MT8135
 	select PINCTRL_MTK
 
 config PINCTRL_MT8127
-	bool "Mediatek MT8127 pin control"
+	bool "MediaTek MT8127 pin control"
 	depends on MACH_MT8127 || COMPILE_TEST
 	depends on OF
 	default MACH_MT8127
@@ -88,33 +88,33 @@ config PINCTRL_MT2712
 	select PINCTRL_MTK
 
 config PINCTRL_MT6765
-	tristate "Mediatek MT6765 pin control"
+	tristate "MediaTek MT6765 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_PARIS
 
 config PINCTRL_MT6779
-	tristate "Mediatek MT6779 pin control"
+	tristate "MediaTek MT6779 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_PARIS
 	help
 	  Say yes here to support pin controller and gpio driver
-	  on Mediatek MT6779 SoC.
+	  on MediaTek MT6779 SoC.
 	  In MTK platform, we support virtual gpio and use it to
 	  map specific eint which doesn't have real gpio pin.
 
 config PINCTRL_MT6795
-	bool "Mediatek MT6795 pin control"
+	bool "MediaTek MT6795 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_PARIS
 
 config PINCTRL_MT6797
-	bool "Mediatek MT6797 pin control"
+	bool "MediaTek MT6797 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
@@ -128,42 +128,42 @@ config PINCTRL_MT7622
 	select PINCTRL_MTK_MOORE
 
 config PINCTRL_MT7981
-	bool "Mediatek MT7981 pin control"
+	bool "MediaTek MT7981 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_MOORE
 
 config PINCTRL_MT7986
-	bool "Mediatek MT7986 pin control"
+	bool "MediaTek MT7986 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_MOORE
 
 config PINCTRL_MT8167
-	bool "Mediatek MT8167 pin control"
+	bool "MediaTek MT8167 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK
 
 config PINCTRL_MT8173
-	bool "Mediatek MT8173 pin control"
+	bool "MediaTek MT8173 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK
 
 config PINCTRL_MT8183
-	bool "Mediatek MT8183 pin control"
+	bool "MediaTek MT8183 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_PARIS
 
 config PINCTRL_MT8186
-	bool "Mediatek MT8186 pin control"
+	bool "MediaTek MT8186 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
@@ -182,28 +182,28 @@ config PINCTRL_MT8188
 	  map specific eint which doesn't have real gpio pin.
 
 config PINCTRL_MT8192
-	bool "Mediatek MT8192 pin control"
+	bool "MediaTek MT8192 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_PARIS
 
 config PINCTRL_MT8195
-	bool "Mediatek MT8195 pin control"
+	bool "MediaTek MT8195 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK_PARIS
 
 config PINCTRL_MT8365
-	bool "Mediatek MT8365 pin control"
+	bool "MediaTek MT8365 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
 	select PINCTRL_MTK
 
 config PINCTRL_MT8516
-	bool "Mediatek MT8516 pin control"
+	bool "MediaTek MT8516 pin control"
 	depends on OF
 	depends on ARM64 || COMPILE_TEST
 	default ARM64 && ARCH_MEDIATEK
@@ -211,7 +211,7 @@ config PINCTRL_MT8516
 
 # For PMIC
 config PINCTRL_MT6397
-	bool "Mediatek MT6397 pin control"
+	bool "MediaTek MT6397 pin control"
 	depends on MFD_MT6397 || COMPILE_TEST
 	depends on OF
 	default MFD_MT6397

From 7bb97e360acdd38b68ad0a1defb89c6e89c85596 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 24 Feb 2023 14:08:28 +0100
Subject: [PATCH 004/173] pinctrl: at91-pio4: fix domain name assignment

Since commit d59f6617eef0 ("genirq: Allow fwnode to carry name
information only") an IRQ domain is always given a name during
allocation (e.g. used for the debugfs entry).

Drop the no longer valid name assignment, which would lead to an attempt
to free a string constant when removing the domain on late probe
failures (e.g. probe deferral).

Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only")
Cc: stable@vger.kernel.org	# 4.13
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Tested-by: Claudiu Beznea <claudiu.beznea@microchip.com> # on SAMA7G5
Link: https://lore.kernel.org/r/20230224130828.27985-1-johan+linaro@kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-at91-pio4.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 373eed8bc4be..c775d239444a 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -1206,7 +1206,6 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
 		dev_err(dev, "can't add the irq domain\n");
 		return -ENODEV;
 	}
-	atmel_pioctrl->irq_domain->name = "atmel gpio";
 
 	for (i = 0; i < atmel_pioctrl->npins; i++) {
 		int irq = irq_create_mapping(atmel_pioctrl->irq_domain, i);

From 657fd9da2d4b4aa0a384105b236baa22fa0233bf Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Mon, 6 Feb 2023 21:37:20 +0100
Subject: [PATCH 005/173] pinctrl: ocelot: Fix alt mode for ocelot

In case the driver was trying to set an alternate mode for gpio
0 or 32 then the mode was not set correctly. The reason is that
there is computation error inside the function ocelot_pinmux_set_mux
because in this case it was trying to shift to left by -1.
Fix this by actually shifting the function bits and not the position.

Fixes: 4b36082e2e09 ("pinctrl: ocelot: fix pinmuxing for pins after 31")
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Link: https://lore.kernel.org/r/20230206203720.1177718-1-horatiu.vultur@microchip.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-ocelot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-ocelot.c b/drivers/pinctrl/pinctrl-ocelot.c
index 29e4a6282a64..1dcbd0937ef5 100644
--- a/drivers/pinctrl/pinctrl-ocelot.c
+++ b/drivers/pinctrl/pinctrl-ocelot.c
@@ -1204,7 +1204,7 @@ static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev,
 	regmap_update_bits(info->map, REG_ALT(0, info, pin->pin),
 			   BIT(p), f << p);
 	regmap_update_bits(info->map, REG_ALT(1, info, pin->pin),
-			   BIT(p), f << (p - 1));
+			   BIT(p), (f >> 1) << p);
 
 	return 0;
 }

From 913a956c4363c3b5fd13d3a00836fad4c46646a7 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Date: Mon, 27 Feb 2023 21:51:31 +0100
Subject: [PATCH 006/173] pinctrl: stm32: use dynamic allocation of GPIO base

Since commit 502df79b860563d7 ("gpiolib: Warn on drivers still using static
gpiobase allocation"), one or more warnings are printed during boot on
systems where static allocation of GPIO base is used:

[    0.197707] gpio gpiochip0: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.199942] stm32f429-pinctrl soc:pinctrl@40020000: GPIOA bank added
[    0.200711] gpio gpiochip1: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.202855] stm32f429-pinctrl soc:pinctrl@40020000: GPIOB bank added
[    0.203591] gpio gpiochip2: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.205704] stm32f429-pinctrl soc:pinctrl@40020000: GPIOC bank added
[    0.206338] gpio gpiochip3: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.208448] stm32f429-pinctrl soc:pinctrl@40020000: GPIOD bank added
[    0.209182] gpio gpiochip4: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.211282] stm32f429-pinctrl soc:pinctrl@40020000: GPIOE bank added
[    0.212094] gpio gpiochip5: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.214270] stm32f429-pinctrl soc:pinctrl@40020000: GPIOF bank added
[    0.215005] gpio gpiochip6: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.217110] stm32f429-pinctrl soc:pinctrl@40020000: GPIOG bank added
[    0.217845] gpio gpiochip7: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.219959] stm32f429-pinctrl soc:pinctrl@40020000: GPIOH bank added
[    0.220602] gpio gpiochip8: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.222714] stm32f429-pinctrl soc:pinctrl@40020000: GPIOI bank added
[    0.223483] gpio gpiochip9: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.225594] stm32f429-pinctrl soc:pinctrl@40020000: GPIOJ bank added
[    0.226336] gpio gpiochip10: Static allocation of GPIO base is deprecated, use dynamic allocation.
[    0.228490] stm32f429-pinctrl soc:pinctrl@40020000: GPIOK bank added

So let's follow the suggestion and use dynamic allocation.

Tested on STM32F429I-DISC1 board.

Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Link: https://lore.kernel.org/r/20230227205131.2104082-1-dario.binacchi@amarulasolutions.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/stm32/pinctrl-stm32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index cb33a23ab0c1..04ace4c7bd58 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -1330,7 +1330,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct fwnode
 	if (fwnode_property_read_u32(fwnode, "st,bank-ioport", &bank_ioport_nr))
 		bank_ioport_nr = bank_nr;
 
-	bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK;
+	bank->gpio_chip.base = -1;
 
 	bank->gpio_chip.ngpio = npins;
 	bank->gpio_chip.fwnode = fwnode;

From 1eb65c8687316c65140b48fad27133d583178e15 Mon Sep 17 00:00:00 2001
From: Mohammed Gamal <mgamal@redhat.com>
Date: Fri, 17 Feb 2023 22:44:11 +0200
Subject: [PATCH 007/173] Drivers: vmbus: Check for channel allocation before
 looking up relids

relid2channel() assumes vmbus channel array to be allocated when called.
However, in cases such as kdump/kexec, not all relids will be reset by the host.
When the second kernel boots and if the guest receives a vmbus interrupt during
vmbus driver initialization before vmbus_connect() is called, before it finishes,
or if it fails, the vmbus interrupt service routine is called which in turn calls
relid2channel() and can cause a null pointer dereference.

Print a warning and error out in relid2channel() for a channel id that's invalid
in the second kernel.

Fixes: 8b6a877c060e ("Drivers: hv: vmbus: Replace the per-CPU channel lists with a global array of channels")

Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Link: https://lore.kernel.org/r/20230217204411.212709-1-mgamal@redhat.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/connection.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 9dc27e5d367a..da51b50787df 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -409,6 +409,10 @@ void vmbus_disconnect(void)
  */
 struct vmbus_channel *relid2channel(u32 relid)
 {
+	if (vmbus_connection.channels == NULL) {
+		pr_warn_once("relid2channel: relid=%d: No channels mapped!\n", relid);
+		return NULL;
+	}
 	if (WARN_ON(relid >= MAX_CHANNEL_RELIDS))
 		return NULL;
 	return READ_ONCE(vmbus_connection.channels[relid]);

From 44ac5abac86b20856e6d9e5e5e40dcc2623fe330 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Sun, 5 Mar 2023 23:00:04 +0100
Subject: [PATCH 008/173] Documentation/security-bugs: move from admin-guide/
 to process/

Jiri Kosina, Jonathan Corbet, and Willy Tarreau all expressed a desire
to move this document under process/.

Create a new section for security issues in the index and group it with
embargoed-hardware-issues.

I'm doing this at the start of the series to make all the subsequent
changes show up in 'git blame'.

Existing references were updated using:

  git grep -l security-bugs ':!Documentation/translations/' | xargs sed -i 's|admin-guide/security-bugs|process/security-bugs|g'
  git grep -l security-bugs Documentation/translations/ | xargs sed -i 's|Documentation/admin-guide/security-bugs|Documentation/process/security-bugs|g'
  git grep -l security-bugs Documentation/translations/ | xargs sed -i '/Original:/s|\.\./admin-guide/security-bugs|\.\./process/security-bugs|g'

Notably, the page is not moved in the translations (due to my lack of
knowledge of these languages), but the translations have been updated
to point to the new location of the original document where these
references exist.

Link: https://lore.kernel.org/all/nycvar.YFH.7.76.2206062326230.10851@cbobk.fhfr.pm/
Suggested-by: Jiri Kosina <jikos@kernel.org>
Cc: Alex Shi <alexs@kernel.org>
Cc: Yanteng Si <siyanteng@loongson.cn>
Cc: Hu Haowen <src.res@email.cn>
Cc: Federico Vaga <federico.vaga@vaga.pv.it>
Cc: Tsugikazu Shibata <tshibata@ab.jp.nec.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Jeimi Lee <jamee.lee@samsung.com>
Cc: Carlos Bilbao <carlos.bilbao@amd.com>
Cc: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Carlos Bilbao <carlos.bilbao@amd.com>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Akira Yokosawa <akiyks@gmail.com>
Acked-by: Federico Vaga <federico.vaga@vaga.pv.it>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Link: https://lore.kernel.org/r/20230305220010.20895-2-vegard.nossum@oracle.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/index.rst                      | 1 -
 Documentation/admin-guide/reporting-issues.rst           | 4 ++--
 Documentation/process/howto.rst                          | 2 +-
 Documentation/process/index.rst                          | 9 ++++++++-
 Documentation/process/researcher-guidelines.rst          | 2 +-
 Documentation/{admin-guide => process}/security-bugs.rst | 0
 Documentation/process/stable-kernel-rules.rst            | 2 +-
 Documentation/process/submitting-patches.rst             | 2 +-
 .../translations/it_IT/admin-guide/security-bugs.rst     | 2 +-
 .../translations/it_IT/process/submitting-patches.rst    | 2 +-
 Documentation/translations/ja_JP/howto.rst               | 2 +-
 Documentation/translations/ko_KR/howto.rst               | 2 +-
 Documentation/translations/sp_SP/howto.rst               | 2 +-
 .../translations/sp_SP/process/submitting-patches.rst    | 2 +-
 .../translations/zh_CN/admin-guide/security-bugs.rst     | 2 +-
 Documentation/translations/zh_CN/process/howto.rst       | 2 +-
 .../translations/zh_TW/admin-guide/security-bugs.rst     | 2 +-
 Documentation/translations/zh_TW/process/howto.rst       | 2 +-
 MAINTAINERS                                              | 4 ++--
 19 files changed, 26 insertions(+), 20 deletions(-)
 rename Documentation/{admin-guide => process}/security-bugs.rst (100%)

diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 0ad7e7ec0d27..09a563bbe3e7 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -36,7 +36,6 @@ problems and bugs in particular.
 
    reporting-issues
    reporting-regressions
-   security-bugs
    bug-hunting
    bug-bisect
    tainted-kernels
diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index ec62151fe672..2fd5a030235a 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -395,7 +395,7 @@ might want to be aware of; it for example explains how to add your issue to the
 list of tracked regressions, to ensure it won't fall through the cracks.
 
 What qualifies as security issue is left to your judgment. Consider reading
-Documentation/admin-guide/security-bugs.rst before proceeding, as it
+Documentation/process/security-bugs.rst before proceeding, as it
 provides additional details how to best handle security issues.
 
 An issue is a 'really severe problem' when something totally unacceptably bad
@@ -1269,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
 the report's text to these addresses; but on top of it put a small note where
 you mention that you filed it with a link to the ticket.
 
-See Documentation/admin-guide/security-bugs.rst for more information.
+See Documentation/process/security-bugs.rst for more information.
 
 
 Duties after the report went out
diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst
index cb6abcb2b6d0..deb8235e20ff 100644
--- a/Documentation/process/howto.rst
+++ b/Documentation/process/howto.rst
@@ -138,7 +138,7 @@ required reading:
     philosophy and is very important for people moving to Linux from
     development on other Operating Systems.
 
-  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+  :ref:`Documentation/process/security-bugs.rst <securitybugs>`
     If you feel you have found a security problem in the Linux kernel,
     please follow the steps in this document to help notify the kernel
     developers, and help solve the issue.
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index d4b6217472b0..565df595152e 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -35,6 +35,14 @@ Below are the essential guides that every developer should read.
    kernel-enforcement-statement
    kernel-driver-statement
 
+For security issues, see:
+
+.. toctree::
+   :maxdepth: 1
+
+   security-bugs
+   embargoed-hardware-issues
+
 Other guides to the community that are of interest to most developers are:
 
 .. toctree::
@@ -47,7 +55,6 @@ Other guides to the community that are of interest to most developers are:
    submit-checklist
    kernel-docs
    deprecated
-   embargoed-hardware-issues
    maintainers
    researcher-guidelines
 
diff --git a/Documentation/process/researcher-guidelines.rst b/Documentation/process/researcher-guidelines.rst
index afc944e0e898..9fcfed3c350b 100644
--- a/Documentation/process/researcher-guidelines.rst
+++ b/Documentation/process/researcher-guidelines.rst
@@ -68,7 +68,7 @@ Before contributing, carefully read the appropriate documentation:
 * Documentation/process/development-process.rst
 * Documentation/process/submitting-patches.rst
 * Documentation/admin-guide/reporting-issues.rst
-* Documentation/admin-guide/security-bugs.rst
+* Documentation/process/security-bugs.rst
 
 Then send a patch (including a commit log with all the details listed
 below) and follow up on any feedback from other developers.
diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/process/security-bugs.rst
similarity index 100%
rename from Documentation/admin-guide/security-bugs.rst
rename to Documentation/process/security-bugs.rst
diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 2fd8aa593a28..51df1197d5ab 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -39,7 +39,7 @@ Procedure for submitting patches to the -stable tree
 
    Security patches should not be handled (solely) by the -stable review
    process but should follow the procedures in
-   :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`.
+   :ref:`Documentation/process/security-bugs.rst <securitybugs>`.
 
 For all other submissions, choose one of the following procedures
 -----------------------------------------------------------------
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index eac7167dce83..7b223f306efa 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -254,7 +254,7 @@ If you have a patch that fixes an exploitable security bug, send that patch
 to security@kernel.org.  For severe bugs, a short embargo may be considered
 to allow distributors to get the patch out to users; in such cases,
 obviously, the patch should not be sent to any public lists. See also
-Documentation/admin-guide/security-bugs.rst.
+Documentation/process/security-bugs.rst.
 
 Patches that fix a severe bug in a released kernel should be directed
 toward the stable maintainers by putting a line like this::
diff --git a/Documentation/translations/it_IT/admin-guide/security-bugs.rst b/Documentation/translations/it_IT/admin-guide/security-bugs.rst
index 18a5822c7d9a..20994f4bfa31 100644
--- a/Documentation/translations/it_IT/admin-guide/security-bugs.rst
+++ b/Documentation/translations/it_IT/admin-guide/security-bugs.rst
@@ -1,6 +1,6 @@
 .. include:: ../disclaimer-ita.rst
 
-:Original: :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+:Original: :ref:`Documentation/process/security-bugs.rst <securitybugs>`
 
 .. _it_securitybugs:
 
diff --git a/Documentation/translations/it_IT/process/submitting-patches.rst b/Documentation/translations/it_IT/process/submitting-patches.rst
index c2cfa0948b2b..167fce813032 100644
--- a/Documentation/translations/it_IT/process/submitting-patches.rst
+++ b/Documentation/translations/it_IT/process/submitting-patches.rst
@@ -272,7 +272,7 @@ embargo potrebbe essere preso in considerazione per dare il tempo alle
 distribuzioni di prendere la patch e renderla disponibile ai loro utenti;
 in questo caso, ovviamente, la patch non dovrebbe essere inviata su alcuna
 lista di discussione pubblica. Leggete anche
-Documentation/admin-guide/security-bugs.rst.
+Documentation/process/security-bugs.rst.
 
 Patch che correggono bachi importanti su un kernel già rilasciato, dovrebbero
 essere inviate ai manutentori dei kernel stabili aggiungendo la seguente riga::
diff --git a/Documentation/translations/ja_JP/howto.rst b/Documentation/translations/ja_JP/howto.rst
index 9b0b3436dfcf..8d856ebe873c 100644
--- a/Documentation/translations/ja_JP/howto.rst
+++ b/Documentation/translations/ja_JP/howto.rst
@@ -167,7 +167,7 @@ linux-api@vger.kernel.org に送ることを勧めます。
     このドキュメントは Linux 開発の思想を理解するのに非常に重要です。
     そして、他のOSでの開発者が Linux に移る時にとても重要です。
 
-  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+  :ref:`Documentation/process/security-bugs.rst <securitybugs>`
     もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ
     のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を
     支援してください。
diff --git a/Documentation/translations/ko_KR/howto.rst b/Documentation/translations/ko_KR/howto.rst
index 969e91a95bb0..34f14899c155 100644
--- a/Documentation/translations/ko_KR/howto.rst
+++ b/Documentation/translations/ko_KR/howto.rst
@@ -157,7 +157,7 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
     리눅스로 전향하는 사람들에게는 매우 중요하다.
 
 
-  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+  :ref:`Documentation/process/security-bugs.rst <securitybugs>`
     여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에
     나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록
     도와 달라.
diff --git a/Documentation/translations/sp_SP/howto.rst b/Documentation/translations/sp_SP/howto.rst
index f9818d687b54..f1629738b49d 100644
--- a/Documentation/translations/sp_SP/howto.rst
+++ b/Documentation/translations/sp_SP/howto.rst
@@ -135,7 +135,7 @@ de obligada lectura:
      de Linux y es muy importante para las personas que se mudan a Linux
      tras desarrollar otros sistemas operativos.
 
-  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+  :ref:`Documentation/process/security-bugs.rst <securitybugs>`
     Si cree que ha encontrado un problema de seguridad en el kernel de
     Linux, siga los pasos de este documento para ayudar a notificar a los
     desarrolladores del kernel y ayudar a resolver el problema.
diff --git a/Documentation/translations/sp_SP/process/submitting-patches.rst b/Documentation/translations/sp_SP/process/submitting-patches.rst
index bf95ceb5e865..c2757d9ab216 100644
--- a/Documentation/translations/sp_SP/process/submitting-patches.rst
+++ b/Documentation/translations/sp_SP/process/submitting-patches.rst
@@ -276,7 +276,7 @@ parche a security@kernel.org. Para errores graves, se debe mantener un
 poco de discreción y permitir que los distribuidores entreguen el parche a
 los usuarios; en esos casos, obviamente, el parche no debe enviarse a
 ninguna lista pública. Revise también
-Documentation/admin-guide/security-bugs.rst.
+Documentation/process/security-bugs.rst.
 
 Los parches que corrigen un error grave en un kernel en uso deben dirigirse
 hacia los maintainers estables poniendo una línea como esta::
diff --git a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst
index b8120391755d..d6b8f8a4e7f6 100644
--- a/Documentation/translations/zh_CN/admin-guide/security-bugs.rst
+++ b/Documentation/translations/zh_CN/admin-guide/security-bugs.rst
@@ -1,6 +1,6 @@
 .. include:: ../disclaimer-zh_CN.rst
 
-:Original: :doc:`../../../admin-guide/security-bugs`
+:Original: :doc:`../../../process/security-bugs`
 
 :译者:
 
diff --git a/Documentation/translations/zh_CN/process/howto.rst b/Documentation/translations/zh_CN/process/howto.rst
index 10254751df6a..cc47be356dd3 100644
--- a/Documentation/translations/zh_CN/process/howto.rst
+++ b/Documentation/translations/zh_CN/process/howto.rst
@@ -125,7 +125,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与
     这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系
     统转移到Linux的人来说也很重要。
 
-  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+  :ref:`Documentation/process/security-bugs.rst <securitybugs>`
     如果你认为自己发现了Linux内核的安全性问题，请根据这篇文档中的步骤来
     提醒其他内核开发者并帮助解决这个问题。
 
diff --git a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
index eed260ef0c37..15f8e9005071 100644
--- a/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
+++ b/Documentation/translations/zh_TW/admin-guide/security-bugs.rst
@@ -2,7 +2,7 @@
 
 .. include:: ../disclaimer-zh_TW.rst
 
-:Original: :doc:`../../../admin-guide/security-bugs`
+:Original: :doc:`../../../process/security-bugs`
 
 :譯者:
 
diff --git a/Documentation/translations/zh_TW/process/howto.rst b/Documentation/translations/zh_TW/process/howto.rst
index 8fb8edcaee66..ea2f468d3e58 100644
--- a/Documentation/translations/zh_TW/process/howto.rst
+++ b/Documentation/translations/zh_TW/process/howto.rst
@@ -128,7 +128,7 @@ Linux內核代碼中包含有大量的文檔。這些文檔對於學習如何與
     這篇文檔對於理解Linux的開發哲學至關重要。對於將開發平台從其他操作系
     統轉移到Linux的人來說也很重要。
 
-  :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`
+  :ref:`Documentation/process/security-bugs.rst <securitybugs>`
     如果你認爲自己發現了Linux內核的安全性問題，請根據這篇文檔中的步驟來
     提醒其他內核開發者並幫助解決這個問題。
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 8d5bc223f305..f9f0366a6190 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -73,7 +73,7 @@ Tips for patch submitters
 	and ideally, should come with a patch proposal. Please do not send
 	automated reports to this list either. Such bugs will be handled
 	better and faster in the usual public places. See
-	Documentation/admin-guide/security-bugs.rst for details.
+	Documentation/process/security-bugs.rst for details.
 
 8.	Happy hacking.
 
@@ -18801,7 +18801,7 @@ F:	include/uapi/linux/sed*
 SECURITY CONTACT
 M:	Security Officers <security@kernel.org>
 S:	Supported
-F:	Documentation/admin-guide/security-bugs.rst
+F:	Documentation/process/security-bugs.rst
 
 SECURITY SUBSYSTEM
 M:	Paul Moore <paul@paul-moore.com>

From 3c728b1bc5b99c5275ac5c7788ef814c0e51ef54 Mon Sep 17 00:00:00 2001
From: Eugene Huang <eugene.huang99@gmail.com>
Date: Tue, 14 Mar 2023 17:05:52 +0800
Subject: [PATCH 009/173] ASOC: Intel: sof_sdw: add quirk for Intel 'Rooks
 County' NUC M15
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Same quirks as the 'Bishop County' NUC M15, except the rt711 is in the
'JD2 100K' jack detection mode.

Link: https://github.com/thesofproject/linux/issues/4088
Signed-off-by: Eugene Huang <eugene.huang99@gmail.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20230314090553.498664-2-yung-chuan.liao@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/sof_sdw.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index d2ed807abde9..767fa89d0870 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -213,6 +213,17 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
 					SOF_SDW_PCH_DMIC |
 					RT711_JD1),
 	},
+	{
+		/* NUC15 'Rooks County' LAPRC510 and LAPRC710 skews */
+		.callback = sof_sdw_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Intel(R) Client Systems"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LAPRC"),
+		},
+		.driver_data = (void *)(SOF_SDW_TGL_HDMI |
+					SOF_SDW_PCH_DMIC |
+					RT711_JD2_100K),
+	},
 	/* TigerLake-SDCA devices */
 	{
 		.callback = sof_sdw_quirk_cb,

From 9c691a42b8926c8966561265cdae3ddc7464d3a2 Mon Sep 17 00:00:00 2001
From: Eugene Huang <eugene.huang99@gmail.com>
Date: Tue, 14 Mar 2023 17:05:53 +0800
Subject: [PATCH 010/173] ASoC: Intel: soc-acpi: add table for Intel 'Rooks
 County' NUC M15
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Same topology as the HP Omen 16-k0005TX, except with the rt1316 amp
on link2.

Link: https://github.com/thesofproject/linux/issues/4088
Signed-off-by: Eugene Huang <eugene.huang99@gmail.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20230314090553.498664-3-yung-chuan.liao@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../intel/common/soc-acpi-intel-adl-match.c   | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
index 28dd2046e4ac..d8c80041388a 100644
--- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
@@ -354,6 +354,20 @@ static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link3[] = {
 	{}
 };
 
+static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link0_rt1316_link2[] = {
+	{
+		.mask = BIT(0),
+		.num_adr = ARRAY_SIZE(rt711_sdca_0_adr),
+		.adr_d = rt711_sdca_0_adr,
+	},
+	{
+		.mask = BIT(2),
+		.num_adr = ARRAY_SIZE(rt1316_2_single_adr),
+		.adr_d = rt1316_2_single_adr,
+	},
+	{}
+};
+
 static const struct snd_soc_acpi_adr_device mx8373_2_adr[] = {
 	{
 		.adr = 0x000223019F837300ull,
@@ -624,6 +638,12 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = {
 		.drv_name = "sof_sdw",
 		.sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l3.tplg",
 	},
+	{
+		.link_mask = 0x5, /* 2 active links required */
+		.links = adl_sdw_rt711_link0_rt1316_link2,
+		.drv_name = "sof_sdw",
+		.sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l2.tplg",
+	},
 	{
 		.link_mask = 0x1, /* link0 required */
 		.links = adl_rvp,

From 083a25b18d6ad9f1f540e629909aa3eaaaf01823 Mon Sep 17 00:00:00 2001
From: Shengjiu Wang <shengjiu.wang@nxp.com>
Date: Thu, 9 Mar 2023 15:13:37 +0800
Subject: [PATCH 011/173] ASoC: soc-pcm: fix hw->formats cleared by
 soc_pcm_hw_init() for dpcm

The hw->formats may be set by snd_dmaengine_pcm_refine_runtime_hwparams()
in component's startup()/open(), but soc_pcm_hw_init() will init
hw->formats in dpcm_runtime_setup_fe() after component's startup()/open(),
which causes the valuable hw->formats to be cleared.

So need to store the hw->formats before initialization, then restore
it after initialization.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Link: https://lore.kernel.org/r/1678346017-3660-1-git-send-email-shengjiu.wang@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-pcm.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 5eb056b942ce..7958c9defd49 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1661,10 +1661,14 @@ static void dpcm_runtime_setup_fe(struct snd_pcm_substream *substream)
 	struct snd_pcm_hardware *hw = &runtime->hw;
 	struct snd_soc_dai *dai;
 	int stream = substream->stream;
+	u64 formats = hw->formats;
 	int i;
 
 	soc_pcm_hw_init(hw);
 
+	if (formats)
+		hw->formats &= formats;
+
 	for_each_rtd_cpu_dais(fe, i, dai) {
 		struct snd_soc_pcm_stream *cpu_stream;
 

From 7c3940bf81e5664cdb50c3fedfec8f0a756a34fb Mon Sep 17 00:00:00 2001
From: "GuoRui.Yu" <GuoRui.Yu@linux.alibaba.com>
Date: Thu, 23 Feb 2023 00:53:15 +0800
Subject: [PATCH 012/173] swiotlb: fix the deadlock in swiotlb_do_find_slots

In general, if swiotlb is sufficient, the logic of index =
wrap_area_index(mem, index + 1) is fine, it will quickly take a slot and
release the area->lock; But if swiotlb is insufficient and the device
has min_align_mask requirements, such as NVME, we may not be able to
satisfy index == wrap and exit the loop properly. In this case, other
kernel threads will not be able to acquire the area->lock and release
the slot, resulting in a deadlock.

The current implementation of wrap_area_index does not involve a modulo
operation, so adjusting the wrap to ensure the loop ends is not trivial.
Introduce a new variable to record the number of loops and exit the loop
after completing the traversal.

Backtraces:
Other CPUs are waiting this core to exit the swiotlb_do_find_slots
loop.
[10199.924391] RIP: 0010:swiotlb_do_find_slots+0x1fe/0x3e0
[10199.924403] Call Trace:
[10199.924404]  <TASK>
[10199.924405]  swiotlb_tbl_map_single+0xec/0x1f0
[10199.924407]  swiotlb_map+0x5c/0x260
[10199.924409]  ? nvme_pci_setup_prps+0x1ed/0x340
[10199.924411]  dma_direct_map_page+0x12e/0x1c0
[10199.924413]  nvme_map_data+0x304/0x370
[10199.924415]  nvme_prep_rq.part.0+0x31/0x120
[10199.924417]  nvme_queue_rq+0x77/0x1f0

...
[ 9639.596311] NMI backtrace for cpu 48
[ 9639.596336] Call Trace:
[ 9639.596337]
[ 9639.596338] _raw_spin_lock_irqsave+0x37/0x40
[ 9639.596341] swiotlb_do_find_slots+0xef/0x3e0
[ 9639.596344] swiotlb_tbl_map_single+0xec/0x1f0
[ 9639.596347] swiotlb_map+0x5c/0x260
[ 9639.596349] dma_direct_map_sg+0x7a/0x280
[ 9639.596352] __dma_map_sg_attrs+0x30/0x70
[ 9639.596355] dma_map_sgtable+0x1d/0x30
[ 9639.596356] nvme_map_data+0xce/0x370

...
[ 9639.595665] NMI backtrace for cpu 50
[ 9639.595682] Call Trace:
[ 9639.595682]
[ 9639.595683] _raw_spin_lock_irqsave+0x37/0x40
[ 9639.595686] swiotlb_release_slots.isra.0+0x86/0x180
[ 9639.595688] dma_direct_unmap_sg+0xcf/0x1a0
[ 9639.595690] nvme_unmap_data.part.0+0x43/0xc0

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: GuoRui.Yu <GuoRui.Yu@linux.alibaba.com>
Signed-off-by: Xiaokang Hu <xiaokang.hxk@alibaba-inc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/swiotlb.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 03e3251cd9d2..91454b513db0 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -625,8 +625,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 	unsigned int iotlb_align_mask =
 		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
 	unsigned int nslots = nr_slots(alloc_size), stride;
-	unsigned int index, wrap, count = 0, i;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+	unsigned int index, slots_checked, count = 0, i;
 	unsigned long flags;
 	unsigned int slot_base;
 	unsigned int slot_index;
@@ -649,15 +649,16 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 		goto not_found;
 
 	slot_base = area_index * mem->area_nslabs;
-	index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
+	index = wrap_area_index(mem, ALIGN(area->index, stride));
 
-	do {
+	for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
 		slot_index = slot_base + index;
 
 		if (orig_addr &&
 		    (slot_addr(tbl_dma_addr, slot_index) &
 		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
 			index = wrap_area_index(mem, index + 1);
+			slots_checked++;
 			continue;
 		}
 
@@ -673,7 +674,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 				goto found;
 		}
 		index = wrap_area_index(mem, index + stride);
-	} while (index != wrap);
+		slots_checked += stride;
+	}
 
 not_found:
 	spin_unlock_irqrestore(&area->lock, flags);

From f5bad62f9107b701a6def7cac1f5f65862219b83 Mon Sep 17 00:00:00 2001
From: Jonathan Denose <jdenose@chromium.org>
Date: Fri, 17 Mar 2023 03:19:51 -0700
Subject: [PATCH 013/173] Input: i8042 - add quirk for Fujitsu Lifebook A574/H

Fujitsu Lifebook A574/H requires the nomux option to properly
probe the touchpad, especially when waking from sleep.

Signed-off-by: Jonathan Denose <jdenose@google.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230303152623.45859-1-jdenose@google.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-acpipnpio.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index efc61736099b..fe7ffe30997c 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -610,6 +610,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
 		},
 		.driver_data = (void *)(SERIO_QUIRK_NOMUX)
 	},
+	{
+		/* Fujitsu Lifebook A574/H */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"),
+		},
+		.driver_data = (void *)(SERIO_QUIRK_NOMUX)
+	},
 	{
 		/* Gigabyte M912 */
 		.matches = {

From 8a0432bab6ea3203d220785da7ab3c7677f70ecb Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Mar 2023 03:13:12 -0700
Subject: [PATCH 014/173] Input: goodix - add Lenovo Yoga Book X90F to
 nine_bytes_report DMI table

The Android Lenovo Yoga Book X90F / X90L uses the same goodix touchscreen
with 9 bytes touch reports for its touch keyboard as the already supported
Windows Lenovo Yoga Book X91F/L, add a DMI match for this to
the nine_bytes_report DMI table.

When the quirk for the X91F/L was initially added it was written to
also apply to the X90F/L but this does not work because the Android
version of the Yoga Book uses completely different DMI strings.
Also adjust the X91F/L quirk to reflect that it only applies to
the X91F/L models.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Bastien Nocera <hadess@hadess.net>
Link: https://lore.kernel.org/r/20230315134442.71787-1-hdegoede@redhat.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/goodix.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index b348172f19c3..d77f116680a0 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -124,10 +124,18 @@ static const unsigned long goodix_irq_flags[] = {
 static const struct dmi_system_id nine_bytes_report[] = {
 #if defined(CONFIG_DMI) && defined(CONFIG_X86)
 	{
-		.ident = "Lenovo YogaBook",
-		/* YB1-X91L/F and YB1-X90L/F */
+		/* Lenovo Yoga Book X90F / X90L */
 		.matches = {
-			DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9")
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
+		}
+	},
+	{
+		/* Lenovo Yoga Book X91F / X91L */
+		.matches = {
+			/* Non exact match to match F + L versions */
+			DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X91"),
 		}
 	},
 #endif

From f8acb24aaf89fc46cd953229462ea8abe31b395f Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Wed, 15 Mar 2023 08:34:13 -0700
Subject: [PATCH 015/173] x86/hyperv: Block root partition functionality in a
 Confidential VM

Hyper-V should never specify a VM that is a Confidential VM and also
running in the root partition.  Nonetheless, explicitly block such a
combination to guard against a compromised Hyper-V maliciously trying to
exploit root partition functionality in a Confidential VM to expose
Confidential VM secrets. No known bug is being fixed, but the attack
surface for Confidential VMs on Hyper-V is reduced.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/1678894453-95392-1-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/kernel/cpu/mshyperv.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f36dc2f796c5..f1197366a97d 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -358,12 +358,16 @@ static void __init ms_hyperv_init_platform(void)
 	 * To mirror what Windows does we should extract CPU management
 	 * features and use the ReservedIdentityBit to detect if Linux is the
 	 * root partition. But that requires negotiating CPU management
-	 * interface (a process to be finalized).
+	 * interface (a process to be finalized). For now, use the privilege
+	 * flag as the indicator for running as root.
 	 *
-	 * For now, use the privilege flag as the indicator for running as
-	 * root.
+	 * Hyper-V should never specify running as root and as a Confidential
+	 * VM. But to protect against a compromised/malicious Hyper-V trying
+	 * to exploit root behavior to expose Confidential VM memory, ignore
+	 * the root partition setting if also a Confidential VM.
 	 */
-	if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
+	if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
+	    !(ms_hyperv.priv_high & HV_ISOLATION)) {
 		hv_root_partition = true;
 		pr_info("Hyper-V: running as root partition\n");
 	}

From 205efd4619b860404ebb5882e5a119eb3b3b3716 Mon Sep 17 00:00:00 2001
From: Ge-org Brohammer <gbrohammer@outlook.com>
Date: Fri, 17 Mar 2023 00:38:51 +0200
Subject: [PATCH 016/173] ASoC: amd: yc: Add DMI entries to support Victus by
 HP Laptop 16-e1xxx (8A22)

This model requires an additional detection quirk to
enable the internal microphone.

Tried to use git send-email this time.

Signed-off-by: Ge-org Brohammer <gbrohammer@outlook.com>
Link: https://lore.kernel.org/r/PAVP195MB2261322C220E95D7F4B2732ADABC9@PAVP195MB2261.EURP195.PROD.OUTLOOK.COM
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/amd/yc/acp6x-mach.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index 4a69ce702360..0acdf0156f07 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -269,6 +269,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "8A43"),
 		}
 	},
+	{
+		.driver_data = &acp6x_card,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+			DMI_MATCH(DMI_BOARD_NAME, "8A22"),
+		}
+	},
 	{}
 };
 

From ab327f8acdf8d06601fbf058859a539a9422afff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= <noltari@gmail.com>
Date: Fri, 17 Mar 2023 11:20:04 +0100
Subject: [PATCH 017/173] mips: bmips: BCM6358: disable RAC flush for TP1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RAC flush causes kernel panics on BCM6358 with EHCI/OHCI when booting from TP1:
[    3.881739] usb 1-1: new high-speed USB device number 2 using ehci-platform
[    3.895011] Reserved instruction in kernel code[#1]:
[    3.900113] CPU: 0 PID: 1 Comm: init Not tainted 5.10.16 #0
[    3.905829] $ 0   : 00000000 10008700 00000000 77d94060
[    3.911238] $ 4   : 7fd1f088 00000000 81431cac 81431ca0
[    3.916641] $ 8   : 00000000 ffffefff 8075cd34 00000000
[    3.922043] $12   : 806f8d40 f3e812b7 00000000 000d9aaa
[    3.927446] $16   : 7fd1f068 7fd1f080 7ff559b8 81428470
[    3.932848] $20   : 00000000 00000000 55590000 77d70000
[    3.938251] $24   : 00000018 00000010
[    3.943655] $28   : 81430000 81431e60 81431f28 800157fc
[    3.949058] Hi    : 00000000
[    3.952013] Lo    : 00000000
[    3.955019] epc   : 80015808 setup_sigcontext+0x54/0x24c
[    3.960464] ra    : 800157fc setup_sigcontext+0x48/0x24c
[    3.965913] Status: 10008703	KERNEL EXL IE
[    3.970216] Cause : 00800028 (ExcCode 0a)
[    3.974340] PrId  : 0002a010 (Broadcom BMIPS4350)
[    3.979170] Modules linked in: ohci_platform ohci_hcd fsl_mph_dr_of ehci_platform ehci_fsl ehci_hcd gpio_button_hotplug usbcore nls_base usb_common
[    3.992907] Process init (pid: 1, threadinfo=(ptrval), task=(ptrval), tls=77e22ec8)
[    4.000776] Stack : 81431ef4 7fd1f080 81431f28 81428470 7fd1f068 81431edc 7ff559b8 81428470
[    4.009467]         81431f28 7fd1f080 55590000 77d70000 77d5498c 80015c70 806f0000 8063ae74
[    4.018149]         08100002 81431f28 0000000a 08100002 81431f28 0000000a 77d6b418 00000003
[    4.026831]         ffffffff 80016414 80080734 81431ecc 81431ecc 00000001 00000000 04000000
[    4.035512]         77d54874 00000000 00000000 00000000 00000000 00000012 00000002 00000000
[    4.044196]         ...
[    4.046706] Call Trace:
[    4.049238] [<80015808>] setup_sigcontext+0x54/0x24c
[    4.054356] [<80015c70>] setup_frame+0xdc/0x124
[    4.059015] [<80016414>] do_notify_resume+0x1dc/0x288
[    4.064207] [<80011b50>] work_notifysig+0x10/0x18
[    4.069036]
[    4.070538] Code: 8fc300b4  00001025  26240008 <ac820000> ac830004  3c048063  0c0228aa  24846a00  26240010
[    4.080686]
[    4.082517] ---[ end trace 22a8edb41f5f983b ]---
[    4.087374] Kernel panic - not syncing: Fatal exception
[    4.092753] Rebooting in 1 seconds..

Because the bootloader (CFE) is not initializing the Read-ahead cache properly
on the second thread (TP1). Since the RAC was not initialized properly, we
should avoid flushing it at the risk of corrupting the instruction stream as
seen in the trace above.

Fixes: d59098a0e9cb ("MIPS: bmips: use generic dma noncoherent ops")
Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/bmips/dma.c   | 5 +++++
 arch/mips/bmips/setup.c | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c
index 33788668cbdb..3779e7855bd7 100644
--- a/arch/mips/bmips/dma.c
+++ b/arch/mips/bmips/dma.c
@@ -5,6 +5,8 @@
 #include <asm/bmips.h>
 #include <asm/io.h>
 
+bool bmips_rac_flush_disable;
+
 void arch_sync_dma_for_cpu_all(void)
 {
 	void __iomem *cbr = BMIPS_GET_CBR();
@@ -15,6 +17,9 @@ void arch_sync_dma_for_cpu_all(void)
 	    boot_cpu_type() != CPU_BMIPS4380)
 		return;
 
+	if (unlikely(bmips_rac_flush_disable))
+		return;
+
 	/* Flush stale data out of the readahead cache */
 	cfg = __raw_readl(cbr + BMIPS_RAC_CONFIG);
 	__raw_writel(cfg | 0x100, cbr + BMIPS_RAC_CONFIG);
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index e95b3f78e7cd..549a6392a3d2 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -35,6 +35,8 @@
 #define REG_BCM6328_OTP		((void __iomem *)CKSEG1ADDR(0x1000062c))
 #define BCM6328_TP1_DISABLED	BIT(9)
 
+extern bool bmips_rac_flush_disable;
+
 static const unsigned long kbase = VMLINUX_LOAD_ADDRESS & 0xfff00000;
 
 struct bmips_quirk {
@@ -104,6 +106,12 @@ static void bcm6358_quirks(void)
 	 * disable SMP for now
 	 */
 	bmips_smp_enabled = 0;
+
+	/*
+	 * RAC flush causes kernel panics on BCM6358 when booting from TP1
+	 * because the bootloader is not initializing it properly.
+	 */
+	bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31));
 }
 
 static void bcm6368_quirks(void)

From ffa6206ebf8d39e83d87ac226df68dbbe155819a Mon Sep 17 00:00:00 2001
From: Matthias Benkmann <matthias.benkmann@gmail.com>
Date: Sun, 19 Mar 2023 21:30:15 -0700
Subject: [PATCH 018/173] Input: xpad - fix incorrectly applied patch for
 MAP_PROFILE_BUTTON

When commit commit fff1011a26d6 ("Input: xpad - add X-Box Adaptive Profile
button") was applied, one hunk ended up in the wrong function; move it to
where it belongs.

Fixes: fff1011a26d6 ("Input: xpad - add X-Box Adaptive Profile button")
Signed-off-by: Matthias Benkmann <matthias.benkmann@gmail.com>
Link: https://lore.kernel.org/r/20230318162106.0aef4ba5@ninja
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index f642ec8e92dd..29131f1a2f06 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -781,9 +781,6 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d
 	input_report_key(dev, BTN_C, data[8]);
 	input_report_key(dev, BTN_Z, data[9]);
 
-	/* Profile button has a value of 0-3, so it is reported as an axis */
-	if (xpad->mapping & MAP_PROFILE_BUTTON)
-		input_report_abs(dev, ABS_PROFILE, data[34]);
 
 	input_sync(dev);
 }
@@ -1061,6 +1058,10 @@ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char
 					(__u16) le16_to_cpup((__le16 *)(data + 8)));
 		}
 
+		/* Profile button has a value of 0-3, so it is reported as an axis */
+		if (xpad->mapping & MAP_PROFILE_BUTTON)
+			input_report_abs(dev, ABS_PROFILE, data[34]);
+
 		/* paddle handling */
 		/* based on SDL's SDL_hidapi_xboxone.c */
 		if (xpad->mapping & MAP_PADDLES) {

From 8980f190947ba29f23110408e712444884b74251 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 19 Mar 2023 21:36:36 -0700
Subject: [PATCH 019/173] Input: focaltech - use explicitly signed char type

The recent change of -funsigned-char causes additions of negative
numbers to become additions of large positive numbers, leading to wrong
calculations of mouse movement. Change these casts to be explicitly
signed, to take into account negative offsets.

Fixes: 3bc753c06dd0 ("kbuild: treat char as always unsigned")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217211
Link: https://lore.kernel.org/r/20230318133010.1285202-1-Jason@zx2c4.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/focaltech.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c
index 6fd5fff0cbff..c74b99077d16 100644
--- a/drivers/input/mouse/focaltech.c
+++ b/drivers/input/mouse/focaltech.c
@@ -202,8 +202,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
 	state->pressed = packet[0] >> 7;
 	finger1 = ((packet[0] >> 4) & 0x7) - 1;
 	if (finger1 < FOC_MAX_FINGERS) {
-		state->fingers[finger1].x += (char)packet[1];
-		state->fingers[finger1].y += (char)packet[2];
+		state->fingers[finger1].x += (s8)packet[1];
+		state->fingers[finger1].y += (s8)packet[2];
 	} else {
 		psmouse_err(psmouse, "First finger in rel packet invalid: %d\n",
 			    finger1);
@@ -218,8 +218,8 @@ static void focaltech_process_rel_packet(struct psmouse *psmouse,
 	 */
 	finger2 = ((packet[3] >> 4) & 0x7) - 1;
 	if (finger2 < FOC_MAX_FINGERS) {
-		state->fingers[finger2].x += (char)packet[4];
-		state->fingers[finger2].y += (char)packet[5];
+		state->fingers[finger2].x += (s8)packet[4];
+		state->fingers[finger2].y += (s8)packet[5];
 	}
 }
 

From 754ff5060daf5a1cf4474eff9b4edeb6c17ef7ab Mon Sep 17 00:00:00 2001
From: msizanoen <msizanoen@qtmlabs.xyz>
Date: Sun, 19 Mar 2023 23:02:56 -0700
Subject: [PATCH 020/173] Input: alps - fix compatibility with -funsigned-char

The AlpsPS/2 code previously relied on the assumption that `char` is a
signed type, which was true on x86 platforms (the only place where this
driver is used) before kernel 6.2. However, on 6.2 and later, this
assumption is broken due to the introduction of -funsigned-char as a new
global compiler flag.

Fix this by explicitly specifying the signedness of `char` when sign
extending the values received from the device.

Fixes: f3f33c677699 ("Input: alps - Rushmore and v7 resolution support")
Signed-off-by: msizanoen <msizanoen@qtmlabs.xyz>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230320045228.182259-1-msizanoen@qtmlabs.xyz
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 989228b5a0a4..e2c11d9f3868 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -852,8 +852,8 @@ static void alps_process_packet_v6(struct psmouse *psmouse)
 			x = y = z = 0;
 
 		/* Divide 4 since trackpoint's speed is too fast */
-		input_report_rel(dev2, REL_X, (char)x / 4);
-		input_report_rel(dev2, REL_Y, -((char)y / 4));
+		input_report_rel(dev2, REL_X, (s8)x / 4);
+		input_report_rel(dev2, REL_Y, -((s8)y / 4));
 
 		psmouse_report_standard_buttons(dev2, packet[3]);
 
@@ -1104,8 +1104,8 @@ static void alps_process_trackstick_packet_v7(struct psmouse *psmouse)
 	    ((packet[3] & 0x20) << 1);
 	z = (packet[5] & 0x3f) | ((packet[3] & 0x80) >> 1);
 
-	input_report_rel(dev2, REL_X, (char)x);
-	input_report_rel(dev2, REL_Y, -((char)y));
+	input_report_rel(dev2, REL_X, (s8)x);
+	input_report_rel(dev2, REL_Y, -((s8)y));
 	input_report_abs(dev2, ABS_PRESSURE, z);
 
 	psmouse_report_standard_buttons(dev2, packet[1]);
@@ -2294,20 +2294,20 @@ static int alps_get_v3_v7_resolution(struct psmouse *psmouse, int reg_pitch)
 	if (reg < 0)
 		return reg;
 
-	x_pitch = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
+	x_pitch = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
 	x_pitch = 50 + 2 * x_pitch; /* In 0.1 mm units */
 
-	y_pitch = (char)reg >> 4; /* sign extend upper 4 bits */
+	y_pitch = (s8)reg >> 4; /* sign extend upper 4 bits */
 	y_pitch = 36 + 2 * y_pitch; /* In 0.1 mm units */
 
 	reg = alps_command_mode_read_reg(psmouse, reg_pitch + 1);
 	if (reg < 0)
 		return reg;
 
-	x_electrode = (char)(reg << 4) >> 4; /* sign extend lower 4 bits */
+	x_electrode = (s8)(reg << 4) >> 4; /* sign extend lower 4 bits */
 	x_electrode = 17 + x_electrode;
 
-	y_electrode = (char)reg >> 4; /* sign extend upper 4 bits */
+	y_electrode = (s8)reg >> 4; /* sign extend upper 4 bits */
 	y_electrode = 13 + y_electrode;
 
 	x_phys = x_pitch * (x_electrode - 1); /* In 0.1 mm units */

From cdce67099117ece371582f706c6eff7d3a65326d Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 10 Mar 2023 21:34:58 +0900
Subject: [PATCH 021/173] PCI: dwc: Fix PORT_LINK_CONTROL update when CDM check
 enabled

If CDM_CHECK is enabled (by the DT "snps,enable-cdm-check" property), 'val'
is overwritten by PCIE_PL_CHK_REG_CONTROL_STATUS initialization.  Commit
ec7b952f453c ("PCI: dwc: Always enable CDM check if "snps,enable-cdm-check"
exists") did not account for further usage of 'val', so we wrote improper
values to PCIE_PORT_LINK_CONTROL when the CDM check is enabled.

Move the PCIE_PORT_LINK_CONTROL update to be completely after the
PCIE_PL_CHK_REG_CONTROL_STATUS register initialization.

[bhelgaas: commit log adapted from Serge's version]
Fixes: ec7b952f453c ("PCI: dwc: Always enable CDM check if "snps,enable-cdm-check" exists")
Link: https://lore.kernel.org/r/20230310123510.675685-2-yoshihiro.shimoda.uh@renesas.com
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
---
 drivers/pci/controller/dwc/pcie-designware.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 53a16b8b6ac2..8e33e6e59e68 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -1001,11 +1001,6 @@ void dw_pcie_setup(struct dw_pcie *pci)
 		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
 	}
 
-	val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
-	val &= ~PORT_LINK_FAST_LINK_MODE;
-	val |= PORT_LINK_DLL_LINK_EN;
-	dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
-
 	if (dw_pcie_cap_is(pci, CDM_CHECK)) {
 		val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
 		val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
@@ -1013,6 +1008,11 @@ void dw_pcie_setup(struct dw_pcie *pci)
 		dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
 	}
 
+	val = dw_pcie_readl_dbi(pci, PCIE_PORT_LINK_CONTROL);
+	val &= ~PORT_LINK_FAST_LINK_MODE;
+	val |= PORT_LINK_DLL_LINK_EN;
+	dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
+
 	if (!pci->num_lanes) {
 		dev_dbg(pci->dev, "Using h/w default number of lanes\n");
 		return;

From 3ced71d273f8edf07bf01a831a49ca6b988e06b3 Mon Sep 17 00:00:00 2001
From: Kevin Locke <kevin@kevinlocke.name>
Date: Tue, 21 Mar 2023 15:39:22 -0600
Subject: [PATCH 022/173] kbuild: deb-pkg: set version for linux-headers paths

As a result of the switch to dh_listpackages, $version is no longer set
when install_kernel_headers() is called.  This causes files in the
linux-headers deb package to be installed to a path with an empty
$version (e.g. /usr/src/linux-headers-/scripts/sign-file rather than
/usr/src/linux-headers-6.3.0-rc3/scripts/sign-file).

To avoid this, while continuing to use the version information from
dh_listpackages, pass $version from $package as the second argument
of install_kernel_headers().

Fixes: 36862e14e316 ("kbuild: deb-pkg: use dh_listpackages to know enabled packages")
Signed-off-by: Kevin Locke <kevin@kevinlocke.name>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/builddeb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index c5ae57167d7c..7b23f52c70c5 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -162,6 +162,7 @@ install_linux_image_dbg () {
 
 install_kernel_headers () {
 	pdir=$1
+	version=$2
 
 	rm -rf $pdir
 
@@ -229,7 +230,7 @@ do
 	linux-libc-dev)
 		install_libc_headers debian/linux-libc-dev;;
 	linux-headers-*)
-		install_kernel_headers debian/linux-headers;;
+		install_kernel_headers debian/linux-headers ${package#linux-headers-};;
 	esac
 done
 

From 39e7d2ab6ea9fd6b389091ec223d566934fe7be5 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <petr.tesarik.ext@huawei.com>
Date: Tue, 21 Mar 2023 09:31:26 +0100
Subject: [PATCH 023/173] swiotlb: use wrap_area_index() instead of open-coding
 it

No functional change, just use an existing helper.

Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/swiotlb.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 91454b513db0..3856e2b524b4 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -695,10 +695,7 @@ found:
 	/*
 	 * Update the indices to avoid searching in the next round.
 	 */
-	if (index + nslots < mem->area_nslabs)
-		area->index = index + nslots;
-	else
-		area->index = 0;
+	area->index = wrap_area_index(mem, index + nslots);
 	area->used += nslots;
 	spin_unlock_irqrestore(&area->lock, flags);
 	return slot_index;

From 0eee5ae1025699ea93d44fdb6ef2365505082103 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <petr.tesarik.ext@huawei.com>
Date: Tue, 21 Mar 2023 09:31:27 +0100
Subject: [PATCH 024/173] swiotlb: fix slot alignment checks

Explicit alignment and page alignment are used only to calculate
the stride, not when checking actual slot physical address.

Originally, only page alignment was implemented, and that worked,
because the whole SWIOTLB is allocated on a page boundary, so
aligning the start index was sufficient to ensure a page-aligned
slot.

When commit 1f221a0d0dbf ("swiotlb: respect min_align_mask") added
support for min_align_mask, the index could be incremented in the
search loop, potentially finding an unaligned slot if minimum device
alignment is between IO_TLB_SIZE and PAGE_SIZE.  The bug could go
unnoticed, because the slot size is 2 KiB, and the most common page
size is 4 KiB, so there is no alignment value in between.

IIUC the intention has been to find a slot that conforms to all
alignment constraints: device minimum alignment, an explicit
alignment (given as function parameter) and optionally page
alignment (if allocation size is >= PAGE_SIZE). The most
restrictive mask can be trivially computed with logical AND. The
rest can stay.

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Fixes: e81e99bacc9f ("swiotlb: Support aligned swiotlb buffers")
Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/swiotlb.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 3856e2b524b4..5b919ef832b6 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -634,22 +634,26 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 	BUG_ON(!nslots);
 	BUG_ON(area_index >= mem->nareas);
 
+	/*
+	 * For allocations of PAGE_SIZE or larger only look for page aligned
+	 * allocations.
+	 */
+	if (alloc_size >= PAGE_SIZE)
+		iotlb_align_mask &= PAGE_MASK;
+	iotlb_align_mask &= alloc_align_mask;
+
 	/*
 	 * For mappings with an alignment requirement don't bother looping to
-	 * unaligned slots once we found an aligned one.  For allocations of
-	 * PAGE_SIZE or larger only look for page aligned allocations.
+	 * unaligned slots once we found an aligned one.
 	 */
 	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
-	if (alloc_size >= PAGE_SIZE)
-		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
-	stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
 	spin_lock_irqsave(&area->lock, flags);
 	if (unlikely(nslots > mem->area_nslabs - area->used))
 		goto not_found;
 
 	slot_base = area_index * mem->area_nslabs;
-	index = wrap_area_index(mem, ALIGN(area->index, stride));
+	index = area->index;
 
 	for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
 		slot_index = slot_base + index;

From e51f49512d98783b90799c9cc2002895ec3aa0eb Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Wed, 22 Mar 2023 10:55:38 +0200
Subject: [PATCH 025/173] ASoC: SOF: ipc4: Ensure DSP is in D0I0 during
 sof_ipc4_set_get_data()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The set_get_data() IPC op bypasses the check for the no_pm flag as done
with the regular IPC tx_msg op. Since set_get_data should be performed
when the DSP is in D0I0, set the DSP power state to D0I0 before sending
the IPC's in sof_ipc4_set_get_data().

Fixes: ceb89acc4dc8 ("ASoC: SOF: ipc4: Add support for mandatory message handling functionality")
Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230322085538.10214-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/soc/sof/ipc4.c b/sound/soc/sof/ipc4.c
index 8ede4b952997..246b56d24a6f 100644
--- a/sound/soc/sof/ipc4.c
+++ b/sound/soc/sof/ipc4.c
@@ -405,6 +405,9 @@ static int sof_ipc4_tx_msg(struct snd_sof_dev *sdev, void *msg_data, size_t msg_
 static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data,
 				 size_t payload_bytes, bool set)
 {
+	const struct sof_dsp_power_state target_state = {
+			.state = SOF_DSP_PM_D0,
+	};
 	size_t payload_limit = sdev->ipc->max_payload_size;
 	struct sof_ipc4_msg *ipc4_msg = data;
 	struct sof_ipc4_msg tx = {{ 0 }};
@@ -435,6 +438,11 @@ static int sof_ipc4_set_get_data(struct snd_sof_dev *sdev, void *data,
 
 	tx.extension |= SOF_IPC4_MOD_EXT_MSG_FIRST_BLOCK(1);
 
+	/* ensure the DSP is in D0i0 before sending IPC */
+	ret = snd_sof_dsp_set_power_state(sdev, &target_state);
+	if (ret < 0)
+		return ret;
+
 	/* Serialise IPC TX */
 	mutex_lock(&sdev->ipc->tx_mutex);
 

From d701cf6578e8447af4ac0fa08e7c5a0fad6df1ae Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Tue, 21 Feb 2023 17:10:43 +0100
Subject: [PATCH 026/173] MAINTAINERS: Update s390-iommu driver maintainer
 information

The s390 DMA API conversion changes currently under review will extend
the use of the s390-iommu driver to the DMA API. With s390's mandatory
use of an IOMMU this means all DMA for PCI devices will then use the
s390-iommu driver. With this in mind and considering my involvement in
these changes it makes sense to reflect this increased interdependence
in the maintainer structure. Thus add myself as first maintainer and
move Gerald to reviewer status.

Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Acked-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20230221161043.37065-1-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d8ebab595b2a..33a61d516446 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18291,8 +18291,9 @@ F:	drivers/s390/block/dasd*
 F:	include/linux/dasd_mod.h
 
 S390 IOMMU (PCI)
+M:	Niklas Schnelle <schnelle@linux.ibm.com>
 M:	Matthew Rosato <mjrosato@linux.ibm.com>
-M:	Gerald Schaefer <gerald.schaefer@linux.ibm.com>
+R:	Gerald Schaefer <gerald.schaefer@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 S:	Supported
 F:	drivers/iommu/s390-iommu.c

From e38c5e80c3d293a883c6f1d553f2146ec0bda35e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 22 Mar 2023 15:53:32 +0100
Subject: [PATCH 027/173] ASoC: Intel: bytcr_rt5640: Add quirk for the Acer
 Iconia One 7 B1-750

The Acer Iconia One 7 B1-750 tablet mostly works fine with the defaults
for an Bay Trail CR tablet. Except for the internal mic, instead of
an analog mic on IN3 a digital mic on DMIC1 is uses.

Add a quirk with these settings for this tablet.

Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230322145332.131525-1-hdegoede@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index 79e0039c79a3..5a12940ef907 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -533,6 +533,18 @@ static int byt_rt5640_aif1_hw_params(struct snd_pcm_substream *substream,
 
 /* Please keep this list alphabetically sorted */
 static const struct dmi_system_id byt_rt5640_quirk_table[] = {
+	{	/* Acer Iconia One 7 B1-750 */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "VESPA2"),
+		},
+		.driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
+					BYT_RT5640_JD_SRC_JD1_IN4P |
+					BYT_RT5640_OVCD_TH_1500UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
 	{	/* Acer Iconia Tab 8 W1-810 */
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),

From 6165a16a5ad9b237bb3131cff4d3c601ccb8f9a3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 21 Mar 2023 00:17:36 -0400
Subject: [PATCH 028/173] NFSv4: Fix hangs when recovering open state after a
 server reboot

When we're using a cached open stateid or a delegation in order to avoid
sending a CLAIM_PREVIOUS open RPC call to the server, we don't have a
new open stateid to present to update_open_stateid().
Instead rely on nfs4_try_open_cached(), just as if we were doing a
normal open.

Fixes: d2bfda2e7aa0 ("NFSv4: don't reprocess cached open CLAIM_PREVIOUS")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 22a93ae46cd7..5607b1e2b821 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1980,8 +1980,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
 	if (!data->rpc_done) {
 		if (data->rpc_status)
 			return ERR_PTR(data->rpc_status);
-		/* cached opens have already been processed */
-		goto update;
+		return nfs4_try_open_cached(data);
 	}
 
 	ret = nfs_refresh_inode(inode, &data->f_attr);
@@ -1990,7 +1989,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
 
 	if (data->o_res.delegation_type != 0)
 		nfs4_opendata_check_deleg(data, state);
-update:
+
 	if (!update_open_stateid(state, &data->o_res.stateid,
 				NULL, data->o_arg.fmode))
 		return ERR_PTR(-EAGAIN);

From 1073c15fd39e804ad36ff26a7c7d53b0ab51b184 Mon Sep 17 00:00:00 2001
From: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Date: Wed, 22 Mar 2023 09:51:07 +0100
Subject: [PATCH 029/173] scripts: merge_config: Fix typo in variable name.

${WARNOVERRIDE} was misspelled as ${WARNOVVERIDE}, which caused a shell
syntax error in certain paths of the script execution.

Fixes: 46dff8d7e381 ("scripts: merge_config: Add option to suppress warning on overrides")
Signed-off-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/merge_config.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh
index 32620de473ad..902eb429b9db 100755
--- a/scripts/kconfig/merge_config.sh
+++ b/scripts/kconfig/merge_config.sh
@@ -145,7 +145,7 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do
 		NEW_VAL=$(grep -w $CFG $MERGE_FILE)
 		BUILTIN_FLAG=false
 		if [ "$BUILTIN" = "true" ] && [ "${NEW_VAL#CONFIG_*=}" = "m" ] && [ "${PREV_VAL#CONFIG_*=}" = "y" ]; then
-			${WARNOVVERIDE} Previous  value: $PREV_VAL
+			${WARNOVERRIDE} Previous  value: $PREV_VAL
 			${WARNOVERRIDE} New value:       $NEW_VAL
 			${WARNOVERRIDE} -y passed, will not demote y to m
 			${WARNOVERRIDE}

From fb27e70f6e408dee5d22b083e7a38a59e6118253 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 22 Mar 2023 19:11:45 +0100
Subject: [PATCH 030/173] modpost: Fix processing of CRCs on 32-bit build
 machines

modpost now reads CRCs from .*.cmd files, parsing them using strtol().
This is inconsistent with its parsing of Module.symvers and with their
definition as *unsigned* 32-bit values.

strtol() clamps values to [LONG_MIN, LONG_MAX], and when building on a
32-bit system this changes all CRCs >= 0x80000000 to be 0x7fffffff.

Change extract_crcs_for_object() to use strtoul() instead.

Cc: stable@vger.kernel.org
Fixes: f292d875d0dc ("modpost: extract symbol versions from *.cmd files")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/mod/modpost.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index efff8078e395..9466b6a2abae 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1733,7 +1733,7 @@ static void extract_crcs_for_object(const char *object, struct module *mod)
 		if (!isdigit(*p))
 			continue;	/* skip this line */
 
-		crc = strtol(p, &p, 0);
+		crc = strtoul(p, &p, 0);
 		if (*p != '\n')
 			continue;	/* skip this line */
 

From 6f57937980142715e927697a6ffd2050f38ed6f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 22 Mar 2023 22:45:40 +0100
Subject: [PATCH 031/173] pwm: hibvt: Explicitly set .polarity in .get_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver only both polarities. Complete the implementation of
.get_state() by setting .polarity according to the configured hardware
state.

Fixes: d09f00810850 ("pwm: Add PWM driver for HiSilicon BVT SOCs")
Link: https://lore.kernel.org/r/20230228135508.1798428-2-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-hibvt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
index 12c05c155cab..1b9274c5ad87 100644
--- a/drivers/pwm/pwm-hibvt.c
+++ b/drivers/pwm/pwm-hibvt.c
@@ -146,6 +146,7 @@ static int hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	value = readl(base + PWM_CTRL_ADDR(pwm->hwpwm));
 	state->enabled = (PWM_ENABLE_MASK & value);
+	state->polarity = (PWM_POLARITY_MASK & value) ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL;
 
 	return 0;
 }

From 30006b77c7e130e01d1ab2148cc8abf73dfcc4bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 22 Mar 2023 22:45:41 +0100
Subject: [PATCH 032/173] pwm: cros-ec: Explicitly set .polarity in
 .get_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver only supports normal polarity. Complete the implementation of
.get_state() by setting .polarity accordingly.

Reviewed-by: Guenter Roeck <groeck@chromium.org>
Fixes: 1f0d3bb02785 ("pwm: Add ChromeOS EC PWM driver")
Link: https://lore.kernel.org/r/20230228135508.1798428-3-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-cros-ec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index 86df6702cb83..ad18b0ebe3f1 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c
@@ -198,6 +198,7 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	state->enabled = (ret > 0);
 	state->period = EC_PWM_MAX_DUTY;
+	state->polarity = PWM_POLARITY_NORMAL;
 
 	/*
 	 * Note that "disabled" and "duty cycle == 0" are treated the same. If

From b20b097128d9145fadcea1cbb45c4d186cb57466 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 22 Mar 2023 22:45:42 +0100
Subject: [PATCH 033/173] pwm: iqs620a: Explicitly set .polarity in
 .get_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver only supports normal polarity. Complete the implementation of
.get_state() by setting .polarity accordingly.

Fixes: 6f0841a8197b ("pwm: Add support for Azoteq IQS620A PWM generator")
Reviewed-by: Jeff LaBundy <jeff@labundy.com>
Link: https://lore.kernel.org/r/20230228135508.1798428-4-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-iqs620a.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c
index 8362b4870c66..47b3141135f3 100644
--- a/drivers/pwm/pwm-iqs620a.c
+++ b/drivers/pwm/pwm-iqs620a.c
@@ -126,6 +126,7 @@ static int iqs620_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 	mutex_unlock(&iqs620_pwm->lock);
 
 	state->period = IQS620_PWM_PERIOD_NS;
+	state->polarity = PWM_POLARITY_NORMAL;
 
 	return 0;
 }

From 2be4dcf6627e1bcbbef8e6ba1811f5127d39202c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 22 Mar 2023 22:45:43 +0100
Subject: [PATCH 034/173] pwm: sprd: Explicitly set .polarity in .get_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver only supports normal polarity. Complete the implementation of
.get_state() by setting .polarity accordingly.

Fixes: 8aae4b02e8a6 ("pwm: sprd: Add Spreadtrum PWM support")
Link: https://lore.kernel.org/r/20230228135508.1798428-5-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-sprd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
index d866ce345f97..bde579a338c2 100644
--- a/drivers/pwm/pwm-sprd.c
+++ b/drivers/pwm/pwm-sprd.c
@@ -109,6 +109,7 @@ static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 	duty = val & SPRD_PWM_DUTY_MSK;
 	tmp = (prescale + 1) * NSEC_PER_SEC * duty;
 	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, chn->clk_rate);
+	state->polarity = PWM_POLARITY_NORMAL;
 
 	/* Disable PWM clocks if the PWM channel is not in enable state. */
 	if (!state->enabled)

From 8caa81eb950cb2e9d2d6959b37d853162d197f57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 22 Mar 2023 22:45:44 +0100
Subject: [PATCH 035/173] pwm: meson: Explicitly set .polarity in .get_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver only supports normal polarity. Complete the implementation of
.get_state() by setting .polarity accordingly.

This fixes a regression that was possible since commit c73a3107624d
("pwm: Handle .get_state() failures") which stopped to zero-initialize
the state passed to the .get_state() callback. This was reported at
https://forum.odroid.com/viewtopic.php?f=177&t=46360 . While this was an
unintended side effect, the real issue is the driver's callback not
setting the polarity.

There is a complicating fact, that the .apply() callback fakes support
for inversed polarity. This is not (and cannot) be matched by
.get_state(). As fixing this isn't easy, only point it out in a comment
to prevent authors of other drivers from copying that approach.

Fixes: c375bcbaabdb ("pwm: meson: Read the full hardware state in meson_pwm_get_state()")
Reported-by: Munehisa Kamata <kamatam@amazon.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20230310191405.2606296-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-meson.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index 16d79ca5d8f5..5cd7b90872c6 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -162,6 +162,12 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
 	duty = state->duty_cycle;
 	period = state->period;
 
+	/*
+	 * Note this is wrong. The result is an output wave that isn't really
+	 * inverted and so is wrongly identified by .get_state as normal.
+	 * Fixing this needs some care however as some machines might rely on
+	 * this.
+	 */
 	if (state->polarity == PWM_POLARITY_INVERSED)
 		duty = period - duty;
 
@@ -358,6 +364,8 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 		state->duty_cycle = 0;
 	}
 
+	state->polarity = PWM_POLARITY_NORMAL;
+
 	return 0;
 }
 

From 1271a7b98e7989ba6bb978e14403fc84efe16e13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 22 Mar 2023 22:45:45 +0100
Subject: [PATCH 036/173] pwm: Zero-initialize the pwm_state passed to driver's
 .get_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is just to ensure that .usage_power is properly initialized and
doesn't contain random stack data. The other members of struct pwm_state
should get a value assigned in a successful call to .get_state(). So in
the absence of bugs in driver implementations, this is only a safe-guard
and no fix.

Reported-by: Munehisa Kamata <kamatam@amazon.com>
Link: https://lore.kernel.org/r/20230310214004.2619480-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/core.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index e01147f66e15..474725714a05 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -115,7 +115,14 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
 	}
 
 	if (pwm->chip->ops->get_state) {
-		struct pwm_state state;
+		/*
+		 * Zero-initialize state because most drivers are unaware of
+		 * .usage_power. The other members of state are supposed to be
+		 * set by lowlevel drivers. We still initialize the whole
+		 * structure for simplicity even though this might paper over
+		 * faulty implementations of .get_state().
+		 */
+		struct pwm_state state = { 0, };
 
 		err = pwm->chip->ops->get_state(pwm->chip, pwm, &state);
 		trace_pwm_get(pwm, &state, err);
@@ -448,7 +455,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm,
 {
 	struct pwm_state *last = &pwm->last;
 	struct pwm_chip *chip = pwm->chip;
-	struct pwm_state s1, s2;
+	struct pwm_state s1 = { 0 }, s2 = { 0 };
 	int err;
 
 	if (!IS_ENABLED(CONFIG_PWM_DEBUG))
@@ -530,6 +537,7 @@ static void pwm_apply_state_debug(struct pwm_device *pwm,
 		return;
 	}
 
+	*last = (struct pwm_state){ 0 };
 	err = chip->ops->get_state(chip, pwm, last);
 	trace_pwm_get(pwm, last, err);
 	if (err)

From a4a3203426f4b67535d6442ddc5dca8878a0678f Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Thu, 23 Mar 2023 11:01:25 +0000
Subject: [PATCH 037/173] ASoC: codecs: lpass: fix the order or clks turn off
 during suspend

The order in which clocks are stopped matters as some of the clock
like NPL are derived from MCLK.

Without this patch, Dragonboard RB5 DSP would crash with below error:
 qcom_q6v5_pas 17300000.remoteproc: fatal error received:
 ABT_dal.c:278:ABTimeout: AHB Bus hang is detected,
 Number of bus hang detected := 2 , addr0 = 0x3370000 , addr1 = 0x0!!!

Turn off  fsgen first, followed by npl and then finally mclk, which is exactly
the opposite order of enable sequence.

Fixes: 1dc3459009c3 ("ASoC: codecs: lpass: register mclk after runtime pm")
Reported-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Amit Pundir <amit.pundir@linaro.org>
Link: https://lore.kernel.org/r/20230323110125.23790-1-srinivas.kandagatla@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/lpass-rx-macro.c  | 4 ++--
 sound/soc/codecs/lpass-tx-macro.c  | 4 ++--
 sound/soc/codecs/lpass-wsa-macro.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
index a73a7d7a1c0a..faba4237bd3d 100644
--- a/sound/soc/codecs/lpass-rx-macro.c
+++ b/sound/soc/codecs/lpass-rx-macro.c
@@ -3670,9 +3670,9 @@ static int __maybe_unused rx_macro_runtime_suspend(struct device *dev)
 	regcache_cache_only(rx->regmap, true);
 	regcache_mark_dirty(rx->regmap);
 
-	clk_disable_unprepare(rx->mclk);
-	clk_disable_unprepare(rx->npl);
 	clk_disable_unprepare(rx->fsgen);
+	clk_disable_unprepare(rx->npl);
+	clk_disable_unprepare(rx->mclk);
 
 	return 0;
 }
diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
index 473d3cd39554..589c490a8c48 100644
--- a/sound/soc/codecs/lpass-tx-macro.c
+++ b/sound/soc/codecs/lpass-tx-macro.c
@@ -2098,9 +2098,9 @@ static int __maybe_unused tx_macro_runtime_suspend(struct device *dev)
 	regcache_cache_only(tx->regmap, true);
 	regcache_mark_dirty(tx->regmap);
 
-	clk_disable_unprepare(tx->mclk);
-	clk_disable_unprepare(tx->npl);
 	clk_disable_unprepare(tx->fsgen);
+	clk_disable_unprepare(tx->npl);
+	clk_disable_unprepare(tx->mclk);
 
 	return 0;
 }
diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c
index ba7480f3831e..3f6f1bdd4e03 100644
--- a/sound/soc/codecs/lpass-wsa-macro.c
+++ b/sound/soc/codecs/lpass-wsa-macro.c
@@ -2506,9 +2506,9 @@ static int __maybe_unused wsa_macro_runtime_suspend(struct device *dev)
 	regcache_cache_only(wsa->regmap, true);
 	regcache_mark_dirty(wsa->regmap);
 
-	clk_disable_unprepare(wsa->mclk);
-	clk_disable_unprepare(wsa->npl);
 	clk_disable_unprepare(wsa->fsgen);
+	clk_disable_unprepare(wsa->npl);
+	clk_disable_unprepare(wsa->mclk);
 
 	return 0;
 }

From 943d045a6d796175e5d08f9973953b1d2c07d797 Mon Sep 17 00:00:00 2001
From: Siddharth Kawar <Siddharth.Kawar@microsoft.com>
Date: Mon, 20 Mar 2023 20:37:40 +0000
Subject: [PATCH 038/173] SUNRPC: fix shutdown of NFS TCP client socket

NFS server Duplicate Request Cache (DRC) algorithms rely on NFS clients
reconnecting using the same local TCP port. Unique NFS operations are
identified by the per-TCP connection set of XIDs. This prevents file
corruption when non-idempotent NFS operations are retried.

Currently, NFS client TCP connections are using different local TCP ports
when reconnecting to NFS servers.

After an NFS server initiates shutdown of the TCP connection, the NFS
client's TCP socket is set to NULL after the socket state has reached
TCP_LAST_ACK(9). When reconnecting, the new socket attempts to reuse
the same local port but fails with EADDRNOTAVAIL (99). This forces the
socket to use a different local TCP port to reconnect to the remote NFS
server.

State Transition and Events:
TCP_CLOSE_WAIT(8)
TCP_LAST_ACK(9)
connect(fail EADDRNOTAVAIL(99))
TCP_CLOSE(7)
bind on new port
connect success

dmesg excerpts showing reconnect switching from TCP local port of 926 to
763 after commit 7c81e6a9d75b:
[13354.947854] NFS call  mkdir testW
...
[13405.654781] RPC:       xs_tcp_state_change client 00000000037d0f03...
[13405.654813] RPC:       state 8 conn 1 dead 0 zapped 1 sk_shutdown 1
[13405.654826] RPC:       xs_data_ready...
[13405.654892] RPC:       xs_tcp_state_change client 00000000037d0f03...
[13405.654895] RPC:       state 9 conn 0 dead 0 zapped 1 sk_shutdown 3
[13405.654899] RPC:       xs_tcp_state_change client 00000000037d0f03...
[13405.654900] RPC:       state 9 conn 0 dead 0 zapped 1 sk_shutdown 3
[13405.654950] RPC:       xs_connect scheduled xprt 00000000037d0f03
[13405.654975] RPC:       xs_bind 0.0.0.0:926: ok (0)
[13405.654980] RPC:       worker connecting xprt 00000000037d0f03 via tcp
			  to 10.101.6.228 (port 2049)
[13405.654991] RPC:       00000000037d0f03 connect status 99 connected 0
			  sock state 7
[13405.655001] RPC:       xs_tcp_state_change client 00000000037d0f03...
[13405.655002] RPC:       state 7 conn 0 dead 0 zapped 1 sk_shutdown 3
[13405.655024] RPC:       xs_connect scheduled xprt 00000000037d0f03
[13405.655038] RPC:       xs_bind 0.0.0.0:763: ok (0)
[13405.655041] RPC:       worker connecting xprt 00000000037d0f03 via tcp
			  to 10.101.6.228 (port 2049)
[13405.655065] RPC:       00000000037d0f03 connect status 115 connected 0
			  sock state 2

State Transition and Events with patch applied:
TCP_CLOSE_WAIT(8)
TCP_LAST_ACK(9)
TCP_CLOSE(7)
connect(reuse of port succeeds)

dmesg excerpts showing reconnect on same TCP local port of 936 with patch
applied:
[  257.139935] NFS: mkdir(0:59/560857152), testQ
[  257.139937] NFS call  mkdir testQ
...
[  307.822702] RPC:       state 8 conn 1 dead 0 zapped 1 sk_shutdown 1
[  307.822714] RPC:       xs_data_ready...
[  307.822817] RPC:       xs_tcp_state_change client 00000000ce702f14...
[  307.822821] RPC:       state 9 conn 0 dead 0 zapped 1 sk_shutdown 3
[  307.822825] RPC:       xs_tcp_state_change client 00000000ce702f14...
[  307.822826] RPC:       state 9 conn 0 dead 0 zapped 1 sk_shutdown 3
[  307.823606] RPC:       xs_tcp_state_change client 00000000ce702f14...
[  307.823609] RPC:       state 7 conn 0 dead 0 zapped 1 sk_shutdown 3
[  307.823629] RPC:       xs_tcp_state_change client 00000000ce702f14...
[  307.823632] RPC:       state 7 conn 0 dead 0 zapped 1 sk_shutdown 3
[  307.823676] RPC:       xs_connect scheduled xprt 00000000ce702f14
[  307.823704] RPC:       xs_bind 0.0.0.0:936: ok (0)
[  307.823709] RPC:       worker connecting xprt 00000000ce702f14 via tcp
			  to 10.101.1.30 (port 2049)
[  307.823748] RPC:       00000000ce702f14 connect status 115 connected 0
			  sock state 2
...
[  314.916193] RPC:       state 7 conn 0 dead 0 zapped 1 sk_shutdown 3
[  314.916251] RPC:       xs_connect scheduled xprt 00000000ce702f14
[  314.916282] RPC:       xs_bind 0.0.0.0:936: ok (0)
[  314.916292] RPC:       worker connecting xprt 00000000ce702f14 via tcp
			  to 10.101.1.30 (port 2049)
[  314.916342] RPC:       00000000ce702f14 connect status 115 connected 0
			  sock state 2

Fixes: 7c81e6a9d75b ("SUNRPC: Tweak TCP socket shutdown in the RPC client")
Signed-off-by: Siddharth Rajendra Kawar <sikawar@microsoft.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 net/sunrpc/xprtsock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index adcbedc244d6..6cacd70a15ff 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2158,6 +2158,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
 	switch (skst) {
 	case TCP_FIN_WAIT1:
 	case TCP_FIN_WAIT2:
+	case TCP_LAST_ACK:
 		break;
 	case TCP_ESTABLISHED:
 	case TCP_CLOSE_WAIT:

From cbedf1a33970c9b825ae75b81fbd3e88e224a418 Mon Sep 17 00:00:00 2001
From: Werner Sembach <wse@tuxedocomputers.com>
Date: Thu, 23 Mar 2023 18:13:11 -0700
Subject: [PATCH 039/173] Input: i8042 - add TUXEDO devices to i8042 quirk
 tables for partial fix

A lot of modern Clevo barebones have touchpad and/or keyboard issues after
suspend fixable with nomux + reset + noloop + nopnp. Luckily, none of them
have an external PS/2 port so this can safely be set for all of them.

I'm not entirely sure if every device listed really needs all four quirks,
but after testing and production use, no negative effects could be
observed when setting all four.

Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS on the Clevo N150CU
and the Clevo NHxxRZQ makes the keyboard very laggy for ~5 seconds after
boot and sometimes also after resume. However both are required for the
keyboard to not fail completely sometimes after boot or resume.

Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230321191619.647911-1-wse@tuxedocomputers.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-acpipnpio.h | 28 +++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h
index fe7ffe30997c..028e45bd050b 100644
--- a/drivers/input/serio/i8042-acpipnpio.h
+++ b/drivers/input/serio/i8042-acpipnpio.h
@@ -1124,6 +1124,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
 		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
 					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
 	},
+	{
+		/*
+		 * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes
+		 * the keyboard very laggy for ~5 seconds after boot and
+		 * sometimes also after resume.
+		 * However both are required for the keyboard to not fail
+		 * completely sometimes after boot or resume.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "N150CU"),
+		},
+		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+	},
 	{
 		.matches = {
 			DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"),
@@ -1131,6 +1145,20 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
 		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
 					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
 	},
+	{
+		/*
+		 * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes
+		 * the keyboard very laggy for ~5 seconds after boot and
+		 * sometimes also after resume.
+		 * However both are required for the keyboard to not fail
+		 * completely sometimes after boot or resume.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"),
+		},
+		.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
+					SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
+	},
 	{
 		.matches = {
 			DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"),

From f6887a71bdd2f0dcba9b8180dd2223cfa8637e85 Mon Sep 17 00:00:00 2001
From: Jason Montleon <jmontleo@redhat.com>
Date: Fri, 24 Mar 2023 13:07:11 -0400
Subject: [PATCH 040/173] ASoC: hdac_hdmi: use set_stream() instead of
 set_tdm_slots()

hdac_hdmi was not updated to use set_stream() instead of set_tdm_slots()
in the original commit so HDMI no longer produces audio.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/regressions/CAJD_bPKQdtaExvVEKxhQ47G-ZXDA=k+gzhMJRHLBe=mysPnuKA@mail.gmail.com/
Fixes: 636110411ca7 ("ASoC: Intel/SOF: use set_stream() instead of set_tdm_slots() for HDAudio")
Signed-off-by: Jason Montleon <jmontleo@redhat.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20230324170711.2526-1-jmontleo@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/hdac_hdmi.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index ed4f7cdda04f..8b6b76029694 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -436,23 +436,28 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_device *hdev,
 	return 0;
 }
 
-static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai,
-		unsigned int tx_mask, unsigned int rx_mask,
-		int slots, int slot_width)
+static int hdac_hdmi_set_stream(struct snd_soc_dai *dai,
+				void *stream, int direction)
 {
 	struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai);
 	struct hdac_device *hdev = hdmi->hdev;
 	struct hdac_hdmi_dai_port_map *dai_map;
 	struct hdac_hdmi_pcm *pcm;
+	struct hdac_stream *hstream;
 
-	dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, tx_mask);
+	if (!stream)
+		return -EINVAL;
+
+	hstream = (struct hdac_stream *)stream;
+
+	dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, hstream->stream_tag);
 
 	dai_map = &hdmi->dai_map[dai->id];
 
 	pcm = hdac_hdmi_get_pcm_from_cvt(hdmi, dai_map->cvt);
 
 	if (pcm)
-		pcm->stream_tag = (tx_mask << 4);
+		pcm->stream_tag = (hstream->stream_tag << 4);
 
 	return 0;
 }
@@ -1544,7 +1549,7 @@ static const struct snd_soc_dai_ops hdmi_dai_ops = {
 	.startup = hdac_hdmi_pcm_open,
 	.shutdown = hdac_hdmi_pcm_close,
 	.hw_params = hdac_hdmi_set_hw_params,
-	.set_tdm_slot = hdac_hdmi_set_tdm_slot,
+	.set_stream = hdac_hdmi_set_stream,
 };
 
 /*

From 0808ed6ebbc292222ca069d339744870f6d801da Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 24 Mar 2023 14:52:49 +0100
Subject: [PATCH 041/173] scsi: megaraid_sas: Fix fw_crash_buffer_show()

If crash_dump_buf is not allocated then crash dump can't be available.
Replace logical 'and' with 'or'.

Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230324135249.9733-1-thenzl@redhat.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 3ceece988338..c895189375e2 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -3298,7 +3298,7 @@ fw_crash_buffer_show(struct device *cdev,
 
 	spin_lock_irqsave(&instance->crashdump_lock, flags);
 	buff_offset = instance->fw_crash_buffer_offset;
-	if (!instance->crash_dump_buf &&
+	if (!instance->crash_dump_buf ||
 		!((instance->fw_crash_state == AVAILABLE) ||
 		(instance->fw_crash_state == COPYING))) {
 		dev_err(&instance->pdev->dev,

From 2309df27111a51734cb9240b4d3c25f2f3c6ab06 Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 24 Mar 2023 16:01:34 +0100
Subject: [PATCH 042/173] scsi: megaraid_sas: Fix crash after a double
 completion

When a physical disk is attached directly "without JBOD MAP support" (see
megasas_get_tm_devhandle()) then there is no real error handling in the
driver.  Return FAILED instead of SUCCESS.

Fixes: 18365b138508 ("megaraid_sas: Task management support")
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230324150134.14696-1-thenzl@redhat.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 84c9a55a5794..8a83f3fc2b86 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -4771,7 +4771,7 @@ int megasas_task_abort_fusion(struct scsi_cmnd *scmd)
 	devhandle = megasas_get_tm_devhandle(scmd->device);
 
 	if (devhandle == (u16)ULONG_MAX) {
-		ret = SUCCESS;
+		ret = FAILED;
 		sdev_printk(KERN_INFO, scmd->device,
 			"task abort issued for invalid devhandle\n");
 		mutex_unlock(&instance->reset_mutex);
@@ -4841,7 +4841,7 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd)
 	devhandle = megasas_get_tm_devhandle(scmd->device);
 
 	if (devhandle == (u16)ULONG_MAX) {
-		ret = SUCCESS;
+		ret = FAILED;
 		sdev_printk(KERN_INFO, scmd->device,
 			"target reset issued for invalid devhandle\n");
 		mutex_unlock(&instance->reset_mutex);

From f0aa59a33d2ac2267d260fe21eaf92500df8e7b4 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Wed, 22 Mar 2023 11:22:11 +0900
Subject: [PATCH 043/173] scsi: core: Improve scsi_vpd_inquiry() checks

Some USB-SATA adapters have broken behavior when an unsupported VPD page is
probed: Depending on the VPD page number, a 4-byte header with a valid VPD
page number but with a 0 length is returned. Currently, scsi_vpd_inquiry()
only checks that the page number is valid to determine if the page is
valid, which results in receiving only the 4-byte header for the
non-existent page. This error manifests itself very often with page 0xb9
for the Concurrent Positioning Ranges detection done by sd_read_cpr(),
resulting in the following error message:

sd 0:0:0:0: [sda] Invalid Concurrent Positioning Ranges VPD page

Prevent such misleading error message by adding a check in
scsi_vpd_inquiry() to verify that the page length is not 0.

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Link: https://lore.kernel.org/r/20230322022211.116327-1-damien.lemoal@opensource.wdc.com
Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 5cce1ba70fc6..09ef0b31dfc0 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -314,11 +314,18 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer,
 	if (result)
 		return -EIO;
 
-	/* Sanity check that we got the page back that we asked for */
+	/*
+	 * Sanity check that we got the page back that we asked for and that
+	 * the page size is not 0.
+	 */
 	if (buffer[1] != page)
 		return -EIO;
 
-	return get_unaligned_be16(&buffer[2]) + 4;
+	result = get_unaligned_be16(&buffer[2]);
+	if (!result)
+		return -EIO;
+
+	return result + 4;
 }
 
 static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)

From d684a7a26f7d2c7122a4581ac966ed64e88fb29c Mon Sep 17 00:00:00 2001
From: Jerry Snitselaar <jsnitsel@redhat.com>
Date: Fri, 24 Mar 2023 12:32:04 -0700
Subject: [PATCH 044/173] scsi: mpt3sas: Don't print sense pool info twice

_base_allocate_sense_dma_pool() already prints out the sense pool
information, so don't print it a second time after calling it in
_base_allocate_memory_pools(). In addition the version in
_base_allocate_memory_pools() was using the wrong size value, sz, which was
last assigned when doing some nvme calculations instead of sense_sz to
determine the pool size in kilobytes.

Cc: Sathya Prakash <sathya.prakash@broadcom.com>
Cc: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
Cc: Suganath Prabu Subramani <suganath-prabu.subramani@broadcom.com>
Cc: MPT-FusionLinux.pdl@broadcom.com
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Fixes: 970ac2bb70e7 ("scsi: mpt3sas: Force sense buffer allocations to be within same 4 GB region")
Signed-off-by: Jerry Snitselaar <jsnitsel@redhat.com>
Link: https://lore.kernel.org/r/20230324193204.567932-1-jsnitsel@redhat.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 2ee9ea57554d..14ae0a9c5d3d 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -6616,11 +6616,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
 	else if (rc == -EAGAIN)
 		goto try_32bit_dma;
 	total_sz += sense_sz;
-	ioc_info(ioc,
-	    "sense pool(0x%p)- dma(0x%llx): depth(%d),"
-	    "element_size(%d), pool_size(%d kB)\n",
-	    ioc->sense, (unsigned long long)ioc->sense_dma, ioc->scsiio_depth,
-	    SCSI_SENSE_BUFFERSIZE, sz / 1024);
 	/* reply pool, 4 byte align */
 	sz = ioc->reply_free_queue_depth * ioc->reply_sz;
 	rc = _base_allocate_reply_pool(ioc, sz);

From 02bcba0b9f9da706d5bd1e8cbeb83493863e17b5 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 26 Mar 2023 10:29:33 +0200
Subject: [PATCH 045/173] regulator: Handle deferred clk

devm_clk_get() can return -EPROBE_DEFER. So it is better to return the
error code from devm_clk_get(), instead of a hard coded -ENOENT.

This gives more opportunities to successfully probe the driver.

Fixes: 8959e5324485 ("regulator: fixed: add possibility to enable by clock")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/18459fae3d017a66313699c7c8456b28158b2dd0.1679819354.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fixed.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 2a9867abba20..e6724a229d23 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -215,7 +215,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
 		drvdata->enable_clock = devm_clk_get(dev, NULL);
 		if (IS_ERR(drvdata->enable_clock)) {
 			dev_err(dev, "Can't get enable-clock from devicetree\n");
-			return -ENOENT;
+			return PTR_ERR(drvdata->enable_clock);
 		}
 	} else if (drvtype && drvtype->has_performance_state) {
 		drvdata->desc.ops = &fixed_voltage_domain_ops;

From fb5755100a0a5aa5957bdb204fd1e249684557fc Mon Sep 17 00:00:00 2001
From: Rajvi Jingar <rajvi.jingar@linux.intel.com>
Date: Mon, 20 Mar 2023 14:20:29 -0700
Subject: [PATCH 046/173] platform/x86/intel/pmc: Alder Lake PCH
 slp_s0_residency fix

For platforms with Alder Lake PCH (Alder Lake S and Raptor Lake S) the
slp_s0_residency attribute has been reporting the wrong value. Unlike other
platforms, ADL PCH does not have a counter for the time that the SLP_S0
signal was asserted. Instead, firmware uses the aggregate of the Low Power
Mode (LPM) substate counters as the S0ix value.  Since the LPM counters run
at a different frequency, this lead to misreporting of the S0ix time.

Add a check for Alder Lake PCH and adjust the frequency accordingly when
display slp_s0_residency.

Fixes: bbab31101f44 ("platform/x86/intel: pmc/core: Add Alderlake support to pmc core driver")
Signed-off-by: Rajvi Jingar <rajvi.jingar@linux.intel.com>
Signed-off-by: David E. Box <david.e.box@linux.intel.com>
Reviewed-by: Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20230320212029.3154407-1-david.e.box@linux.intel.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel/pmc/core.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 3a15d32d7644..b9591969e0fa 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -66,7 +66,18 @@ static inline void pmc_core_reg_write(struct pmc_dev *pmcdev, int reg_offset,
 
 static inline u64 pmc_core_adjust_slp_s0_step(struct pmc_dev *pmcdev, u32 value)
 {
-	return (u64)value * pmcdev->map->slp_s0_res_counter_step;
+	/*
+	 * ADL PCH does not have the SLP_S0 counter and LPM Residency counters are
+	 * used as a workaround which uses 30.5 usec tick. All other client
+	 * programs have the legacy SLP_S0 residency counter that is using the 122
+	 * usec tick.
+	 */
+	const int lpm_adj_x2 = pmcdev->map->lpm_res_counter_step_x2;
+
+	if (pmcdev->map == &adl_reg_map)
+		return (u64)value * GET_X2_COUNTER((u64)lpm_adj_x2);
+	else
+		return (u64)value * pmcdev->map->slp_s0_res_counter_step;
 }
 
 static int set_etr3(struct pmc_dev *pmcdev)

From 441d901fbf669f6360566a4437b1e563b854de4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Mon, 27 Mar 2023 13:05:02 +0000
Subject: [PATCH 047/173] platform/x86: gigabyte-wmi: add support for B650
 AORUS ELITE AX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This has been reported as working.

Suggested-by: got3nks <got3nks@users.noreply.github.com>
Link: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/15#issuecomment-1483942966
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20230327-gigabyte-wmi-b650-elite-ax-v1-1-d4d645c21d0b@weissschuh.net
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 4dd39ab6ecfa..5e5b17c50eb6 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -151,6 +151,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550I AORUS PRO AX"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B650 AORUS ELITE AX"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660I AORUS PRO DDR4"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),

From e5c972c1fadacc858b6a564d056f177275238040 Mon Sep 17 00:00:00 2001
From: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
Date: Fri, 24 Mar 2023 15:52:33 +0100
Subject: [PATCH 048/173] KVM: SVM: Flush Hyper-V TLB when required

The Hyper-V "EnlightenedNptTlb" enlightenment is always enabled when KVM
is running on top of Hyper-V and Hyper-V exposes support for it (which
is always). On AMD CPUs this enlightenment results in ASID invalidations
not flushing TLB entries derived from the NPT. To force the underlying
(L0) hypervisor to rebuild its shadow page tables, an explicit hypercall
is needed.

The original KVM implementation of Hyper-V's "EnlightenedNptTlb" on SVM
only added remote TLB flush hooks. This worked out fine for a while, as
sufficient remote TLB flushes where being issued in KVM to mask the
problem. Since v5.17, changes in the TDP code reduced the number of
flushes and the out-of-sync TLB prevents guests from booting
successfully.

Split svm_flush_tlb_current() into separate callbacks for the 3 cases
(guest/all/current), and issue the required Hyper-V hypercall when a
Hyper-V TLB flush is needed. The most important case where the TLB flush
was missing is when loading a new PGD, which is followed by what is now
svm_flush_tlb_current().

Cc: stable@vger.kernel.org # v5.17+
Fixes: 1e0c7d40758b ("KVM: SVM: hyper-v: Remote TLB flush for SVM")
Link: https://lore.kernel.org/lkml/43980946-7bbf-dcef-7e40-af904c456250@linux.microsoft.com/
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20230324145233.4585-1-jpiotrowski@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/kvm_onhyperv.h     |  5 +++++
 arch/x86/kvm/svm/svm.c          | 37 ++++++++++++++++++++++++++++++---
 arch/x86/kvm/svm/svm_onhyperv.h | 15 +++++++++++++
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h
index 287e98ef9df3..6272dabec02d 100644
--- a/arch/x86/kvm/kvm_onhyperv.h
+++ b/arch/x86/kvm/kvm_onhyperv.h
@@ -12,6 +12,11 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm,
 int hv_remote_flush_tlb(struct kvm *kvm);
 void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp);
 #else /* !CONFIG_HYPERV */
+static inline int hv_remote_flush_tlb(struct kvm *kvm)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
 {
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 252e7f37e4e2..f25bc3cbb250 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3729,7 +3729,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 }
 
-static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -3753,6 +3753,37 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
 		svm->current_vmcb->asid_generation--;
 }
 
+static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+	hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
+
+	/*
+	 * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly
+	 * flush the NPT mappings via hypercall as flushing the ASID only
+	 * affects virtual to physical mappings, it does not invalidate guest
+	 * physical to host physical mappings.
+	 */
+	if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
+		hyperv_flush_guest_mapping(root_tdp);
+
+	svm_flush_tlb_asid(vcpu);
+}
+
+static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB
+	 * flushes should be routed to hv_remote_flush_tlb() without requesting
+	 * a "regular" remote flush.  Reaching this point means either there's
+	 * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of
+	 * which might be fatal to the guest.  Yell, but try to recover.
+	 */
+	if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
+		hv_remote_flush_tlb(vcpu->kvm);
+
+	svm_flush_tlb_asid(vcpu);
+}
+
 static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4745,10 +4776,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.set_rflags = svm_set_rflags,
 	.get_if_flag = svm_get_if_flag,
 
-	.flush_tlb_all = svm_flush_tlb_current,
+	.flush_tlb_all = svm_flush_tlb_all,
 	.flush_tlb_current = svm_flush_tlb_current,
 	.flush_tlb_gva = svm_flush_tlb_gva,
-	.flush_tlb_guest = svm_flush_tlb_current,
+	.flush_tlb_guest = svm_flush_tlb_asid,
 
 	.vcpu_pre_run = svm_vcpu_pre_run,
 	.vcpu_run = svm_vcpu_run,
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index cff838f15db5..786d46d73a8e 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -6,6 +6,8 @@
 #ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
 #define __ARCH_X86_KVM_SVM_ONHYPERV_H__
 
+#include <asm/mshyperv.h>
+
 #if IS_ENABLED(CONFIG_HYPERV)
 
 #include "kvm_onhyperv.h"
@@ -15,6 +17,14 @@ static struct kvm_x86_ops svm_x86_ops;
 
 int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
 
+static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
+{
+	struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
+
+	return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB &&
+	       !!hve->hv_enlightenments_control.enlightened_npt_tlb;
+}
+
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
 	struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
@@ -80,6 +90,11 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu)
 }
 #else
 
+static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
 }

From d583fbd7066a2dea43050521a95d9770f7d7593e Mon Sep 17 00:00:00 2001
From: Dmytro Maluka <dmy@semihalf.com>
Date: Wed, 22 Mar 2023 21:43:43 +0100
Subject: [PATCH 049/173] KVM: irqfd: Make resampler_list an RCU list

It is useful to be able to do read-only traversal of the list of all the
registered irqfd resamplers without locking the resampler_lock mutex.
In particular, we are going to traverse it to search for a resampler
registered for the given irq of an irqchip, and that will be done with
an irqchip spinlock (ioapic->lock) held, so it is undesirable to lock a
mutex in this context. So turn this list into an RCU list.

For protecting the read side, reuse kvm->irq_srcu which is already used
for protecting a number of irq related things (kvm->irq_routing,
irqfd->resampler->list, kvm->irq_ack_notifier_list,
kvm->arch.mask_notifier_list).

Signed-off-by: Dmytro Maluka <dmy@semihalf.com>
Message-Id: <20230322204344.50138-2-dmy@semihalf.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  | 1 +
 include/linux/kvm_irqfd.h | 2 +-
 virt/kvm/eventfd.c        | 8 ++++++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8ada23756b0e..9f508c8e66e1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -755,6 +755,7 @@ struct kvm {
 	struct {
 		spinlock_t        lock;
 		struct list_head  items;
+		/* resampler_list update side is protected by resampler_lock. */
 		struct list_head  resampler_list;
 		struct mutex      resampler_lock;
 	} irqfds;
diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h
index dac047abdba7..8ad43692e3bb 100644
--- a/include/linux/kvm_irqfd.h
+++ b/include/linux/kvm_irqfd.h
@@ -31,7 +31,7 @@ struct kvm_kernel_irqfd_resampler {
 	/*
 	 * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
 	 * resamplers among irqfds on the same gsi.
-	 * Accessed and modified under kvm->irqfds.resampler_lock
+	 * RCU list modified under kvm->irqfds.resampler_lock
 	 */
 	struct list_head link;
 };
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2a3ed401ce46..61aea70dd888 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -96,8 +96,12 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
 	synchronize_srcu(&kvm->irq_srcu);
 
 	if (list_empty(&resampler->list)) {
-		list_del(&resampler->link);
+		list_del_rcu(&resampler->link);
 		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
+		/*
+		 * synchronize_srcu(&kvm->irq_srcu) already called
+		 * in kvm_unregister_irq_ack_notifier().
+		 */
 		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 			    resampler->notifier.gsi, 0, false);
 		kfree(resampler);
@@ -369,7 +373,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 			resampler->notifier.irq_acked = irqfd_resampler_ack;
 			INIT_LIST_HEAD(&resampler->link);
 
-			list_add(&resampler->link, &kvm->irqfds.resampler_list);
+			list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list);
 			kvm_register_irq_ack_notifier(kvm,
 						      &resampler->notifier);
 			irqfd->resampler = resampler;

From fef8f2b90edbd7089a4278021314f11f056b0cbb Mon Sep 17 00:00:00 2001
From: Dmytro Maluka <dmy@semihalf.com>
Date: Wed, 22 Mar 2023 21:43:44 +0100
Subject: [PATCH 050/173] KVM: x86/ioapic: Resample the pending state of an IRQ
 when unmasking

KVM irqfd based emulation of level-triggered interrupts doesn't work
quite correctly in some cases, particularly in the case of interrupts
that are handled in a Linux guest as oneshot interrupts (IRQF_ONESHOT).
Such an interrupt is acked to the device in its threaded irq handler,
i.e. later than it is acked to the interrupt controller (EOI at the end
of hardirq), not earlier.

Linux keeps such interrupt masked until its threaded handler finishes,
to prevent the EOI from re-asserting an unacknowledged interrupt.
However, with KVM + vfio (or whatever is listening on the resamplefd)
we always notify resamplefd at the EOI, so vfio prematurely unmasks the
host physical IRQ, thus a new physical interrupt is fired in the host.
This extra interrupt in the host is not a problem per se. The problem is
that it is unconditionally queued for injection into the guest, so the
guest sees an extra bogus interrupt. [*]

There are observed at least 2 user-visible issues caused by those
extra erroneous interrupts for a oneshot irq in the guest:

1. System suspend aborted due to a pending wakeup interrupt from
   ChromeOS EC (drivers/platform/chrome/cros_ec.c).
2. Annoying "invalid report id data" errors from ELAN0000 touchpad
   (drivers/input/mouse/elan_i2c_core.c), flooding the guest dmesg
   every time the touchpad is touched.

The core issue here is that by the time when the guest unmasks the IRQ,
the physical IRQ line is no longer asserted (since the guest has
acked the interrupt to the device in the meantime), yet we
unconditionally inject the interrupt queued into the guest by the
previous resampling. So to fix the issue, we need a way to detect that
the IRQ is no longer pending, and cancel the queued interrupt in this
case.

With IOAPIC we are not able to probe the physical IRQ line state
directly (at least not if the underlying physical interrupt controller
is an IOAPIC too), so in this patch we use irqfd resampler for that.
Namely, instead of injecting the queued interrupt, we just notify the
resampler that this interrupt is done. If the IRQ line is actually
already deasserted, we are done. If it is still asserted, a new
interrupt will be shortly triggered through irqfd and injected into the
guest.

In the case if there is no irqfd resampler registered for this IRQ, we
cannot fix the issue, so we keep the existing behavior: immediately
unconditionally inject the queued interrupt.

This patch fixes the issue for x86 IOAPIC only. In the long run, we can
fix it for other irqchips and other architectures too, possibly taking
advantage of reading the physical state of the IRQ line, which is
possible with some other irqchips (e.g. with arm64 GIC, maybe even with
the legacy x86 PIC).

[*] In this description we assume that the interrupt is a physical host
    interrupt forwarded to the guest e.g. by vfio. Potentially the same
    issue may occur also with a purely virtual interrupt from an
    emulated device, e.g. if the guest handles this interrupt, again, as
    a oneshot interrupt.

Signed-off-by: Dmytro Maluka <dmy@semihalf.com>
Link: https://lore.kernel.org/kvm/31420943-8c5f-125c-a5ee-d2fde2700083@semihalf.com/
Link: https://lore.kernel.org/lkml/87o7wrug0w.wl-maz@kernel.org/
Message-Id: <20230322204344.50138-3-dmy@semihalf.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/ioapic.c    | 36 ++++++++++++++++++++++++++++++++---
 include/linux/kvm_host.h | 10 ++++++++++
 virt/kvm/eventfd.c       | 41 ++++++++++++++++++++++++++++++++++------
 3 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 042dee556125..995eb5054360 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
-		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
-		    && ioapic->irr & (1 << index))
-			ioapic_service(ioapic, index, false);
+		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
+		    ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) {
+			/*
+			 * Pending status in irr may be outdated: the IRQ line may have
+			 * already been deasserted by a device while the IRQ was masked.
+			 * This occurs, for instance, if the interrupt is handled in a
+			 * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this
+			 * case the guest acknowledges the interrupt to the device in
+			 * its threaded irq handler, i.e. after the EOI but before
+			 * unmasking, so at the time of unmasking the IRQ line is
+			 * already down but our pending irr bit is still set. In such
+			 * cases, injecting this pending interrupt to the guest is
+			 * buggy: the guest will receive an extra unwanted interrupt.
+			 *
+			 * So we need to check here if the IRQ is actually still pending.
+			 * As we are generally not able to probe the IRQ line status
+			 * directly, we do it through irqfd resampler. Namely, we clear
+			 * the pending status and notify the resampler that this interrupt
+			 * is done, without actually injecting it into the guest. If the
+			 * IRQ line is actually already deasserted, we are done. If it is
+			 * still asserted, a new interrupt will be shortly triggered
+			 * through irqfd and injected into the guest.
+			 *
+			 * If, however, it's not possible to resample (no irqfd resampler
+			 * registered for this irq), then unconditionally inject this
+			 * pending interrupt into the guest, so the guest will not miss
+			 * an interrupt, although may get an extra unwanted interrupt.
+			 */
+			if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index))
+				ioapic->irr &= ~(1 << index);
+			else
+				ioapic_service(ioapic, index, false);
+		}
 		if (e->fields.delivery_mode == APIC_DM_FIXED) {
 			struct kvm_lapic_irq irq;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9f508c8e66e1..a9adf75344be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1987,6 +1987,9 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 #ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
+bool kvm_notify_irqfd_resampler(struct kvm *kvm,
+				unsigned int irqchip,
+				unsigned int pin);
 void kvm_irq_routing_update(struct kvm *);
 #else
 static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
@@ -1995,6 +1998,13 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
+
+static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm,
+					      unsigned int irqchip,
+					      unsigned int pin)
+{
+	return false;
+}
 #endif
 
 #else
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 61aea70dd888..b0af834ffa95 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -55,6 +55,15 @@ irqfd_inject(struct work_struct *work)
 			    irqfd->gsi, 1, false);
 }
 
+static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler)
+{
+	struct kvm_kernel_irqfd *irqfd;
+
+	list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
+				 srcu_read_lock_held(&resampler->kvm->irq_srcu))
+		eventfd_signal(irqfd->resamplefd, 1);
+}
+
 /*
  * Since resampler irqfds share an IRQ source ID, we de-assert once
  * then notify all of the resampler irqfds using this GSI.  We can't
@@ -65,7 +74,6 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
 	struct kvm_kernel_irqfd_resampler *resampler;
 	struct kvm *kvm;
-	struct kvm_kernel_irqfd *irqfd;
 	int idx;
 
 	resampler = container_of(kian,
@@ -76,11 +84,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 		    resampler->notifier.gsi, 0, false);
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-
-	list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
-	    srcu_read_lock_held(&kvm->irq_srcu))
-		eventfd_signal(irqfd->resamplefd, 1);
-
+	irqfd_resampler_notify(resampler);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
@@ -648,6 +652,31 @@ void kvm_irq_routing_update(struct kvm *kvm)
 	spin_unlock_irq(&kvm->irqfds.lock);
 }
 
+bool kvm_notify_irqfd_resampler(struct kvm *kvm,
+				unsigned int irqchip,
+				unsigned int pin)
+{
+	struct kvm_kernel_irqfd_resampler *resampler;
+	int gsi, idx;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1) {
+		list_for_each_entry_srcu(resampler,
+					 &kvm->irqfds.resampler_list, link,
+					 srcu_read_lock_held(&kvm->irq_srcu)) {
+			if (resampler->notifier.gsi == gsi) {
+				irqfd_resampler_notify(resampler);
+				srcu_read_unlock(&kvm->irq_srcu, idx);
+				return true;
+			}
+		}
+	}
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+
+	return false;
+}
+
 /*
  * create a host-wide workqueue for issuing deferred shutdown requests
  * aggregated from all vm* instances. We need our own isolated

From 0dc902267cb32ade1c29eed8208e566ad743518a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 22 Mar 2023 07:12:20 -0700
Subject: [PATCH 051/173] KVM: x86: Suppress pending MMIO write exits if
 emulator detects exception

Clear vcpu->mmio_needed when injecting an exception from the emulator to
squash a (legitimate) warning about vcpu->mmio_needed being true at the
start of KVM_RUN without a callback being registered to complete the
userspace MMIO exit.  Suppressing the MMIO write exit is inarguably wrong
from an architectural perspective, but it is the least awful hack-a-fix
due to shortcomings in KVM's uAPI, not to mention that KVM already
suppresses MMIO writes in this scenario.

Outside of REP string instructions, KVM doesn't provide a way to resume
an instruction at the exact point where it was "interrupted" if said
instruction partially completed before encountering an MMIO access.  For
MMIO reads, KVM immediately exits to userspace upon detecting MMIO as
userspace provides the to-be-read value in a buffer, and so KVM can safely
(more or less) restart the instruction from the beginning.  When the
emulator re-encounters the MMIO read, KVM will service the MMIO by getting
the value from the buffer instead of exiting to userspace, i.e. KVM won't
put the vCPU into an infinite loop.

On an emulated MMIO write, KVM finishes the instruction before exiting to
userspace, as exiting immediately would ultimately hang the vCPU due to
the aforementioned shortcoming of KVM not being able to resume emulation
in the middle of an instruction.

For the vast majority of _emulated_ instructions, deferring the userspace
exit doesn't cause problems as very few x86 instructions (again ignoring
string operations) generate multiple writes.  But for instructions that
generate multiple writes, e.g. PUSHA (multiple pushes onto the stack),
deferring the exit effectively results in only the final write triggering
an exit to userspace.  KVM does support multiple MMIO "fragments", but
only for page splits; if an instruction performs multiple distinct MMIO
writes, the number of fragments gets reset when the next MMIO write comes
along and any previous MMIO writes are dropped.

Circling back to the warning, if a deferred MMIO write coincides with an
exception, e.g. in this case a #SS due to PUSHA underflowing the stack
after queueing a write to an MMIO page on a previous push, KVM injects
the exceptions and leaves the deferred MMIO pending without registering a
callback, thus triggering the splat.

Sweep the problem under the proverbial rug as dropping MMIO writes is not
unique to the exception scenario (see above), i.e. instructions like PUSHA
are fundamentally broken with respect to MMIO, and have been since KVM's
inception.

Reported-by: zhangjianguo <zhangjianguo18@huawei.com>
Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com
Reported-by: syzbot+8accb43ddc6bd1f5713a@syzkaller.appspotmail.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230322141220.2206241-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7713420abab0..45017576ad5e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8903,6 +8903,8 @@ restart:
 	}
 
 	if (ctxt->have_exception) {
+		WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write);
+		vcpu->mmio_needed = false;
 		r = 1;
 		inject_emulated_exception(vcpu);
 	} else if (vcpu->arch.pio.count) {

From 6c41468c7c12d74843bb414fc00307ea8a6318c3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 22 Mar 2023 07:32:59 -0700
Subject: [PATCH 052/173] KVM: x86: Clear "has_error_code", not "error_code",
 for RM exception injection

When injecting an exception into a vCPU in Real Mode, suppress the error
code by clearing the flag that tracks whether the error code is valid, not
by clearing the error code itself.  The "typo" was introduced by recent
fix for SVM's funky Paged Real Mode.

Opportunistically hoist the logic above the tracepoint so that the trace
is coherent with respect to what is actually injected (this was also the
behavior prior to the buggy commit).

Fixes: b97f07458373 ("KVM: x86: determine if an exception has an error code only when injecting it.")
Cc: stable@vger.kernel.org
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230322143300.2209476-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 45017576ad5e..7d6f98b7635f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9908,13 +9908,20 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
 
 static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * Suppress the error code if the vCPU is in Real Mode, as Real Mode
+	 * exceptions don't report error codes.  The presence of an error code
+	 * is carried with the exception and only stripped when the exception
+	 * is injected as intercepted #PF VM-Exits for AMD's Paged Real Mode do
+	 * report an error code despite the CPU being in Real Mode.
+	 */
+	vcpu->arch.exception.has_error_code &= is_protmode(vcpu);
+
 	trace_kvm_inj_exception(vcpu->arch.exception.vector,
 				vcpu->arch.exception.has_error_code,
 				vcpu->arch.exception.error_code,
 				vcpu->arch.exception.injected);
 
-	if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
-		vcpu->arch.exception.error_code = false;
 	static_call(kvm_x86_inject_exception)(vcpu);
 }
 

From 80962ec912db56d323883154efc2297473e692cb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 22 Mar 2023 07:33:00 -0700
Subject: [PATCH 053/173] KVM: nVMX: Do not report error code when synthesizing
 VM-Exit from Real Mode

Don't report an error code to L1 when synthesizing a nested VM-Exit and
L2 is in Real Mode.  Per Intel's SDM, regarding the error code valid bit:

  This bit is always 0 if the VM exit occurred while the logical processor
  was in real-address mode (CR0.PE=0).

The bug was introduced by a recent fix for AMD's Paged Real Mode, which
moved the error code suppression from the common "queue exception" path
to the "inject exception" path, but missed VMX's "synthesize VM-Exit"
path.

Fixes: b97f07458373 ("KVM: x86: determine if an exception has an error code only when injecting it.")
Cc: stable@vger.kernel.org
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230322143300.2209476-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1bc2b80273c9..768487611db7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3868,7 +3868,12 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
 		exit_qual = 0;
 	}
 
-	if (ex->has_error_code) {
+	/*
+	 * Unlike AMD's Paged Real Mode, which reports an error code on #PF
+	 * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the
+	 * "has error code" flags on VM-Exit if the CPU is in Real Mode.
+	 */
+	if (ex->has_error_code && is_protmode(vcpu)) {
 		/*
 		 * Intel CPUs do not generate error codes with bits 31:16 set,
 		 * and more importantly VMX disallows setting bits 31:16 in the

From c56610a869bce03490faf4f157076370c71b8ae3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 24 Mar 2023 14:33:42 +0100
Subject: [PATCH 054/173] ACPI: bus: Rework system-level device notification
 handling

For ACPI drivers that provide a ->notify() callback and set
ACPI_DRIVER_ALL_NOTIFY_EVENTS in their flags, that callback can be
invoked while either the ->add() or the ->remove() callback is running
without any synchronization at the bus type level which is counter to
the common-sense expectation that notification handling should only be
enabled when the driver is actually bound to the device.  As a result,
if the driver is not careful enough, it's ->notify() callback may crash
when it is invoked too early or too late [1].

This issue has been amplified by commit d6fb6ee1820c ("ACPI: bus: Drop
driver member of struct acpi_device") that made acpi_bus_notify() check
for the presence of the driver and its ->notify() callback directly
instead of using an extra driver pointer that was only set and cleared
by the bus type code, but it was present before that commit although
it was harder to reproduce then.

It can be addressed by using the observation that
acpi_device_install_notify_handler() can be modified to install the
handler for all types of events when ACPI_DRIVER_ALL_NOTIFY_EVENTS is
set in the driver flags, in which case acpi_bus_notify() will not need
to invoke the driver's ->notify() callback any more and that callback
will only be invoked after acpi_device_install_notify_handler() has run
and before acpi_device_remove_notify_handler() runs, which implies the
correct ordering with respect to the other ACPI driver callbacks.

Modify the code accordingly and while at it, drop two redundant local
variables from acpi_bus_notify() and turn its description comment into
a proper kerneldoc one.

Fixes: d6fb6ee1820c ("ACPI: bus: Drop driver member of struct acpi_device")
Link: https://lore.kernel.org/linux-acpi/9f6cba7a8a57e5a687c934e8e406e28c.squirrel@mail.panix.com # [1]
Reported-by: Pierre Asselin <pa@panix.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Pierre Asselin <pa@panix.com>
---
 drivers/acpi/bus.c | 85 +++++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 47 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 9531dd0fef50..a96da65057b1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -459,85 +459,67 @@ out_free:
                              Notification Handling
    -------------------------------------------------------------------------- */
 
-/*
- * acpi_bus_notify
- * ---------------
- * Callback for all 'system-level' device notifications (values 0x00-0x7F).
+/**
+ * acpi_bus_notify - Global system-level (0x00-0x7F) notifications handler
+ * @handle: Target ACPI object.
+ * @type: Notification type.
+ * @data: Ignored.
+ *
+ * This only handles notifications related to device hotplug.
  */
 static void acpi_bus_notify(acpi_handle handle, u32 type, void *data)
 {
 	struct acpi_device *adev;
-	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
-	bool hotplug_event = false;
 
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
 		acpi_handle_debug(handle, "ACPI_NOTIFY_BUS_CHECK event\n");
-		hotplug_event = true;
 		break;
 
 	case ACPI_NOTIFY_DEVICE_CHECK:
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK event\n");
-		hotplug_event = true;
 		break;
 
 	case ACPI_NOTIFY_DEVICE_WAKE:
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_WAKE event\n");
-		break;
+		return;
 
 	case ACPI_NOTIFY_EJECT_REQUEST:
 		acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
-		hotplug_event = true;
 		break;
 
 	case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
 		acpi_handle_debug(handle, "ACPI_NOTIFY_DEVICE_CHECK_LIGHT event\n");
 		/* TBD: Exactly what does 'light' mean? */
-		break;
+		return;
 
 	case ACPI_NOTIFY_FREQUENCY_MISMATCH:
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a frequency mismatch\n");
-		break;
+		return;
 
 	case ACPI_NOTIFY_BUS_MODE_MISMATCH:
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a bus mode mismatch\n");
-		break;
+		return;
 
 	case ACPI_NOTIFY_POWER_FAULT:
 		acpi_handle_err(handle, "Device has suffered a power fault\n");
-		break;
+		return;
 
 	default:
 		acpi_handle_debug(handle, "Unknown event type 0x%x\n", type);
-		break;
-	}
-
-	adev = acpi_get_acpi_dev(handle);
-	if (!adev)
-		goto err;
-
-	if (adev->dev.driver) {
-		struct acpi_driver *driver = to_acpi_driver(adev->dev.driver);
-
-		if (driver && driver->ops.notify &&
-		    (driver->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS))
-			driver->ops.notify(adev, type);
-	}
-
-	if (!hotplug_event) {
-		acpi_put_acpi_dev(adev);
 		return;
 	}
 
-	if (ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
+	adev = acpi_get_acpi_dev(handle);
+
+	if (adev && ACPI_SUCCESS(acpi_hotplug_schedule(adev, type)))
 		return;
 
 	acpi_put_acpi_dev(adev);
 
- err:
-	acpi_evaluate_ost(handle, type, ost_code, NULL);
+	acpi_evaluate_ost(handle, type, ACPI_OST_SC_NON_SPECIFIC_FAILURE, NULL);
 }
 
 static void acpi_notify_device(acpi_handle handle, u32 event, void *data)
@@ -562,42 +544,51 @@ static u32 acpi_device_fixed_event(void *data)
 	return ACPI_INTERRUPT_HANDLED;
 }
 
-static int acpi_device_install_notify_handler(struct acpi_device *device)
+static int acpi_device_install_notify_handler(struct acpi_device *device,
+					      struct acpi_driver *acpi_drv)
 {
 	acpi_status status;
 
-	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
+	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
 		status =
 		    acpi_install_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
 						     acpi_device_fixed_event,
 						     device);
-	else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
+	} else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
 		status =
 		    acpi_install_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
 						     acpi_device_fixed_event,
 						     device);
-	else
-		status = acpi_install_notify_handler(device->handle,
-						     ACPI_DEVICE_NOTIFY,
+	} else {
+		u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
+				ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
+
+		status = acpi_install_notify_handler(device->handle, type,
 						     acpi_notify_device,
 						     device);
+	}
 
 	if (ACPI_FAILURE(status))
 		return -EINVAL;
 	return 0;
 }
 
-static void acpi_device_remove_notify_handler(struct acpi_device *device)
+static void acpi_device_remove_notify_handler(struct acpi_device *device,
+					      struct acpi_driver *acpi_drv)
 {
-	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON)
+	if (device->device_type == ACPI_BUS_TYPE_POWER_BUTTON) {
 		acpi_remove_fixed_event_handler(ACPI_EVENT_POWER_BUTTON,
 						acpi_device_fixed_event);
-	else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON)
+	} else if (device->device_type == ACPI_BUS_TYPE_SLEEP_BUTTON) {
 		acpi_remove_fixed_event_handler(ACPI_EVENT_SLEEP_BUTTON,
 						acpi_device_fixed_event);
-	else
-		acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY,
+	} else {
+		u32 type = acpi_drv->flags & ACPI_DRIVER_ALL_NOTIFY_EVENTS ?
+				ACPI_ALL_NOTIFY : ACPI_DEVICE_NOTIFY;
+
+		acpi_remove_notify_handler(device->handle, type,
 					   acpi_notify_device);
+	}
 }
 
 /* Handle events targeting \_SB device (at present only graceful shutdown) */
@@ -1039,7 +1030,7 @@ static int acpi_device_probe(struct device *dev)
 		 acpi_drv->name, acpi_dev->pnp.bus_id);
 
 	if (acpi_drv->ops.notify) {
-		ret = acpi_device_install_notify_handler(acpi_dev);
+		ret = acpi_device_install_notify_handler(acpi_dev, acpi_drv);
 		if (ret) {
 			if (acpi_drv->ops.remove)
 				acpi_drv->ops.remove(acpi_dev);
@@ -1062,7 +1053,7 @@ static void acpi_device_remove(struct device *dev)
 	struct acpi_driver *acpi_drv = to_acpi_driver(dev->driver);
 
 	if (acpi_drv->ops.notify)
-		acpi_device_remove_notify_handler(acpi_dev);
+		acpi_device_remove_notify_handler(acpi_dev, acpi_drv);
 
 	if (acpi_drv->ops.remove)
 		acpi_drv->ops.remove(acpi_dev);

From bb430b69422640891b0b8db762885730579a4145 Mon Sep 17 00:00:00 2001
From: Alyssa Ross <hi@alyssa.is>
Date: Mon, 20 Mar 2023 13:54:30 +0100
Subject: [PATCH 055/173] loop: LOOP_CONFIGURE: send uevents for partitions

LOOP_CONFIGURE is, as far as I understand it, supposed to be a way to
combine LOOP_SET_FD and LOOP_SET_STATUS64 into a single syscall.  When
using LOOP_SET_FD+LOOP_SET_STATUS64, a single uevent would be sent for
each partition found on the loop device after the second ioctl(), but
when using LOOP_CONFIGURE, no such uevent was being sent.

In the old setup, uevents are disabled for LOOP_SET_FD, but not for
LOOP_SET_STATUS64.  This makes sense, as it prevents uevents being
sent for a partially configured device during LOOP_SET_FD - they're
only sent at the end of LOOP_SET_STATUS64.  But for LOOP_CONFIGURE,
uevents were disabled for the entire operation, so that final
notification was never issued.  To fix this, reduce the critical
section to exclude the loop_reread_partitions() call, which causes
the uevents to be issued, to after uevents are re-enabled, matching
the behaviour of the LOOP_SET_FD+LOOP_SET_STATUS64 combination.

I noticed this because Busybox's losetup program recently changed from
using LOOP_SET_FD+LOOP_SET_STATUS64 to LOOP_CONFIGURE, and this broke
my setup, for which I want a notification from the kernel any time a
new partition becomes available.

Signed-off-by: Alyssa Ross <hi@alyssa.is>
[hch: reduced the critical section]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Fixes: 3448914e8cc5 ("loop: Add LOOP_CONFIGURE ioctl")
Link: https://lore.kernel.org/r/20230320125430.55367-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 28eb59fd71ca..bc31bb7072a2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1010,9 +1010,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
 
-	/* suppress uevents while reconfiguring the device */
-	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
-
 	/*
 	 * If we don't hold exclusive handle for the device, upgrade to it
 	 * here to avoid changing device under exclusive owner.
@@ -1067,6 +1064,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 		}
 	}
 
+	/* suppress uevents while reconfiguring the device */
+	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
+
 	disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
 	set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
@@ -1109,17 +1109,17 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 	if (partscan)
 		clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
 
+	/* enable and uncork uevent now that we are done */
+	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
+
 	loop_global_unlock(lo, is_loop);
 	if (partscan)
 		loop_reread_partitions(lo);
+
 	if (!(mode & FMODE_EXCL))
 		bd_abort_claiming(bdev, loop_configure);
 
-	error = 0;
-done:
-	/* enable and uncork uevent now that we are done */
-	dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
-	return error;
+	return 0;
 
 out_unlock:
 	loop_global_unlock(lo, is_loop);
@@ -1130,7 +1130,7 @@ out_putf:
 	fput(file);
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
-	goto done;
+	return error;
 }
 
 static void __loop_clr_fd(struct loop_device *lo, bool release)

From 2f1a6be12ab6c8470d5776e68644726c94257c54 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 22 Mar 2023 10:33:28 +0000
Subject: [PATCH 056/173] btrfs: fix race between quota disable and quota
 assign ioctls

The quota assign ioctl can currently run in parallel with a quota disable
ioctl call. The assign ioctl uses the quota root, while the disable ioctl
frees that root, and therefore we can have a use-after-free triggered in
the assign ioctl, leading to a trace like the following when KASAN is
enabled:

  [672.723][T736] BUG: KASAN: slab-use-after-free in btrfs_search_slot+0x2962/0x2db0
  [672.723][T736] Read of size 8 at addr ffff888022ec0208 by task btrfs_search_sl/27736
  [672.724][T736]
  [672.725][T736] CPU: 1 PID: 27736 Comm: btrfs_search_sl Not tainted 6.3.0-rc3 #37
  [672.723][T736] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
  [672.727][T736] Call Trace:
  [672.728][T736]  <TASK>
  [672.728][T736]  dump_stack_lvl+0xd9/0x150
  [672.725][T736]  print_report+0xc1/0x5e0
  [672.720][T736]  ? __virt_addr_valid+0x61/0x2e0
  [672.727][T736]  ? __phys_addr+0xc9/0x150
  [672.725][T736]  ? btrfs_search_slot+0x2962/0x2db0
  [672.722][T736]  kasan_report+0xc0/0xf0
  [672.729][T736]  ? btrfs_search_slot+0x2962/0x2db0
  [672.724][T736]  btrfs_search_slot+0x2962/0x2db0
  [672.723][T736]  ? fs_reclaim_acquire+0xba/0x160
  [672.722][T736]  ? split_leaf+0x13d0/0x13d0
  [672.726][T736]  ? rcu_is_watching+0x12/0xb0
  [672.723][T736]  ? kmem_cache_alloc+0x338/0x3c0
  [672.722][T736]  update_qgroup_status_item+0xf7/0x320
  [672.724][T736]  ? add_qgroup_rb+0x3d0/0x3d0
  [672.739][T736]  ? do_raw_spin_lock+0x12d/0x2b0
  [672.730][T736]  ? spin_bug+0x1d0/0x1d0
  [672.737][T736]  btrfs_run_qgroups+0x5de/0x840
  [672.730][T736]  ? btrfs_qgroup_rescan_worker+0xa70/0xa70
  [672.738][T736]  ? __del_qgroup_relation+0x4ba/0xe00
  [672.738][T736]  btrfs_ioctl+0x3d58/0x5d80
  [672.735][T736]  ? tomoyo_path_number_perm+0x16a/0x550
  [672.737][T736]  ? tomoyo_execute_permission+0x4a0/0x4a0
  [672.731][T736]  ? btrfs_ioctl_get_supported_features+0x50/0x50
  [672.737][T736]  ? __sanitizer_cov_trace_switch+0x54/0x90
  [672.734][T736]  ? do_vfs_ioctl+0x132/0x1660
  [672.730][T736]  ? vfs_fileattr_set+0xc40/0xc40
  [672.730][T736]  ? _raw_spin_unlock_irq+0x2e/0x50
  [672.732][T736]  ? sigprocmask+0xf2/0x340
  [672.737][T736]  ? __fget_files+0x26a/0x480
  [672.732][T736]  ? bpf_lsm_file_ioctl+0x9/0x10
  [672.738][T736]  ? btrfs_ioctl_get_supported_features+0x50/0x50
  [672.736][T736]  __x64_sys_ioctl+0x198/0x210
  [672.736][T736]  do_syscall_64+0x39/0xb0
  [672.731][T736]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
  [672.739][T736] RIP: 0033:0x4556ad
  [672.742][T736]  </TASK>
  [672.743][T736]
  [672.748][T736] Allocated by task 27677:
  [672.743][T736]  kasan_save_stack+0x22/0x40
  [672.741][T736]  kasan_set_track+0x25/0x30
  [672.741][T736]  __kasan_kmalloc+0xa4/0xb0
  [672.749][T736]  btrfs_alloc_root+0x48/0x90
  [672.746][T736]  btrfs_create_tree+0x146/0xa20
  [672.744][T736]  btrfs_quota_enable+0x461/0x1d20
  [672.743][T736]  btrfs_ioctl+0x4a1c/0x5d80
  [672.747][T736]  __x64_sys_ioctl+0x198/0x210
  [672.749][T736]  do_syscall_64+0x39/0xb0
  [672.744][T736]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
  [672.756][T736]
  [672.757][T736] Freed by task 27677:
  [672.759][T736]  kasan_save_stack+0x22/0x40
  [672.759][T736]  kasan_set_track+0x25/0x30
  [672.756][T736]  kasan_save_free_info+0x2e/0x50
  [672.751][T736]  ____kasan_slab_free+0x162/0x1c0
  [672.758][T736]  slab_free_freelist_hook+0x89/0x1c0
  [672.752][T736]  __kmem_cache_free+0xaf/0x2e0
  [672.752][T736]  btrfs_put_root+0x1ff/0x2b0
  [672.759][T736]  btrfs_quota_disable+0x80a/0xbc0
  [672.752][T736]  btrfs_ioctl+0x3e5f/0x5d80
  [672.756][T736]  __x64_sys_ioctl+0x198/0x210
  [672.753][T736]  do_syscall_64+0x39/0xb0
  [672.765][T736]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
  [672.769][T736]
  [672.768][T736] The buggy address belongs to the object at ffff888022ec0000
  [672.768][T736]  which belongs to the cache kmalloc-4k of size 4096
  [672.769][T736] The buggy address is located 520 bytes inside of
  [672.769][T736]  freed 4096-byte region [ffff888022ec0000, ffff888022ec1000)
  [672.760][T736]
  [672.764][T736] The buggy address belongs to the physical page:
  [672.761][T736] page:ffffea00008bb000 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x22ec0
  [672.766][T736] head:ffffea00008bb000 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
  [672.779][T736] flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff)
  [672.770][T736] raw: 00fff00000010200 ffff888012842140 ffffea000054ba00 dead000000000002
  [672.770][T736] raw: 0000000000000000 0000000000040004 00000001ffffffff 0000000000000000
  [672.771][T736] page dumped because: kasan: bad access detected
  [672.778][T736] page_owner tracks the page as allocated
  [672.777][T736] page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd2040(__GFP_IO|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 88
  [672.779][T736]  get_page_from_freelist+0x119c/0x2d50
  [672.779][T736]  __alloc_pages+0x1cb/0x4a0
  [672.776][T736]  alloc_pages+0x1aa/0x270
  [672.773][T736]  allocate_slab+0x260/0x390
  [672.771][T736]  ___slab_alloc+0xa9a/0x13e0
  [672.778][T736]  __slab_alloc.constprop.0+0x56/0xb0
  [672.771][T736]  __kmem_cache_alloc_node+0x136/0x320
  [672.789][T736]  __kmalloc+0x4e/0x1a0
  [672.783][T736]  tomoyo_realpath_from_path+0xc3/0x600
  [672.781][T736]  tomoyo_path_perm+0x22f/0x420
  [672.782][T736]  tomoyo_path_unlink+0x92/0xd0
  [672.780][T736]  security_path_unlink+0xdb/0x150
  [672.788][T736]  do_unlinkat+0x377/0x680
  [672.788][T736]  __x64_sys_unlink+0xca/0x110
  [672.789][T736]  do_syscall_64+0x39/0xb0
  [672.783][T736]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
  [672.784][T736] page last free stack trace:
  [672.787][T736]  free_pcp_prepare+0x4e5/0x920
  [672.787][T736]  free_unref_page+0x1d/0x4e0
  [672.784][T736]  __unfreeze_partials+0x17c/0x1a0
  [672.797][T736]  qlist_free_all+0x6a/0x180
  [672.796][T736]  kasan_quarantine_reduce+0x189/0x1d0
  [672.797][T736]  __kasan_slab_alloc+0x64/0x90
  [672.793][T736]  kmem_cache_alloc+0x17c/0x3c0
  [672.799][T736]  getname_flags.part.0+0x50/0x4e0
  [672.799][T736]  getname_flags+0x9e/0xe0
  [672.792][T736]  vfs_fstatat+0x77/0xb0
  [672.791][T736]  __do_sys_newlstat+0x84/0x100
  [672.798][T736]  do_syscall_64+0x39/0xb0
  [672.796][T736]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
  [672.790][T736]
  [672.791][T736] Memory state around the buggy address:
  [672.799][T736]  ffff888022ec0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  [672.805][T736]  ffff888022ec0180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  [672.802][T736] >ffff888022ec0200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  [672.809][T736]                       ^
  [672.809][T736]  ffff888022ec0280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  [672.809][T736]  ffff888022ec0300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

Fix this by having the qgroup assign ioctl take the qgroup ioctl mutex
before calling btrfs_run_qgroups(), which is what all qgroup ioctls should
call.

Reported-by: butt3rflyh4ck <butterflyhuangxx@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CAFcO6XN3VD8ogmHwqRk4kbiwtpUSNySu2VAxN8waEPciCHJvMA@mail.gmail.com/
CC: stable@vger.kernel.org # 5.10+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c  |  2 ++
 fs/btrfs/qgroup.c | 11 ++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 439a5bf5ebc6..f9dbf41fc803 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3732,7 +3732,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
 	}
 
 	/* update qgroup status and info */
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	err = btrfs_run_qgroups(trans);
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	if (err < 0)
 		btrfs_handle_fs_error(fs_info, err,
 				      "failed to update qgroup status and info");
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 52a7d2fa2284..f41da7ac360d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2828,13 +2828,22 @@ cleanup:
 }
 
 /*
- * called from commit_transaction. Writes all changed qgroups to disk.
+ * Writes all changed qgroups to disk.
+ * Called by the transaction commit path and the qgroup assign ioctl.
  */
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret = 0;
 
+	/*
+	 * In case we are called from the qgroup assign ioctl, assert that we
+	 * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
+	 * disable operation (ioctl) and access a freed quota root.
+	 */
+	if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
+		lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
+
 	if (!fs_info->quota_root)
 		return ret;
 

From 50d281fc434cb8e2497f5e70a309ccca6b1a09f0 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Thu, 23 Mar 2023 15:56:48 +0800
Subject: [PATCH 057/173] btrfs: scan device in non-exclusive mode

This fixes mkfs/mount/check failures due to race with systemd-udevd
scan.

During the device scan initiated by systemd-udevd, other user space
EXCL operations such as mkfs, mount, or check may get blocked and result
in a "Device or resource busy" error. This is because the device
scan process opens the device with the EXCL flag in the kernel.

Two reports were received:

 - btrfs/179 test case, where the fsck command failed with the -EBUSY
   error

 - LTP pwritev03 test case, where mkfs.vfs failed with
   the -EBUSY error, when mkfs.vfs tried to overwrite old btrfs filesystem
   on the device.

In both cases, fsck and mkfs (respectively) were racing with a
systemd-udevd device scan, and systemd-udevd won, resulting in the
-EBUSY error for fsck and mkfs.

Reproducing the problem has been difficult because there is a very
small window during which these userspace threads can race to
acquire the exclusive device open. Even on the system where the problem
was observed, the problem occurrences were anywhere between 10 to 400
iterations and chances of reproducing decreases with debug printk()s.

However, an exclusive device open is unnecessary for the scan process,
as there are no write operations on the device during scan. Furthermore,
during the mount process, the superblock is re-read in the below
function call chain:

  btrfs_mount_root
   btrfs_open_devices
    open_fs_devices
     btrfs_open_one_device
       btrfs_get_bdev_and_sb

So, to fix this issue, removes the FMODE_EXCL flag from the scan
operation, and add a comment.

The case where mkfs may still write to the device and a scan is running,
the btrfs signature is not written at that time so scan will not
recognize such device.

Reported-by: Sherry Yang <sherry.yang@oracle.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Link: https://lore.kernel.org/oe-lkp/202303170839.fdf23068-oliver.sang@intel.com
CC: stable@vger.kernel.org # 5.4+
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6d0124b6e79e..ac0e8fb92fc8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1366,8 +1366,17 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
 	 * So, we need to add a special mount option to scan for
 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
 	 */
-	flags |= FMODE_EXCL;
 
+	/*
+	 * Avoid using flag |= FMODE_EXCL here, as the systemd-udev may
+	 * initiate the device scan which may race with the user's mount
+	 * or mkfs command, resulting in failure.
+	 * Since the device scan is solely for reading purposes, there is
+	 * no need for FMODE_EXCL. Additionally, the devices are read again
+	 * during the mount process. It is ok to get some inconsistent
+	 * values temporarily, as the device paths of the fsid are the only
+	 * required information for assembling the volume.
+	 */
 	bdev = blkdev_get_by_path(path, flags, holder);
 	if (IS_ERR(bdev))
 		return ERR_CAST(bdev);

From 2d82a40aa7d6fcae0250ec68b8566cdee7bfd44c Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 22 Mar 2023 09:46:34 +0000
Subject: [PATCH 058/173] btrfs: fix deadlock when aborting transaction during
 relocation with scrub

Before relocating a block group we pause scrub, then do the relocation and
then unpause scrub. The relocation process requires starting and committing
a transaction, and if we have a failure in the critical section of the
transaction commit path (transaction state >= TRANS_STATE_COMMIT_START),
we will deadlock if there is a paused scrub.

That results in stack traces like the following:

  [42.479] BTRFS info (device sdc): relocating block group 53876686848 flags metadata|raid6
  [42.936] BTRFS warning (device sdc): Skipping commit of aborted transaction.
  [42.936] ------------[ cut here ]------------
  [42.936] BTRFS: Transaction aborted (error -28)
  [42.936] WARNING: CPU: 11 PID: 346822 at fs/btrfs/transaction.c:1977 btrfs_commit_transaction+0xcc8/0xeb0 [btrfs]
  [42.936] Modules linked in: dm_flakey dm_mod loop btrfs (...)
  [42.936] CPU: 11 PID: 346822 Comm: btrfs Tainted: G        W          6.3.0-rc2-btrfs-next-127+ #1
  [42.936] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
  [42.936] RIP: 0010:btrfs_commit_transaction+0xcc8/0xeb0 [btrfs]
  [42.936] Code: ff ff 45 8b (...)
  [42.936] RSP: 0018:ffffb58649633b48 EFLAGS: 00010282
  [42.936] RAX: 0000000000000000 RBX: ffff8be6ef4d5bd8 RCX: 0000000000000000
  [42.936] RDX: 0000000000000002 RSI: ffffffffb35e7782 RDI: 00000000ffffffff
  [42.936] RBP: ffff8be6ef4d5c98 R08: 0000000000000000 R09: ffffb586496339e8
  [42.936] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8be6d38c7c00
  [42.936] R13: 00000000ffffffe4 R14: ffff8be6c268c000 R15: ffff8be6ef4d5cf0
  [42.936] FS:  00007f381a82b340(0000) GS:ffff8beddfcc0000(0000) knlGS:0000000000000000
  [42.936] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [42.936] CR2: 00007f1e35fb7638 CR3: 0000000117680006 CR4: 0000000000370ee0
  [42.936] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [42.936] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [42.936] Call Trace:
  [42.936]  <TASK>
  [42.936]  ? start_transaction+0xcb/0x610 [btrfs]
  [42.936]  prepare_to_relocate+0x111/0x1a0 [btrfs]
  [42.936]  relocate_block_group+0x57/0x5d0 [btrfs]
  [42.936]  ? btrfs_wait_nocow_writers+0x25/0xb0 [btrfs]
  [42.936]  btrfs_relocate_block_group+0x248/0x3c0 [btrfs]
  [42.936]  ? __pfx_autoremove_wake_function+0x10/0x10
  [42.936]  btrfs_relocate_chunk+0x3b/0x150 [btrfs]
  [42.936]  btrfs_balance+0x8ff/0x11d0 [btrfs]
  [42.936]  ? __kmem_cache_alloc_node+0x14a/0x410
  [42.936]  btrfs_ioctl+0x2334/0x32c0 [btrfs]
  [42.937]  ? mod_objcg_state+0xd2/0x360
  [42.937]  ? refill_obj_stock+0xb0/0x160
  [42.937]  ? seq_release+0x25/0x30
  [42.937]  ? __rseq_handle_notify_resume+0x3b5/0x4b0
  [42.937]  ? percpu_counter_add_batch+0x2e/0xa0
  [42.937]  ? __x64_sys_ioctl+0x88/0xc0
  [42.937]  __x64_sys_ioctl+0x88/0xc0
  [42.937]  do_syscall_64+0x38/0x90
  [42.937]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
  [42.937] RIP: 0033:0x7f381a6ffe9b
  [42.937] Code: 00 48 89 44 24 (...)
  [42.937] RSP: 002b:00007ffd45ecf060 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [42.937] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f381a6ffe9b
  [42.937] RDX: 00007ffd45ecf150 RSI: 00000000c4009420 RDI: 0000000000000003
  [42.937] RBP: 0000000000000003 R08: 0000000000000013 R09: 0000000000000000
  [42.937] R10: 00007f381a60c878 R11: 0000000000000246 R12: 00007ffd45ed0423
  [42.937] R13: 00007ffd45ecf150 R14: 0000000000000000 R15: 00007ffd45ecf148
  [42.937]  </TASK>
  [42.937] ---[ end trace 0000000000000000 ]---
  [42.937] BTRFS: error (device sdc: state A) in cleanup_transaction:1977: errno=-28 No space left
  [59.196] INFO: task btrfs:346772 blocked for more than 120 seconds.
  [59.196]       Tainted: G        W          6.3.0-rc2-btrfs-next-127+ #1
  [59.196] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [59.196] task:btrfs           state:D stack:0     pid:346772 ppid:1      flags:0x00004002
  [59.196] Call Trace:
  [59.196]  <TASK>
  [59.196]  __schedule+0x392/0xa70
  [59.196]  ? __pv_queued_spin_lock_slowpath+0x165/0x370
  [59.196]  schedule+0x5d/0xd0
  [59.196]  __scrub_blocked_if_needed+0x74/0xc0 [btrfs]
  [59.197]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.197]  scrub_pause_off+0x21/0x50 [btrfs]
  [59.197]  scrub_simple_mirror+0x1c7/0x950 [btrfs]
  [59.197]  ? scrub_parity_put+0x1a5/0x1d0 [btrfs]
  [59.198]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.198]  scrub_stripe+0x20d/0x740 [btrfs]
  [59.198]  scrub_chunk+0xc4/0x130 [btrfs]
  [59.198]  scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs]
  [59.198]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.198]  btrfs_scrub_dev+0x236/0x6a0 [btrfs]
  [59.199]  ? btrfs_ioctl+0xd97/0x32c0 [btrfs]
  [59.199]  ? _copy_from_user+0x7b/0x80
  [59.199]  btrfs_ioctl+0xde1/0x32c0 [btrfs]
  [59.199]  ? refill_stock+0x33/0x50
  [59.199]  ? should_failslab+0xa/0x20
  [59.199]  ? kmem_cache_alloc_node+0x151/0x460
  [59.199]  ? alloc_io_context+0x1b/0x80
  [59.199]  ? preempt_count_add+0x70/0xa0
  [59.199]  ? __x64_sys_ioctl+0x88/0xc0
  [59.199]  __x64_sys_ioctl+0x88/0xc0
  [59.199]  do_syscall_64+0x38/0x90
  [59.199]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
  [59.199] RIP: 0033:0x7f82ffaffe9b
  [59.199] RSP: 002b:00007f82ff9fcc50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [59.199] RAX: ffffffffffffffda RBX: 000055b191e36310 RCX: 00007f82ffaffe9b
  [59.199] RDX: 000055b191e36310 RSI: 00000000c400941b RDI: 0000000000000003
  [59.199] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000
  [59.199] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82ff9fd640
  [59.199] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000
  [59.199]  </TASK>
  [59.199] INFO: task btrfs:346773 blocked for more than 120 seconds.
  [59.200]       Tainted: G        W          6.3.0-rc2-btrfs-next-127+ #1
  [59.200] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [59.201] task:btrfs           state:D stack:0     pid:346773 ppid:1      flags:0x00004002
  [59.201] Call Trace:
  [59.201]  <TASK>
  [59.201]  __schedule+0x392/0xa70
  [59.201]  ? __pv_queued_spin_lock_slowpath+0x165/0x370
  [59.201]  schedule+0x5d/0xd0
  [59.201]  __scrub_blocked_if_needed+0x74/0xc0 [btrfs]
  [59.201]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.201]  scrub_pause_off+0x21/0x50 [btrfs]
  [59.202]  scrub_simple_mirror+0x1c7/0x950 [btrfs]
  [59.202]  ? scrub_parity_put+0x1a5/0x1d0 [btrfs]
  [59.202]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.202]  scrub_stripe+0x20d/0x740 [btrfs]
  [59.202]  scrub_chunk+0xc4/0x130 [btrfs]
  [59.203]  scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs]
  [59.203]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.203]  btrfs_scrub_dev+0x236/0x6a0 [btrfs]
  [59.203]  ? btrfs_ioctl+0xd97/0x32c0 [btrfs]
  [59.203]  ? _copy_from_user+0x7b/0x80
  [59.203]  btrfs_ioctl+0xde1/0x32c0 [btrfs]
  [59.204]  ? should_failslab+0xa/0x20
  [59.204]  ? kmem_cache_alloc_node+0x151/0x460
  [59.204]  ? alloc_io_context+0x1b/0x80
  [59.204]  ? preempt_count_add+0x70/0xa0
  [59.204]  ? __x64_sys_ioctl+0x88/0xc0
  [59.204]  __x64_sys_ioctl+0x88/0xc0
  [59.204]  do_syscall_64+0x38/0x90
  [59.204]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
  [59.204] RIP: 0033:0x7f82ffaffe9b
  [59.204] RSP: 002b:00007f82ff1fbc50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [59.204] RAX: ffffffffffffffda RBX: 000055b191e36790 RCX: 00007f82ffaffe9b
  [59.204] RDX: 000055b191e36790 RSI: 00000000c400941b RDI: 0000000000000003
  [59.204] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000
  [59.204] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82ff1fc640
  [59.204] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000
  [59.204]  </TASK>
  [59.204] INFO: task btrfs:346774 blocked for more than 120 seconds.
  [59.205]       Tainted: G        W          6.3.0-rc2-btrfs-next-127+ #1
  [59.205] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [59.206] task:btrfs           state:D stack:0     pid:346774 ppid:1      flags:0x00004002
  [59.206] Call Trace:
  [59.206]  <TASK>
  [59.206]  __schedule+0x392/0xa70
  [59.206]  schedule+0x5d/0xd0
  [59.206]  __scrub_blocked_if_needed+0x74/0xc0 [btrfs]
  [59.206]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.206]  scrub_pause_off+0x21/0x50 [btrfs]
  [59.207]  scrub_simple_mirror+0x1c7/0x950 [btrfs]
  [59.207]  ? scrub_parity_put+0x1a5/0x1d0 [btrfs]
  [59.207]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.207]  scrub_stripe+0x20d/0x740 [btrfs]
  [59.208]  scrub_chunk+0xc4/0x130 [btrfs]
  [59.208]  scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs]
  [59.208]  ? __mutex_unlock_slowpath.isra.0+0x9a/0x120
  [59.208]  btrfs_scrub_dev+0x236/0x6a0 [btrfs]
  [59.208]  ? btrfs_ioctl+0xd97/0x32c0 [btrfs]
  [59.209]  ? _copy_from_user+0x7b/0x80
  [59.209]  btrfs_ioctl+0xde1/0x32c0 [btrfs]
  [59.209]  ? should_failslab+0xa/0x20
  [59.209]  ? kmem_cache_alloc_node+0x151/0x460
  [59.209]  ? alloc_io_context+0x1b/0x80
  [59.209]  ? preempt_count_add+0x70/0xa0
  [59.209]  ? __x64_sys_ioctl+0x88/0xc0
  [59.209]  __x64_sys_ioctl+0x88/0xc0
  [59.209]  do_syscall_64+0x38/0x90
  [59.209]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
  [59.209] RIP: 0033:0x7f82ffaffe9b
  [59.209] RSP: 002b:00007f82fe9fac50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [59.209] RAX: ffffffffffffffda RBX: 000055b191e36c10 RCX: 00007f82ffaffe9b
  [59.209] RDX: 000055b191e36c10 RSI: 00000000c400941b RDI: 0000000000000003
  [59.209] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000
  [59.209] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fe9fb640
  [59.209] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000
  [59.209]  </TASK>
  [59.209] INFO: task btrfs:346775 blocked for more than 120 seconds.
  [59.210]       Tainted: G        W          6.3.0-rc2-btrfs-next-127+ #1
  [59.210] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [59.211] task:btrfs           state:D stack:0     pid:346775 ppid:1      flags:0x00004002
  [59.211] Call Trace:
  [59.211]  <TASK>
  [59.211]  __schedule+0x392/0xa70
  [59.211]  schedule+0x5d/0xd0
  [59.211]  __scrub_blocked_if_needed+0x74/0xc0 [btrfs]
  [59.211]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.211]  scrub_pause_off+0x21/0x50 [btrfs]
  [59.212]  scrub_simple_mirror+0x1c7/0x950 [btrfs]
  [59.212]  ? scrub_parity_put+0x1a5/0x1d0 [btrfs]
  [59.212]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.212]  scrub_stripe+0x20d/0x740 [btrfs]
  [59.213]  scrub_chunk+0xc4/0x130 [btrfs]
  [59.213]  scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs]
  [59.213]  ? __mutex_unlock_slowpath.isra.0+0x9a/0x120
  [59.213]  btrfs_scrub_dev+0x236/0x6a0 [btrfs]
  [59.213]  ? btrfs_ioctl+0xd97/0x32c0 [btrfs]
  [59.214]  ? _copy_from_user+0x7b/0x80
  [59.214]  btrfs_ioctl+0xde1/0x32c0 [btrfs]
  [59.214]  ? should_failslab+0xa/0x20
  [59.214]  ? kmem_cache_alloc_node+0x151/0x460
  [59.214]  ? alloc_io_context+0x1b/0x80
  [59.214]  ? preempt_count_add+0x70/0xa0
  [59.214]  ? __x64_sys_ioctl+0x88/0xc0
  [59.214]  __x64_sys_ioctl+0x88/0xc0
  [59.214]  do_syscall_64+0x38/0x90
  [59.214]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
  [59.214] RIP: 0033:0x7f82ffaffe9b
  [59.214] RSP: 002b:00007f82fe1f9c50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [59.214] RAX: ffffffffffffffda RBX: 000055b191e37090 RCX: 00007f82ffaffe9b
  [59.214] RDX: 000055b191e37090 RSI: 00000000c400941b RDI: 0000000000000003
  [59.214] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000
  [59.214] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fe1fa640
  [59.214] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000
  [59.214]  </TASK>
  [59.214] INFO: task btrfs:346776 blocked for more than 120 seconds.
  [59.215]       Tainted: G        W          6.3.0-rc2-btrfs-next-127+ #1
  [59.216] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [59.217] task:btrfs           state:D stack:0     pid:346776 ppid:1      flags:0x00004002
  [59.217] Call Trace:
  [59.217]  <TASK>
  [59.217]  __schedule+0x392/0xa70
  [59.217]  ? __pv_queued_spin_lock_slowpath+0x165/0x370
  [59.217]  schedule+0x5d/0xd0
  [59.217]  __scrub_blocked_if_needed+0x74/0xc0 [btrfs]
  [59.217]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.217]  scrub_pause_off+0x21/0x50 [btrfs]
  [59.217]  scrub_simple_mirror+0x1c7/0x950 [btrfs]
  [59.217]  ? scrub_parity_put+0x1a5/0x1d0 [btrfs]
  [59.218]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.218]  scrub_stripe+0x20d/0x740 [btrfs]
  [59.218]  scrub_chunk+0xc4/0x130 [btrfs]
  [59.218]  scrub_enumerate_chunks+0x3e4/0x7a0 [btrfs]
  [59.219]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.219]  btrfs_scrub_dev+0x236/0x6a0 [btrfs]
  [59.219]  ? btrfs_ioctl+0xd97/0x32c0 [btrfs]
  [59.219]  ? _copy_from_user+0x7b/0x80
  [59.219]  btrfs_ioctl+0xde1/0x32c0 [btrfs]
  [59.219]  ? should_failslab+0xa/0x20
  [59.219]  ? kmem_cache_alloc_node+0x151/0x460
  [59.219]  ? alloc_io_context+0x1b/0x80
  [59.219]  ? preempt_count_add+0x70/0xa0
  [59.219]  ? __x64_sys_ioctl+0x88/0xc0
  [59.219]  __x64_sys_ioctl+0x88/0xc0
  [59.219]  do_syscall_64+0x38/0x90
  [59.219]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
  [59.219] RIP: 0033:0x7f82ffaffe9b
  [59.219] RSP: 002b:00007f82fd9f8c50 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [59.219] RAX: ffffffffffffffda RBX: 000055b191e37510 RCX: 00007f82ffaffe9b
  [59.219] RDX: 000055b191e37510 RSI: 00000000c400941b RDI: 0000000000000003
  [59.219] RBP: 0000000000000000 R08: 00007fff1575016f R09: 0000000000000000
  [59.219] R10: 0000000000000000 R11: 0000000000000246 R12: 00007f82fd9f9640
  [59.219] R13: 000000000000006b R14: 00007f82ffa87580 R15: 0000000000000000
  [59.219]  </TASK>
  [59.219] INFO: task btrfs:346822 blocked for more than 120 seconds.
  [59.220]       Tainted: G        W          6.3.0-rc2-btrfs-next-127+ #1
  [59.221] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  [59.222] task:btrfs           state:D stack:0     pid:346822 ppid:1      flags:0x00004002
  [59.222] Call Trace:
  [59.222]  <TASK>
  [59.222]  __schedule+0x392/0xa70
  [59.222]  schedule+0x5d/0xd0
  [59.222]  btrfs_scrub_cancel+0x91/0x100 [btrfs]
  [59.222]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.222]  btrfs_commit_transaction+0x572/0xeb0 [btrfs]
  [59.223]  ? start_transaction+0xcb/0x610 [btrfs]
  [59.223]  prepare_to_relocate+0x111/0x1a0 [btrfs]
  [59.223]  relocate_block_group+0x57/0x5d0 [btrfs]
  [59.223]  ? btrfs_wait_nocow_writers+0x25/0xb0 [btrfs]
  [59.223]  btrfs_relocate_block_group+0x248/0x3c0 [btrfs]
  [59.224]  ? __pfx_autoremove_wake_function+0x10/0x10
  [59.224]  btrfs_relocate_chunk+0x3b/0x150 [btrfs]
  [59.224]  btrfs_balance+0x8ff/0x11d0 [btrfs]
  [59.224]  ? __kmem_cache_alloc_node+0x14a/0x410
  [59.224]  btrfs_ioctl+0x2334/0x32c0 [btrfs]
  [59.225]  ? mod_objcg_state+0xd2/0x360
  [59.225]  ? refill_obj_stock+0xb0/0x160
  [59.225]  ? seq_release+0x25/0x30
  [59.225]  ? __rseq_handle_notify_resume+0x3b5/0x4b0
  [59.225]  ? percpu_counter_add_batch+0x2e/0xa0
  [59.225]  ? __x64_sys_ioctl+0x88/0xc0
  [59.225]  __x64_sys_ioctl+0x88/0xc0
  [59.225]  do_syscall_64+0x38/0x90
  [59.225]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
  [59.225] RIP: 0033:0x7f381a6ffe9b
  [59.225] RSP: 002b:00007ffd45ecf060 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  [59.225] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f381a6ffe9b
  [59.225] RDX: 00007ffd45ecf150 RSI: 00000000c4009420 RDI: 0000000000000003
  [59.225] RBP: 0000000000000003 R08: 0000000000000013 R09: 0000000000000000
  [59.225] R10: 00007f381a60c878 R11: 0000000000000246 R12: 00007ffd45ed0423
  [59.225] R13: 00007ffd45ecf150 R14: 0000000000000000 R15: 00007ffd45ecf148
  [59.225]  </TASK>

What happens is the following:

1) A scrub is running, so fs_info->scrubs_running is 1;

2) Task A starts block group relocation, and at btrfs_relocate_chunk() it
   pauses scrub by calling btrfs_scrub_pause(). That increments
   fs_info->scrub_pause_req from 0 to 1 and waits for the scrub task to
   pause (for fs_info->scrubs_paused to be == to fs_info->scrubs_running);

3) The scrub task pauses at scrub_pause_off(), waiting for
   fs_info->scrub_pause_req to decrease to 0;

4) Task A then enters btrfs_relocate_block_group(), and down that call
   chain we start a transaction and then attempt to commit it;

5) When task A calls btrfs_commit_transaction(), it either will do the
   commit itself or wait for some other task that already started the
   commit of the transaction - it doesn't matter which case;

6) The transaction commit enters state TRANS_STATE_COMMIT_START;

7) An error happens during the transaction commit, like -ENOSPC when
   running delayed refs or delayed items for example;

8) This results in calling transaction.c:cleanup_transaction(), where
   we call btrfs_scrub_cancel(), incrementing fs_info->scrub_cancel_req
   from 0 to 1, and blocking this task waiting for fs_info->scrubs_running
   to decrease to 0;

9) From this point on, both the transaction commit and the scrub task
   hang forever:

   1) The transaction commit is waiting for fs_info->scrubs_running to
      be decreased to 0;

   2) The scrub task is at scrub_pause_off() waiting for
      fs_info->scrub_pause_req to decrease to 0 - so it can not proceed
      to stop the scrub and decrement fs_info->scrubs_running from 0 to 1.

   Therefore resulting in a deadlock.

Fix this by having cleanup_transaction(), called if a transaction commit
fails, not call btrfs_scrub_cancel() if relocation is in progress, and
having btrfs_relocate_block_group() call btrfs_scrub_cancel() instead if
the relocation failed and a transaction abort happened.

This was triggered with btrfs/061 from fstests.

Fixes: 55e3a601c81c ("btrfs: Fix data checksum error cause by replace with io-load.")
CC: stable@vger.kernel.org # 4.14+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 15 ++++++++++++++-
 fs/btrfs/volumes.c     |  9 ++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 18329ebcb1cb..b8d5b1fa9a03 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2035,7 +2035,20 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
 
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
-	btrfs_scrub_cancel(fs_info);
+
+	/*
+	 * If relocation is running, we can't cancel scrub because that will
+	 * result in a deadlock. Before relocating a block group, relocation
+	 * pauses scrub, then starts and commits a transaction before unpausing
+	 * scrub. If the transaction commit is being done by the relocation
+	 * task or triggered by another task and the relocation task is waiting
+	 * for the commit, and we end up here due to an error in the commit
+	 * path, then calling btrfs_scrub_cancel() will deadlock, as we are
+	 * asking for scrub to stop while having it asked to be paused higher
+	 * above in relocation code.
+	 */
+	if (!test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
+		btrfs_scrub_cancel(fs_info);
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ac0e8fb92fc8..c6d592870400 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3275,8 +3275,15 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	btrfs_scrub_pause(fs_info);
 	ret = btrfs_relocate_block_group(fs_info, chunk_offset);
 	btrfs_scrub_continue(fs_info);
-	if (ret)
+	if (ret) {
+		/*
+		 * If we had a transaction abort, stop all running scrubs.
+		 * See transaction.c:cleanup_transaction() why we do it here.
+		 */
+		if (BTRFS_FS_ERROR(fs_info))
+			btrfs_scrub_cancel(fs_info);
 		return ret;
+	}
 
 	block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
 	if (!block_group)

From 44378cd113e5f15bb0a89f5ac5a0e687b52feb90 Mon Sep 17 00:00:00 2001
From: Duy Nguyen <duy.nguyen.rh@renesas.com>
Date: Tue, 28 Mar 2023 00:03:03 +0000
Subject: [PATCH 059/173] ASoC: da7213.c: add missing pm_runtime_disable()

da7213.c is missing pm_runtime_disable(), thus we will get
below error when rmmod -> insmod.

	$ rmmod  snd-soc-da7213.ko
	$ insmod snd-soc-da7213.ko
	da7213 0-001a: Unbalanced pm_runtime_enable!"

[Kuninori adjusted to latest upstream]

Signed-off-by: Duy Nguyen <duy.nguyen.rh@renesas.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Tested-by: Khanh Le <khanh.le.xr@renesas.com>
Link: https://lore.kernel.org/r/87mt3xg2tk.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/da7213.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c
index 0068780fe0a7..1c1f211a8e2e 100644
--- a/sound/soc/codecs/da7213.c
+++ b/sound/soc/codecs/da7213.c
@@ -2022,6 +2022,11 @@ static int da7213_i2c_probe(struct i2c_client *i2c)
 	return ret;
 }
 
+static void da7213_i2c_remove(struct i2c_client *i2c)
+{
+	pm_runtime_disable(&i2c->dev);
+}
+
 static int __maybe_unused da7213_runtime_suspend(struct device *dev)
 {
 	struct da7213_priv *da7213 = dev_get_drvdata(dev);
@@ -2065,6 +2070,7 @@ static struct i2c_driver da7213_i2c_driver = {
 		.pm = &da7213_pm,
 	},
 	.probe_new	= da7213_i2c_probe,
+	.remove		= da7213_i2c_remove,
 	.id_table	= da7213_i2c_id,
 };
 

From 1231363aec86704a6b0467a12e3ca7bdf890e01d Mon Sep 17 00:00:00 2001
From: Juraj Pecigos <kernel@juraj.dev>
Date: Sun, 26 Mar 2023 11:29:49 +0200
Subject: [PATCH 060/173] nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN

A system with more than one of these SSDs will only have one usable.
The kernel fails to detect more than one nvme device due to duplicate
cntlids.

before:
[    9.395229] nvme 0000:01:00.0: platform quirk: setting simple suspend
[    9.395262] nvme nvme0: pci function 0000:01:00.0
[    9.395282] nvme 0000:03:00.0: platform quirk: setting simple suspend
[    9.395305] nvme nvme1: pci function 0000:03:00.0
[    9.409873] nvme nvme0: Duplicate cntlid 1 with nvme1, subsys nqn.2022-07.com.siliconmotion:nvm-subsystem-sn-                    , rejecting
[    9.409982] nvme nvme0: Removing after probe failure status: -22
[    9.427487] nvme nvme1: allocated 64 MiB host memory buffer.
[    9.445088] nvme nvme1: 16/0/0 default/read/poll queues
[    9.449898] nvme nvme1: Ignoring bogus Namespace Identifiers

after:
[    1.161890] nvme 0000:01:00.0: platform quirk: setting simple suspend
[    1.162660] nvme nvme0: pci function 0000:01:00.0
[    1.162684] nvme 0000:03:00.0: platform quirk: setting simple suspend
[    1.162707] nvme nvme1: pci function 0000:03:00.0
[    1.191354] nvme nvme0: allocated 64 MiB host memory buffer.
[    1.193378] nvme nvme1: allocated 64 MiB host memory buffer.
[    1.211044] nvme nvme1: 16/0/0 default/read/poll queues
[    1.211080] nvme nvme0: 16/0/0 default/read/poll queues
[    1.216145] nvme nvme0: Ignoring bogus Namespace Identifiers
[    1.216261] nvme nvme1: Ignoring bogus Namespace Identifiers

Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue.

Signed-off-by: Juraj Pecigos <kernel@juraj.dev>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b615906263f3..282d808400c5 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3441,7 +3441,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
-		.driver_data = NVME_QUIRK_BOGUS_NID, },
+		.driver_data = NVME_QUIRK_BOGUS_NID |
+				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
 		.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),

From 21f27df854008b86349a203bf97fef79bb11f53e Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Mon, 13 Feb 2023 09:55:20 +0100
Subject: [PATCH 061/173] KVM: s390: pv: fix external interruption loop not
 always detected

To determine whether the guest has caused an external interruption loop
upon code 20 (external interrupt) intercepts, the ext_new_psw needs to
be inspected to see whether external interrupts are enabled.

Under non-PV, ext_new_psw can simply be taken from guest lowcore. Under
PV, KVM can only access the encrypted guest lowcore and hence the
ext_new_psw must not be taken from guest lowcore.

handle_external_interrupt() incorrectly did that and hence was not able
to reliably tell whether an external interruption loop is happening or
not. False negatives cause spurious failures of my kvm-unit-test
for extint loops[1] under PV.

Since code 20 is only caused under PV if and only if the guest's
ext_new_psw is enabled for external interrupts, false positive detection
of a external interruption loop can not happen.

Fix this issue by instead looking at the guest PSW in the state
description. Since the PSW swap for external interrupt is done by the
ultravisor before the intercept is caused, this reliably tells whether
the guest is enabled for external interrupts in the ext_new_psw.

Also update the comments to explain better what is happening.

[1] https://lore.kernel.org/kvm/20220812062151.1980937-4-nrb@linux.ibm.com/

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Fixes: 201ae986ead7 ("KVM: s390: protvirt: Implement interrupt injection")
Link: https://lore.kernel.org/r/20230213085520.100756-2-nrb@linux.ibm.com
Message-Id: <20230213085520.100756-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kvm/intercept.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 0ee02dae14b2..2cda8d9d7c6e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -271,10 +271,18 @@ static int handle_prog(struct kvm_vcpu *vcpu)
  * handle_external_interrupt - used for external interruption interceptions
  * @vcpu: virtual cpu
  *
- * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
- * the new PSW does not have external interrupts disabled. In the first case,
- * we've got to deliver the interrupt manually, and in the second case, we
- * drop to userspace to handle the situation there.
+ * This interception occurs if:
+ * - the CPUSTAT_EXT_INT bit was already set when the external interrupt
+ *   occurred. In this case, the interrupt needs to be injected manually to
+ *   preserve interrupt priority.
+ * - the external new PSW has external interrupts enabled, which will cause an
+ *   interruption loop. We drop to userspace in this case.
+ *
+ * The latter case can be detected by inspecting the external mask bit in the
+ * external new psw.
+ *
+ * Under PV, only the latter case can occur, since interrupt priorities are
+ * handled in the ultravisor.
  */
 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
@@ -285,10 +293,18 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.exit_external_interrupt++;
 
-	rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
-	if (rc)
-		return rc;
-	/* We can not handle clock comparator or timer interrupt with bad PSW */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		newpsw = vcpu->arch.sie_block->gpsw;
+	} else {
+		rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+		if (rc)
+			return rc;
+	}
+
+	/*
+	 * Clock comparator or timer interrupt with external interrupt enabled
+	 * will cause interrupt loop. Drop to userspace.
+	 */
 	if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
 	    (newpsw.mask & PSW_MASK_EXT))
 		return -EOPNOTSUPP;

From 1abce0580b89464546ae06abd5891ebec43c9470 Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Fri, 3 Mar 2023 09:59:47 +1100
Subject: [PATCH 062/173] powerpc/64s: Fix __pte_needs_flush() false positive
 warning

Userspace PROT_NONE ptes set _PAGE_PRIVILEGED, triggering a false
positive debug assertion that __pte_flags_need_flush() is not called
on a kernel mapping.

Detect when it is a userspace PROT_NONE page by checking the required
bits of PAGE_NONE are set, and none of the RWX bits are set.
pte_protnone() is insufficient here because it always returns 0 when
CONFIG_NUMA_BALANCING=n.

Fixes: b11931e9adc1 ("powerpc/64s: add pte_needs_flush and huge_pmd_needs_flush")
Cc: stable@vger.kernel.org # v6.1+
Reported-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20230302225947.81083-1-bgray@linux.ibm.com
---
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 2bbc0fcce04a..5e26c7f2c25a 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -148,6 +148,11 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
 	 */
 }
 
+static inline bool __pte_protnone(unsigned long pte)
+{
+	return (pte & (pgprot_val(PAGE_NONE) | _PAGE_RWX)) == pgprot_val(PAGE_NONE);
+}
+
 static inline bool __pte_flags_need_flush(unsigned long oldval,
 					  unsigned long newval)
 {
@@ -164,8 +169,8 @@ static inline bool __pte_flags_need_flush(unsigned long oldval,
 	/*
 	 * We do not expect kernel mappings or non-PTEs or not-present PTEs.
 	 */
-	VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED);
-	VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED);
+	VM_WARN_ON_ONCE(!__pte_protnone(oldval) && oldval & _PAGE_PRIVILEGED);
+	VM_WARN_ON_ONCE(!__pte_protnone(newval) && newval & _PAGE_PRIVILEGED);
 	VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE));
 	VM_WARN_ON_ONCE(!(newval & _PAGE_PTE));
 	VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT));

From fd7276189450110ed835eb0a334e62d2f1c4e3be Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sun, 26 Mar 2023 16:15:57 -0600
Subject: [PATCH 063/173] powerpc: Don't try to copy PPR for task with NULL
 pt_regs

powerpc sets up PF_KTHREAD and PF_IO_WORKER with a NULL pt_regs, which
from my (arguably very short) checking is not commonly done for other
archs. This is fine, except when PF_IO_WORKER's have been created and
the task does something that causes a coredump to be generated. Then we
get this crash:

  Kernel attempted to read user page (160) - exploit attempt? (uid: 1000)
  BUG: Kernel NULL pointer dereference on read at 0x00000160
  Faulting instruction address: 0xc0000000000c3a60
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=32 NUMA pSeries
  Modules linked in: bochs drm_vram_helper drm_kms_helper xts binfmt_misc ecb ctr syscopyarea sysfillrect cbc sysimgblt drm_ttm_helper aes_generic ttm sg libaes evdev joydev virtio_balloon vmx_crypto gf128mul drm dm_mod fuse loop configfs drm_panel_orientation_quirks ip_tables x_tables autofs4 hid_generic usbhid hid xhci_pci xhci_hcd usbcore usb_common sd_mod
  CPU: 1 PID: 1982 Comm: ppc-crash Not tainted 6.3.0-rc2+ #88
  Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries
  NIP:  c0000000000c3a60 LR: c000000000039944 CTR: c0000000000398e0
  REGS: c0000000041833b0 TRAP: 0300   Not tainted  (6.3.0-rc2+)
  MSR:  800000000280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE>  CR: 88082828  XER: 200400f8
  ...
  NIP memcpy_power7+0x200/0x7d0
  LR  ppr_get+0x64/0xb0
  Call Trace:
    ppr_get+0x40/0xb0 (unreliable)
    __regset_get+0x180/0x1f0
    regset_get_alloc+0x64/0x90
    elf_core_dump+0xb98/0x1b60
    do_coredump+0x1c34/0x24a0
    get_signal+0x71c/0x1410
    do_notify_resume+0x140/0x6f0
    interrupt_exit_user_prepare_main+0x29c/0x320
    interrupt_exit_user_prepare+0x6c/0xa0
    interrupt_return_srr_user+0x8/0x138

Because ppr_get() is trying to copy from a PF_IO_WORKER with a NULL
pt_regs.

Check for a valid pt_regs in both ppc_get/ppr_set, and return an error
if not set. The actual error value doesn't seem to be important here, so
just pick -EINVAL.

Fixes: fa439810cc1b ("powerpc/ptrace: Enable support for NT_PPPC_TAR, NT_PPC_PPR, NT_PPC_DSCR")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
[mpe: Trim oops in change log, add Fixes & Cc stable]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/d9f63344-fe7c-56ae-b420-4a1a04a2ae4c@kernel.dk
---
 arch/powerpc/kernel/ptrace/ptrace-view.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 2087a785f05f..5fff0d04b23f 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
 static int ppr_get(struct task_struct *target, const struct user_regset *regset,
 		   struct membuf to)
 {
+	if (!target->thread.regs)
+		return -EINVAL;
+
 	return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64));
 }
 
@@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset,
 		   unsigned int pos, unsigned int count, const void *kbuf,
 		   const void __user *ubuf)
 {
+	if (!target->thread.regs)
+		return -EINVAL;
+
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				  &target->thread.regs->ppr, 0, sizeof(u64));
 }

From 005308f7bdacf5685ed1a431244a183dbbb9e0e8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 27 Mar 2023 19:56:18 -0600
Subject: [PATCH 064/173] io_uring/poll: clear single/double poll flags on poll
 arming

Unless we have at least one entry queued, then don't call into
io_poll_remove_entries(). Normally this isn't possible, but if we
retry poll then we can have ->nr_entries cleared again as we're
setting it up. If this happens for a poll retry, then we'll still have
at least REQ_F_SINGLE_POLL set. io_poll_remove_entries() then thinks
it has entries to remove.

Clear REQ_F_SINGLE_POLL and REQ_F_DOUBLE_POLL unconditionally when
arming a poll request.

Fixes: c16bda37594f ("io_uring/poll: allow some retries for poll triggering spuriously")
Cc: stable@vger.kernel.org
Reported-by: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/poll.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/io_uring/poll.c b/io_uring/poll.c
index 795facbd0e9f..55306e801081 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -726,6 +726,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 	apoll = io_req_alloc_apoll(req, issue_flags);
 	if (!apoll)
 		return IO_APOLL_ABORTED;
+	req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
 	req->flags |= REQ_F_POLLED;
 	ipt.pt._qproc = io_async_queue_proc;
 

From b26cd9325be4c1fcd331b77f10acb627c560d4d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= <korneld@chromium.org>
Date: Mon, 20 Mar 2023 09:32:59 +0000
Subject: [PATCH 065/173] pinctrl: amd: Disable and mask interrupts on resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a similar problem to the one observed in:
commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe").

On some systems, during suspend/resume cycle firmware leaves
an interrupt enabled on a pin that is not used by the kernel.
This confuses the AMD pinctrl driver and causes spurious interrupts.

The driver already has logic to detect if a pin is used by the kernel.
Leverage it to re-initialize interrupt fields of a pin only if it's not
used by us.

Cc: stable@vger.kernel.org
Fixes: dbad75dd1f25 ("pinctrl: add AMD GPIO driver support.")
Signed-off-by: Kornel Dulęba <korneld@chromium.org>
Link: https://lore.kernel.org/r/20230320093259.845178-1-korneld@chromium.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-amd.c | 36 +++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 9236a132c7ba..609821b756c2 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -872,32 +872,34 @@ static const struct pinconf_ops amd_pinconf_ops = {
 	.pin_config_group_set = amd_pinconf_group_set,
 };
 
-static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+static void amd_gpio_irq_init_pin(struct amd_gpio *gpio_dev, int pin)
 {
-	struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+	const struct pin_desc *pd;
 	unsigned long flags;
 	u32 pin_reg, mask;
-	int i;
 
 	mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
 		BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) |
 		BIT(WAKE_CNTRL_OFF_S4);
 
-	for (i = 0; i < desc->npins; i++) {
-		int pin = desc->pins[i].number;
-		const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin);
+	pd = pin_desc_get(gpio_dev->pctrl, pin);
+	if (!pd)
+		return;
 
-		if (!pd)
-			continue;
+	raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+	pin_reg = readl(gpio_dev->base + pin * 4);
+	pin_reg &= ~mask;
+	writel(pin_reg, gpio_dev->base + pin * 4);
+	raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+}
 
-		raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+{
+	struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+	int i;
 
-		pin_reg = readl(gpio_dev->base + i * 4);
-		pin_reg &= ~mask;
-		writel(pin_reg, gpio_dev->base + i * 4);
-
-		raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
-	}
+	for (i = 0; i < desc->npins; i++)
+		amd_gpio_irq_init_pin(gpio_dev, i);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -950,8 +952,10 @@ static int amd_gpio_resume(struct device *dev)
 	for (i = 0; i < desc->npins; i++) {
 		int pin = desc->pins[i].number;
 
-		if (!amd_gpio_should_save(gpio_dev, pin))
+		if (!amd_gpio_should_save(gpio_dev, pin)) {
+			amd_gpio_irq_init_pin(gpio_dev, pin);
 			continue;
+		}
 
 		raw_spin_lock_irqsave(&gpio_dev->lock, flags);
 		gpio_dev->saved_regs[i] |= readl(gpio_dev->base + pin * 4) & PIN_IRQ_PENDING;

From f91bf3272a18356e8585f6bbba896d794632f2af Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 16 Mar 2023 00:25:14 +0100
Subject: [PATCH 066/173] iommu/exynos: Fix set_platform_dma_ops() callback

There are some subtle differences between release_device() and
set_platform_dma_ops() callbacks, so separate those two callbacks. Device
links should be removed only in release_device(), because they were
created in probe_device() on purpose and they are needed for proper
Exynos IOMMU driver operation. While fixing this, remove the conditional
code as it is not really needed.

Reported-by: Jason Gunthorpe <jgg@ziepe.ca>
Fixes: 189d496b48b1 ("iommu/exynos: Add missing set_platform_dma_ops callback")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Link: https://lore.kernel.org/r/20230315232514.1046589-1-m.szyprowski@samsung.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/exynos-iommu.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 483aaaeb6dae..1abd187c6075 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1415,23 +1415,26 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
 	return &data->iommu;
 }
 
-static void exynos_iommu_release_device(struct device *dev)
+static void exynos_iommu_set_platform_dma(struct device *dev)
 {
 	struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
-	struct sysmmu_drvdata *data;
 
 	if (owner->domain) {
 		struct iommu_group *group = iommu_group_get(dev);
 
 		if (group) {
-#ifndef CONFIG_ARM
-			WARN_ON(owner->domain !=
-				iommu_group_default_domain(group));
-#endif
 			exynos_iommu_detach_device(owner->domain, dev);
 			iommu_group_put(group);
 		}
 	}
+}
+
+static void exynos_iommu_release_device(struct device *dev)
+{
+	struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
+	struct sysmmu_drvdata *data;
+
+	exynos_iommu_set_platform_dma(dev);
 
 	list_for_each_entry(data, &owner->controllers, owner_node)
 		device_link_del(data->link);
@@ -1479,7 +1482,7 @@ static const struct iommu_ops exynos_iommu_ops = {
 	.domain_alloc = exynos_iommu_domain_alloc,
 	.device_group = generic_device_group,
 #ifdef CONFIG_ARM
-	.set_platform_dma_ops = exynos_iommu_release_device,
+	.set_platform_dma_ops = exynos_iommu_set_platform_dma,
 #endif
 	.probe_device = exynos_iommu_probe_device,
 	.release_device = exynos_iommu_release_device,

From b57841fb0b564c61508222e885ac8f30a2811089 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Mar 2023 20:43:26 +0200
Subject: [PATCH 067/173] thermal: core: Drop excessive lockdep_assert_held()
 calls

The lockdep_assert_held() calls added to cooling_device_stats_setup()
and cooling_device_stats_destroy() by commit 790930f44289 ("thermal:
core: Introduce thermal_cooling_device_update()") trigger false-positive
lockdep reports in code paths that are not subject to race conditions
(before cooling device registration and after cooling device removal).

For this reason, remove the lockdep_assert_held() calls from both
cooling_device_stats_setup() and cooling_device_stats_destroy() and
add one to thermal_cooling_device_stats_reinit() that has to be called
under the cdev lock.

Fixes: 790930f44289 ("thermal: core: Introduce thermal_cooling_device_update()")
Link: https://lore.kernel.org/linux-acpi/ZCIDTLFt27Ei7+V6@ideak-desk.fi.intel.com
Reported-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_sysfs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index a4aba7b8bb8b..6c20c9f90a05 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -876,8 +876,6 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
 	unsigned long states = cdev->max_state + 1;
 	int var;
 
-	lockdep_assert_held(&cdev->lock);
-
 	var = sizeof(*stats);
 	var += sizeof(*stats->time_in_state) * states;
 	var += sizeof(*stats->trans_table) * states * states;
@@ -903,8 +901,6 @@ out:
 
 static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
 {
-	lockdep_assert_held(&cdev->lock);
-
 	kfree(cdev->stats);
 	cdev->stats = NULL;
 }
@@ -931,6 +927,8 @@ void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev)
 
 void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev)
 {
+	lockdep_assert_held(&cdev->lock);
+
 	cooling_device_stats_destroy(cdev);
 	cooling_device_stats_setup(cdev);
 }

From 2280d425ba3599bdd85c41bd0ec8ba568f00c032 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 28 Mar 2023 10:45:20 +0100
Subject: [PATCH 068/173] btrfs: ignore fiemap path cache when there are
 multiple paths for a node

During fiemap, when walking backreferences to determine if a b+tree
node/leaf is shared, we may find a tree block (leaf or node) for which
two parents were added to the references ulist. This happens if we get
for example one direct ref (shared tree block ref) and one indirect ref
(non-shared tree block ref) for the tree block at the current level,
which can happen during relocation.

In that case the fiemap path cache can not be used since it's meant for
a single path, with one tree block at each possible level, so having
multiple references for a tree block at any level may result in getting
the level counter exceed BTRFS_MAX_LEVEL and eventually trigger the
warning:

   WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)

at lookup_backref_shared_cache() and at store_backref_shared_cache().
This is harmless since the code ignores any level >= BTRFS_MAX_LEVEL, the
warning is there just to catch any unexpected case like the one described
above. However if a user finds this it may be scary and get reported.

So just ignore the path cache once we find a tree block for which there
are more than one reference, which is the less common case, and update
the cache with the sharedness check result for all levels below the level
for which we found multiple references.

Reported-by: Jarno Pelkonen <jarno.pelkonen@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CAKv8qLmDNAGJGCtsevxx_VZ_YOvvs1L83iEJkTgyA4joJertng@mail.gmail.com/
Fixes: 12a824dc67a6 ("btrfs: speedup checking for extent sharedness during fiemap")
CC: stable@vger.kernel.org # 6.1+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c | 85 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 90e40d5ceccd..e54f0884802a 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1921,8 +1921,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 	level = -1;
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
-		bool is_shared;
-		bool cached;
+		const unsigned long prev_ref_count = ctx->refs.nnodes;
 
 		walk_ctx.bytenr = bytenr;
 		ret = find_parent_nodes(&walk_ctx, &shared);
@@ -1940,21 +1939,36 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 		ret = 0;
 
 		/*
-		 * If our data extent was not directly shared (without multiple
-		 * reference items), than it might have a single reference item
-		 * with a count > 1 for the same offset, which means there are 2
-		 * (or more) file extent items that point to the data extent -
-		 * this happens when a file extent item needs to be split and
-		 * then one item gets moved to another leaf due to a b+tree leaf
-		 * split when inserting some item. In this case the file extent
-		 * items may be located in different leaves and therefore some
-		 * of the leaves may be referenced through shared subtrees while
-		 * others are not. Since our extent buffer cache only works for
-		 * a single path (by far the most common case and simpler to
-		 * deal with), we can not use it if we have multiple leaves
-		 * (which implies multiple paths).
+		 * More than one extent buffer (bytenr) may have been added to
+		 * the ctx->refs ulist, in which case we have to check multiple
+		 * tree paths in case the first one is not shared, so we can not
+		 * use the path cache which is made for a single path. Multiple
+		 * extent buffers at the current level happen when:
+		 *
+		 * 1) level -1, the data extent: If our data extent was not
+		 *    directly shared (without multiple reference items), then
+		 *    it might have a single reference item with a count > 1 for
+		 *    the same offset, which means there are 2 (or more) file
+		 *    extent items that point to the data extent - this happens
+		 *    when a file extent item needs to be split and then one
+		 *    item gets moved to another leaf due to a b+tree leaf split
+		 *    when inserting some item. In this case the file extent
+		 *    items may be located in different leaves and therefore
+		 *    some of the leaves may be referenced through shared
+		 *    subtrees while others are not. Since our extent buffer
+		 *    cache only works for a single path (by far the most common
+		 *    case and simpler to deal with), we can not use it if we
+		 *    have multiple leaves (which implies multiple paths).
+		 *
+		 * 2) level >= 0, a tree node/leaf: We can have a mix of direct
+		 *    and indirect references on a b+tree node/leaf, so we have
+		 *    to check multiple paths, and the extent buffer (the
+		 *    current bytenr) may be shared or not. One example is
+		 *    during relocation as we may get a shared tree block ref
+		 *    (direct ref) and a non-shared tree block ref (indirect
+		 *    ref) for the same node/leaf.
 		 */
-		if (level == -1 && ctx->refs.nnodes > 1)
+		if ((ctx->refs.nnodes - prev_ref_count) > 1)
 			ctx->use_path_cache = false;
 
 		if (level >= 0)
@@ -1964,18 +1978,45 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 		if (!node)
 			break;
 		bytenr = node->val;
-		level++;
-		cached = lookup_backref_shared_cache(ctx, root, bytenr, level,
-						     &is_shared);
-		if (cached) {
-			ret = (is_shared ? 1 : 0);
-			break;
+		if (ctx->use_path_cache) {
+			bool is_shared;
+			bool cached;
+
+			level++;
+			cached = lookup_backref_shared_cache(ctx, root, bytenr,
+							     level, &is_shared);
+			if (cached) {
+				ret = (is_shared ? 1 : 0);
+				break;
+			}
 		}
 		shared.share_count = 0;
 		shared.have_delayed_delete_refs = false;
 		cond_resched();
 	}
 
+	/*
+	 * If the path cache is disabled, then it means at some tree level we
+	 * got multiple parents due to a mix of direct and indirect backrefs or
+	 * multiple leaves with file extent items pointing to the same data
+	 * extent. We have to invalidate the cache and cache only the sharedness
+	 * result for the levels where we got only one node/reference.
+	 */
+	if (!ctx->use_path_cache) {
+		int i = 0;
+
+		level--;
+		if (ret >= 0 && level >= 0) {
+			bytenr = ctx->path_cache_entries[level].bytenr;
+			ctx->use_path_cache = true;
+			store_backref_shared_cache(ctx, root, bytenr, level, ret);
+			i = level + 1;
+		}
+
+		for ( ; i < BTRFS_MAX_LEVEL; i++)
+			ctx->path_cache_entries[i].bytenr = 0;
+	}
+
 	/*
 	 * Cache the sharedness result for the data extent if we know our inode
 	 * has more than 1 file extent item that refers to the data extent.

From e4056e38ec87b4c21eb34bb8e38b1b0ca1221744 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 24 Mar 2023 09:41:27 +0100
Subject: [PATCH 069/173] dt-bindings: pinctrl: qcom,sm8550-lpass-lpi: allow
 input-enabled and bias-bus-hold

Add missing common pin configuration properties: input-enabled and
bias-bus-hold.

Fixes: 268e97ccc311 ("dt-bindings: pinctrl: qcom,sm8550-lpass-lpi-pinctrl: add SM8550 LPASS")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230324084127.29362-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml         | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml
index 5e90051ed314..8f60a9113e7a 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm8550-lpass-lpi-pinctrl.yaml
@@ -96,9 +96,11 @@ $defs:
           2: Lower Slew rate (slower edges)
           3: Reserved (No adjustments)
 
+      bias-bus-hold: true
       bias-pull-down: true
       bias-pull-up: true
       bias-disable: true
+      input-enable: true
       output-high: true
       output-low: true
 

From 5c2712387d4850e0b64121d5fd3e6c4e84ea3266 Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Tue, 28 Mar 2023 19:49:15 +0800
Subject: [PATCH 070/173] cacheinfo: Fix LLC is not exported through sysfs

After entering 6.3-rc1 the LLC cacheinfo is not exported on our ACPI
based arm64 server. This is because the LLC cacheinfo is partly reset
when secondary CPUs boot up. On arm64 the primary cpu will allocate
and setup cacheinfo:
init_cpu_topology()
  for_each_possible_cpu()
    fetch_cache_info() // Allocate cacheinfo and init levels
detect_cache_attributes()
  cache_shared_cpu_map_setup()
    if (!last_level_cache_is_valid()) // not valid, setup LLC
      cache_setup_properties() // setup LLC

On secondary CPU boot up:
detect_cache_attributes()
  populate_cache_leaves()
    get_cache_type() // Get cache type from clidr_el1,
                     // for LLC type=CACHE_TYPE_NOCACHE
  cache_shared_cpu_map_setup()
    if (!last_level_cache_is_valid()) // Valid and won't go to this branch,
                                      // leave LLC's type=CACHE_TYPE_NOCACHE

The last_level_cache_is_valid() use cacheinfo->{attributes, fw_token} to
test it's valid or not, but populate_cache_leaves() will only reset
LLC's type, so we won't try to re-setup LLC's type and leave it
CACHE_TYPE_NOCACHE and won't export it through sysfs.

This patch tries to fix this by not re-populating the cache leaves if
the LLC is valid.

Fixes: 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20230328114915.33340-1-yangyicong@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/cacheinfo.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index f6573c335f4c..f3903d002819 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -474,12 +474,18 @@ int detect_cache_attributes(unsigned int cpu)
 
 populate_leaves:
 	/*
-	 * populate_cache_leaves() may completely setup the cache leaves and
-	 * shared_cpu_map or it may leave it partially setup.
+	 * If LLC is valid the cache leaves were already populated so just go to
+	 * update the cpu map.
 	 */
-	ret = populate_cache_leaves(cpu);
-	if (ret)
-		goto free_ci;
+	if (!last_level_cache_is_valid(cpu)) {
+		/*
+		 * populate_cache_leaves() may completely setup the cache leaves and
+		 * shared_cpu_map or it may leave it partially setup.
+		 */
+		ret = populate_cache_leaves(cpu);
+		if (ret)
+			goto free_ci;
+	}
 
 	/*
 	 * For systems using DT for cache hierarchy, fw_token

From eca9f6e6f83b6725b84e1c76fdde19b003cff0eb Mon Sep 17 00:00:00 2001
From: Haren Myneni <haren@linux.ibm.com>
Date: Mon, 20 Mar 2023 19:50:08 -0700
Subject: [PATCH 071/173] powerpc/pseries/vas: Ignore VAS update for DLPAR if
 copy/paste is not enabled

The hypervisor supports user-mode NX from Power10.

pseries_vas_dlpar_cpu() is called from lparcfg_write() to update VAS
windows for DLPAR event in shared processor mode and the kernel gets
-ENOTSUPP for HCALLs if the user-mode NX is not supported. The current
VAS implementation also supports only with Radix page tables. Whereas in
dedicated processor mode, pseries_vas_notifier() is registered only if
the copy/paste feature is enabled. So instead of displaying HCALL error
messages, update VAS capabilities if the copy/paste feature is
available.

This patch ignores updating VAS capabilities in pseries_vas_dlpar_cpu()
and returns success if the copy/paste feature is not enabled. Then
lparcfg_write() completes the processor DLPAR operations without any
failures.

Fixes: 2147783d6bf0 ("powerpc/pseries: Use lparcfg to reconfig VAS windows for DLPAR CPU")
Cc: stable@vger.kernel.org # v6.1+
Signed-off-by: Haren Myneni <haren@linux.ibm.com>
Reviewed-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/1d0e727e7dbd9a28627ef08ca9df9c86a50175e2.camel@linux.ibm.com
---
 arch/powerpc/platforms/pseries/vas.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 559112312810..513180467562 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -856,6 +856,13 @@ int pseries_vas_dlpar_cpu(void)
 {
 	int new_nr_creds, rc;
 
+	/*
+	 * NX-GZIP is not enabled. Nothing to do for DLPAR event
+	 */
+	if (!copypaste_feat)
+		return 0;
+
+
 	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
 				      vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
 				      (u64)virt_to_phys(&hv_cop_caps));
@@ -1012,6 +1019,7 @@ static int __init pseries_vas_init(void)
 	 * Linux supports user space COPY/PASTE only with Radix
 	 */
 	if (!radix_enabled()) {
+		copypaste_feat = false;
 		pr_err("API is supported only with radix page tables\n");
 		return -ENOTSUPP;
 	}

From e3720f92e0237921da537e47a0b24e27899203f8 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Date: Wed, 29 Mar 2023 14:38:28 +0300
Subject: [PATCH 072/173] ASoC: SOF: avoid a NULL dereference with unsupported
 widgets

If an IPC4 topology contains an unsupported widget, its .module_info
field won't be set, then sof_ipc4_route_setup() will cause a kernel
Oops trying to dereference it. Add a check for such cases.

Cc: stable@vger.kernel.org # 6.2
Signed-off-by: Guennadi Liakhovetski <guennadi.liakhovetski@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230329113828.28562-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index a623707c8ffc..669b99a4f76e 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -1805,6 +1805,14 @@ static int sof_ipc4_route_setup(struct snd_sof_dev *sdev, struct snd_sof_route *
 	u32 header, extension;
 	int ret;
 
+	if (!src_fw_module || !sink_fw_module) {
+		/* The NULL module will print as "(efault)" */
+		dev_err(sdev->dev, "source %s or sink %s widget weren't set up properly\n",
+			src_fw_module->man4_module_entry.name,
+			sink_fw_module->man4_module_entry.name);
+		return -ENODEV;
+	}
+
 	sroute->src_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget,
 						     SOF_PIN_TYPE_SOURCE);
 	if (sroute->src_queue_id < 0) {

From 4ff0b50de8cabba055efe50bbcb7506c41a69835 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 29 Mar 2023 15:03:43 +0100
Subject: [PATCH 073/173] io_uring/rsrc: fix rogue rsrc node grabbing

We should not be looking at ctx->rsrc_node and anyhow modifying the node
without holding uring_lock, grabbing references in such a way is not
safe either.

Cc: stable@vger.kernel.org
Fixes: 5106dd6e74ab6 ("io_uring: propagate issue_flags state down to file assignment")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/1202ede2d7bb90136e3482b2b84aad9ed483e5d6.1680098433.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rsrc.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 2b8743645efc..f27f4975217d 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -144,15 +144,13 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
 					unsigned int issue_flags)
 {
 	if (!req->rsrc_node) {
+		io_ring_submit_lock(ctx, issue_flags);
+
+		lockdep_assert_held(&ctx->uring_lock);
+
 		req->rsrc_node = ctx->rsrc_node;
-
-		if (!(issue_flags & IO_URING_F_UNLOCKED)) {
-			lockdep_assert_held(&ctx->uring_lock);
-
-			io_charge_rsrc_node(ctx);
-		} else {
-			percpu_ref_get(&req->rsrc_node->refs);
-		}
+		io_charge_rsrc_node(ctx);
+		io_ring_submit_unlock(ctx, issue_flags);
 	}
 }
 

From 433279beba1d4872da10b7b60a539e0cb828b32b Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 28 Mar 2023 17:44:00 +0800
Subject: [PATCH 074/173] md: fix regression for null-ptr-deference in
 __md_stop()

Commit 3e453522593d ("md: Free resources in __md_stop") tried to fix
null-ptr-deference for 'active_io' by moving percpu_ref_exit() to
__md_stop(), however, the commit also moving 'writes_pending' to
__md_stop(), and this will cause mdadm tests broken:

BUG: kernel NULL pointer dereference, address: 0000000000000038
Oops: 0000 [#1] PREEMPT SMP
CPU: 15 PID: 17830 Comm: mdadm Not tainted 6.3.0-rc3-next-20230324-00009-g520d37
RIP: 0010:free_percpu+0x465/0x670
Call Trace:
 <TASK>
 __percpu_ref_exit+0x48/0x70
 percpu_ref_exit+0x1a/0x90
 __md_stop+0xe9/0x170
 do_md_stop+0x1e1/0x7b0
 md_ioctl+0x90c/0x1aa0
 blkdev_ioctl+0x19b/0x400
 vfs_ioctl+0x20/0x50
 __x64_sys_ioctl+0xba/0xe0
 do_syscall_64+0x6c/0xe0
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

And the problem can be reporduced 100% by following test:

mdadm -CR /dev/md0 -l1 -n1 /dev/sda --force
echo inactive > /sys/block/md0/md/array_state
echo read-auto  > /sys/block/md0/md/array_state
echo inactive > /sys/block/md0/md/array_state

Root cause:

// start raid
raid1_run
 mddev_init_writes_pending
  percpu_ref_init

// inactive raid
array_state_store
 do_md_stop
  __md_stop
   percpu_ref_exit

// start raid again
array_state_store
 do_md_run
  raid1_run
   mddev_init_writes_pending
    if (mddev->writes_pending.percpu_count_ptr)
    // won't reinit

// inactive raid again
...
percpu_ref_exit
-> null-ptr-deference

Before the commit, 'writes_pending' is exited when mddev is freed, and
it's safe to restart raid because mddev_init_writes_pending() already make
sure that 'writes_pending' will only be initialized once.

Fix the prblem by moving 'writes_pending' back, it's a litter hard to find
the relationship between alloc memory and free memory, however, code
changes is much less and we lived with this for a long time already.

Fixes: 3e453522593d ("md: Free resources in __md_stop")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230328094400.1448955-1-yukuai1@huaweicloud.com
---
 drivers/md/md.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 39e49e5d7182..13321dbb5fbc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6260,7 +6260,6 @@ static void __md_stop(struct mddev *mddev)
 	module_put(pers->owner);
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 
-	percpu_ref_exit(&mddev->writes_pending);
 	percpu_ref_exit(&mddev->active_io);
 	bioset_exit(&mddev->bio_set);
 	bioset_exit(&mddev->sync_set);
@@ -6273,6 +6272,7 @@ void md_stop(struct mddev *mddev)
 	 */
 	__md_stop_writes(mddev);
 	__md_stop(mddev);
+	percpu_ref_exit(&mddev->writes_pending);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
@@ -7843,6 +7843,7 @@ static void md_free_disk(struct gendisk *disk)
 {
 	struct mddev *mddev = disk->private_data;
 
+	percpu_ref_exit(&mddev->writes_pending);
 	mddev_free(mddev);
 }
 

From a57cc2dbb3738930d9cb361b9b473f90c8ede0b8 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 29 Mar 2023 08:22:07 -0700
Subject: [PATCH 075/173] thermal: intel: int340x: processor_thermal: Fix
 additional deadlock

Commit 52f04f10b900 ("thermal: intel: int340x: processor_thermal: Fix
deadlock") addressed deadlock issue during user space trip update. But it
missed a case when thermal zone device is disabled when user writes 0.

Call to thermal_zone_device_disable() also causes deadlock as it also
tries to lock tz->lock, which is already claimed by trip_point_temp_store()
in the thermal core code.

Remove call to thermal_zone_device_disable() in the function
sys_set_trip_temp(), which is called from trip_point_temp_store().

Fixes: 52f04f10b900 ("thermal: intel: int340x: processor_thermal: Fix deadlock")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: 6.2+ <stable@vger.kernel.org> # 6.2+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../thermal/intel/int340x_thermal/processor_thermal_device_pci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
index 90526f46c9b1..d71ee50e7878 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -153,7 +153,6 @@ static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp
 		cancel_delayed_work_sync(&pci_info->work);
 		proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
 		proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0);
-		thermal_zone_device_disable(tzd);
 		pci_info->stored_thres = 0;
 		return 0;
 	}

From 1aa866931b8026a0dd636e9ef7b5c5dfb4cc5ce8 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Fri, 24 Mar 2023 10:05:38 +0000
Subject: [PATCH 076/173] RISC-V: add non-alternative fallback for
 riscv_has_extension_[un]likely()

The has_fpu() check, which in turn calls riscv_has_extension_likely(),
relies on alternatives to figure out whether the system has an FPU.
As a result, it will malfunction on XIP kernels, as they do not support
the alternatives mechanism.

When alternatives support is not present, fall back to using
__riscv_isa_extension_available() in riscv_has_extension_[un]likely()
instead stead, which handily takes the same argument, so that kernels
that do not support alternatives can accurately report the presence of
FPU support.

Fixes: 702e64550b12 ("riscv: fpu: switch has_fpu() to riscv_has_extension_likely()")
Link: https://lore.kernel.org/all/ad445951-3d13-4644-94d9-e0989cda39c3@spud/
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Link: https://lore.kernel.org/r/20230324100538.3514663-2-conor.dooley@microchip.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/hwcap.h | 50 ++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e3021b2590de..6263a0de1c6a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -57,18 +57,31 @@ struct riscv_isa_ext_data {
 	unsigned int isa_ext_id;
 };
 
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext)	\
+	__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
 static __always_inline bool
 riscv_has_extension_likely(const unsigned long ext)
 {
 	compiletime_assert(ext < RISCV_ISA_EXT_MAX,
 			   "ext must be < RISCV_ISA_EXT_MAX");
 
-	asm_volatile_goto(
-	ALTERNATIVE("j	%l[l_no]", "nop", 0, %[ext], 1)
-	:
-	: [ext] "i" (ext)
-	:
-	: l_no);
+	if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+		asm_volatile_goto(
+		ALTERNATIVE("j	%l[l_no]", "nop", 0, %[ext], 1)
+		:
+		: [ext] "i" (ext)
+		:
+		: l_no);
+	} else {
+		if (!__riscv_isa_extension_available(NULL, ext))
+			goto l_no;
+	}
 
 	return true;
 l_no:
@@ -81,26 +94,23 @@ riscv_has_extension_unlikely(const unsigned long ext)
 	compiletime_assert(ext < RISCV_ISA_EXT_MAX,
 			   "ext must be < RISCV_ISA_EXT_MAX");
 
-	asm_volatile_goto(
-	ALTERNATIVE("nop", "j	%l[l_yes]", 0, %[ext], 1)
-	:
-	: [ext] "i" (ext)
-	:
-	: l_yes);
+	if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+		asm_volatile_goto(
+		ALTERNATIVE("nop", "j	%l[l_yes]", 0, %[ext], 1)
+		:
+		: [ext] "i" (ext)
+		:
+		: l_yes);
+	} else {
+		if (__riscv_isa_extension_available(NULL, ext))
+			goto l_yes;
+	}
 
 	return false;
 l_yes:
 	return true;
 }
 
-unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
-
-#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
-
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
-#define riscv_isa_extension_available(isa_bitmap, ext)	\
-	__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
-
 #endif
 
 #endif /* _ASM_RISCV_HWCAP_H */

From 1ee7fc3f4d0a93831a20d5566f203d5ad6d44de8 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Fri, 24 Mar 2023 10:05:39 +0000
Subject: [PATCH 077/173] RISC-V: always select RISCV_ALTERNATIVE for non-xip
 kernels

When moving switch_to's has_fpu() over to using
riscv_has_extension_likely() rather than static branches, the FPU code
gained a dependency on the alternatives framework.
That dependency has now been removed, as riscv_has_extension_ikely() now
contains a fallback path, using __riscv_isa_extension_available(), but
if CONFIG_RISCV_ALTERNATIVE isn't selected when CONFIG_FPU is, has_fpu()
checks will not benefit from the "fast path" that the alternatives
framework provides.

We want to ensure that alternatives are available whenever
riscv_has_extension_[un]likely() is used, rather than silently falling
back to the slow path, but rather than rely on selecting
RISCV_ALTERNATIVE in the myriad of locations that may use
riscv_has_extension_[un]likely(), select it (almost) always instead by
adding it to the main RISCV config entry.
xip kernels cannot make use of the alternatives framework, so it is not
enabled for those configurations, although this is the status quo.

All current sites that select RISCV_ALTERNATIVE are converted to
dependencies on the option instead. The explicit dependencies on
!XIP_KERNEL can be dropped, as RISCV_ALTERNATIVE is not user selectable.

Fixes: 702e64550b12 ("riscv: fpu: switch has_fpu() to riscv_has_extension_likely()")
Link: https://lore.kernel.org/all/ZBruFRwt3rUVngPu@zx2c4.com/
Reported-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Link: https://lore.kernel.org/r/20230324100538.3514663-3-conor.dooley@microchip.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig         | 12 ++++++------
 arch/riscv/Kconfig.erratas |  6 ++----
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c5e42cc37604..2f6976418d0a 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -126,6 +126,7 @@ config RISCV
 	select OF_IRQ
 	select PCI_DOMAINS_GENERIC if PCI
 	select PCI_MSI if PCI
+	select RISCV_ALTERNATIVE if !XIP_KERNEL
 	select RISCV_INTC
 	select RISCV_TIMER if RISCV_SBI
 	select SIFIVE_PLIC
@@ -401,9 +402,8 @@ config RISCV_ISA_C
 config RISCV_ISA_SVPBMT
 	bool "SVPBMT extension support"
 	depends on 64BIT && MMU
-	depends on !XIP_KERNEL
+	depends on RISCV_ALTERNATIVE
 	default y
-	select RISCV_ALTERNATIVE
 	help
 	   Adds support to dynamically detect the presence of the SVPBMT
 	   ISA-extension (Supervisor-mode: page-based memory types) and
@@ -428,8 +428,8 @@ config TOOLCHAIN_HAS_ZBB
 config RISCV_ISA_ZBB
 	bool "Zbb extension support for bit manipulation instructions"
 	depends on TOOLCHAIN_HAS_ZBB
-	depends on !XIP_KERNEL && MMU
-	select RISCV_ALTERNATIVE
+	depends on MMU
+	depends on RISCV_ALTERNATIVE
 	default y
 	help
 	   Adds support to dynamically detect the presence of the ZBB
@@ -443,9 +443,9 @@ config RISCV_ISA_ZBB
 
 config RISCV_ISA_ZICBOM
 	bool "Zicbom extension support for non-coherent DMA operation"
-	depends on !XIP_KERNEL && MMU
+	depends on MMU
+	depends on RISCV_ALTERNATIVE
 	default y
-	select RISCV_ALTERNATIVE
 	select RISCV_DMA_NONCOHERENT
 	help
 	   Adds support to dynamically detect the presence of the ZICBOM
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index 69621ae6d647..0c8f4652cd82 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -2,8 +2,7 @@ menu "CPU errata selection"
 
 config ERRATA_SIFIVE
 	bool "SiFive errata"
-	depends on !XIP_KERNEL
-	select RISCV_ALTERNATIVE
+	depends on RISCV_ALTERNATIVE
 	help
 	  All SiFive errata Kconfig depend on this Kconfig. Disabling
 	  this Kconfig will disable all SiFive errata. Please say "Y"
@@ -35,8 +34,7 @@ config ERRATA_SIFIVE_CIP_1200
 
 config ERRATA_THEAD
 	bool "T-HEAD errata"
-	depends on !XIP_KERNEL
-	select RISCV_ALTERNATIVE
+	depends on RISCV_ALTERNATIVE
 	help
 	  All T-HEAD errata Kconfig depend on this Kconfig. Disabling
 	  this Kconfig will disable all T-HEAD errata. Please say "Y"

From 88eaba80328b31ef81813a1207b4056efd7006a6 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 20 Mar 2023 15:33:34 +0200
Subject: [PATCH 078/173] nvme-tcp: fix a possible UAF when failing to allocate
 an io queue

When we allocate a nvme-tcp queue, we set the data_ready callback before
we actually need to use it. This creates the potential that if a stray
controller sends us data on the socket before we connect, we can trigger
the io_work and start consuming the socket.

In this case reported: we failed to allocate one of the io queues, and
as we start releasing the queues that we already allocated, we get
a UAF [1] from the io_work which is running before it should really.

Fix this by setting the socket ops callbacks only before we start the
queue, so that we can't accidentally schedule the io_work in the
initialization phase before the queue started. While we are at it,
rename nvme_tcp_restore_sock_calls to pair with nvme_tcp_setup_sock_ops.

[1]:
[16802.107284] nvme nvme4: starting error recovery
[16802.109166] nvme nvme4: Reconnecting in 10 seconds...
[16812.173535] nvme nvme4: failed to connect socket: -111
[16812.173745] nvme nvme4: Failed reconnect attempt 1
[16812.173747] nvme nvme4: Reconnecting in 10 seconds...
[16822.413555] nvme nvme4: failed to connect socket: -111
[16822.413762] nvme nvme4: Failed reconnect attempt 2
[16822.413765] nvme nvme4: Reconnecting in 10 seconds...
[16832.661274] nvme nvme4: creating 32 I/O queues.
[16833.919887] BUG: kernel NULL pointer dereference, address: 0000000000000088
[16833.920068] nvme nvme4: Failed reconnect attempt 3
[16833.920094] #PF: supervisor write access in kernel mode
[16833.920261] nvme nvme4: Reconnecting in 10 seconds...
[16833.920368] #PF: error_code(0x0002) - not-present page
[16833.921086] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp]
[16833.921191] RIP: 0010:_raw_spin_lock_bh+0x17/0x30
...
[16833.923138] Call Trace:
[16833.923271]  <TASK>
[16833.923402]  lock_sock_nested+0x1e/0x50
[16833.923545]  nvme_tcp_try_recv+0x40/0xa0 [nvme_tcp]
[16833.923685]  nvme_tcp_io_work+0x68/0xa0 [nvme_tcp]
[16833.923824]  process_one_work+0x1e8/0x390
[16833.923969]  worker_thread+0x53/0x3d0
[16833.924104]  ? process_one_work+0x390/0x390
[16833.924240]  kthread+0x124/0x150
[16833.924376]  ? set_kthread_struct+0x50/0x50
[16833.924518]  ret_from_fork+0x1f/0x30
[16833.924655]  </TASK>

Reported-by: Yanjun Zhang <zhangyanjun@cestc.cn>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Yanjun Zhang <zhangyanjun@cestc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 46 +++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 42c0598c31f2..49c9e7bc9116 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1620,22 +1620,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
 	if (ret)
 		goto err_init_connect;
 
-	queue->rd_enabled = true;
 	set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
-	nvme_tcp_init_recv_ctx(queue);
-
-	write_lock_bh(&queue->sock->sk->sk_callback_lock);
-	queue->sock->sk->sk_user_data = queue;
-	queue->state_change = queue->sock->sk->sk_state_change;
-	queue->data_ready = queue->sock->sk->sk_data_ready;
-	queue->write_space = queue->sock->sk->sk_write_space;
-	queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
-	queue->sock->sk->sk_state_change = nvme_tcp_state_change;
-	queue->sock->sk->sk_write_space = nvme_tcp_write_space;
-#ifdef CONFIG_NET_RX_BUSY_POLL
-	queue->sock->sk->sk_ll_usec = 1;
-#endif
-	write_unlock_bh(&queue->sock->sk->sk_callback_lock);
 
 	return 0;
 
@@ -1655,7 +1640,7 @@ err_destroy_mutex:
 	return ret;
 }
 
-static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
+static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
 {
 	struct socket *sock = queue->sock;
 
@@ -1670,7 +1655,7 @@ static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue)
 static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
 {
 	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
-	nvme_tcp_restore_sock_calls(queue);
+	nvme_tcp_restore_sock_ops(queue);
 	cancel_work_sync(&queue->io_work);
 }
 
@@ -1688,21 +1673,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
 	mutex_unlock(&queue->queue_lock);
 }
 
+static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
+{
+	write_lock_bh(&queue->sock->sk->sk_callback_lock);
+	queue->sock->sk->sk_user_data = queue;
+	queue->state_change = queue->sock->sk->sk_state_change;
+	queue->data_ready = queue->sock->sk->sk_data_ready;
+	queue->write_space = queue->sock->sk->sk_write_space;
+	queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
+	queue->sock->sk->sk_state_change = nvme_tcp_state_change;
+	queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	queue->sock->sk->sk_ll_usec = 1;
+#endif
+	write_unlock_bh(&queue->sock->sk->sk_callback_lock);
+}
+
 static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+	struct nvme_tcp_queue *queue = &ctrl->queues[idx];
 	int ret;
 
+	queue->rd_enabled = true;
+	nvme_tcp_init_recv_ctx(queue);
+	nvme_tcp_setup_sock_ops(queue);
+
 	if (idx)
 		ret = nvmf_connect_io_queue(nctrl, idx);
 	else
 		ret = nvmf_connect_admin_queue(nctrl);
 
 	if (!ret) {
-		set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
+		set_bit(NVME_TCP_Q_LIVE, &queue->flags);
 	} else {
-		if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
-			__nvme_tcp_stop_queue(&ctrl->queues[idx]);
+		if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
+			__nvme_tcp_stop_queue(queue);
 		dev_err(nctrl->device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
 	}

From fd30d1cdcc4ff405fc54765edf2e11b03f2ed4f3 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 30 Mar 2023 06:52:38 -0600
Subject: [PATCH 079/173] io_uring: fix poll/netmsg alloc caches

We increase cache->nr_cached when we free into the cache but don't
decrease when we take from it, so in some time we'll get an empty
cache with cache->nr_cached larger than IO_ALLOC_CACHE_MAX, that fails
io_alloc_cache_put() and effectively disables caching.

Fixes: 9b797a37c4bd8 ("io_uring: add abstraction around apoll cache")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/alloc_cache.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 729793ae9712..c2cde88aeed5 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -27,6 +27,7 @@ static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *c
 		struct hlist_node *node = cache->list.first;
 
 		hlist_del(node);
+		cache->nr_cached--;
 		return container_of(node, struct io_cache_entry, node);
 	}
 

From ae817e618d4b5d221daae34d32a39476e4bdcb36 Mon Sep 17 00:00:00 2001
From: David Arcari <darcari@redhat.com>
Date: Thu, 30 Mar 2023 09:42:18 -0400
Subject: [PATCH 080/173] thermal: intel: powerclamp: Fix cpumask and max_idle
 module parameters

When cpumask is specified as a module parameter the value is
overwritten by the module init routine.  This can easily be fixed
by checking to see if the mask has already been allocated in the
init routine.

When max_idle is specified as a module parameter a panic will occur.
The problem is that the idle_injection_cpu_mask is not allocated until
the module init routine executes. This can easily be fixed by allocating
the cpumask if it's not already allocated.

Fixes: ebf519710218 ("thermal: intel: powerclamp: Add two module parameters")
Signed-off-by: David Arcari <darcari@redhat.com>
Reviewed-by: Srinivas Pandruvada<srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/intel_powerclamp.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index c7ba5680cd48..91fc7e239497 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -235,6 +235,12 @@ static int max_idle_set(const char *arg, const struct kernel_param *kp)
 		goto skip_limit_set;
 	}
 
+	if (!cpumask_available(idle_injection_cpu_mask)) {
+		ret = allocate_copy_idle_injection_mask(cpu_present_mask);
+		if (ret)
+			goto skip_limit_set;
+	}
+
 	if (check_invalid(idle_injection_cpu_mask, new_max_idle)) {
 		ret = -EINVAL;
 		goto skip_limit_set;
@@ -791,7 +797,8 @@ static int __init powerclamp_init(void)
 		return retval;
 
 	mutex_lock(&powerclamp_lock);
-	retval = allocate_copy_idle_injection_mask(cpu_present_mask);
+	if (!cpumask_available(idle_injection_cpu_mask))
+		retval = allocate_copy_idle_injection_mask(cpu_present_mask);
 	mutex_unlock(&powerclamp_lock);
 
 	if (retval)

From 179a88a8558bbf42991d361595281f3e45d7edfc Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 29 Mar 2023 22:24:06 +0200
Subject: [PATCH 081/173] cifs: fix DFS traversal oops without
 CONFIG_CIFS_DFS_UPCALL

When compiled with CONFIG_CIFS_DFS_UPCALL disabled, cifs_dfs_d_automount
is NULL. cifs.ko logic for mapping CIFS_FATTR_DFS_REFERRAL attributes to
S_AUTOMOUNT and corresponding dentry flags is retained regardless of
CONFIG_CIFS_DFS_UPCALL, leading to a NULL pointer dereference in
VFS follow_automount() when traversing a DFS referral link:
  BUG: kernel NULL pointer dereference, address: 0000000000000000
  ...
  Call Trace:
   <TASK>
   __traverse_mounts+0xb5/0x220
   ? cifs_revalidate_mapping+0x65/0xc0 [cifs]
   step_into+0x195/0x610
   ? lookup_fast+0xe2/0xf0
   path_lookupat+0x64/0x140
   filename_lookup+0xc2/0x140
   ? __create_object+0x299/0x380
   ? kmem_cache_alloc+0x119/0x220
   ? user_path_at_empty+0x31/0x50
   user_path_at_empty+0x31/0x50
   __x64_sys_chdir+0x2a/0xd0
   ? exit_to_user_mode_prepare+0xca/0x100
   do_syscall_64+0x42/0x90
   entry_SYSCALL_64_after_hwframe+0x72/0xdc

This fix adds an inline cifs_dfs_d_automount() {return -EREMOTE} handler
when CONFIG_CIFS_DFS_UPCALL is disabled. An alternative would be to
avoid flagging S_AUTOMOUNT, etc. without CONFIG_CIFS_DFS_UPCALL. This
approach was chosen as it provides more control over the error path.

Signed-off-by: David Disseldorp <ddiss@suse.de>
Cc: stable@vger.kernel.org
Reviewed-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 71fe0a0a7992..415176b2cf32 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -124,7 +124,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops;
 #ifdef CONFIG_CIFS_DFS_UPCALL
 extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
 #else
-#define cifs_dfs_d_automount NULL
+static inline struct vfsmount *cifs_dfs_d_automount(struct path *path)
+{
+	return ERR_PTR(-EREMOTE);
+}
 #endif
 
 /* Functions related to symlinks */

From 6cc041e90c178955219dcee4030bd5423f800f10 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Wed, 29 Mar 2023 17:14:21 -0300
Subject: [PATCH 082/173] cifs: avoid races in parallel reconnects in smb1

Prevent multiple threads of doing negotiate, session setup and tree
connect by holding @ses->session_mutex in cifs_reconnect_tcon() while
reconnecting session and tcon.

Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifssmb.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 38a697eca305..c9d57ba84be4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -71,7 +71,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
 	int rc;
 	struct cifs_ses *ses;
 	struct TCP_Server_Info *server;
-	struct nls_table *nls_codepage;
+	struct nls_table *nls_codepage = NULL;
 
 	/*
 	 * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
@@ -99,6 +99,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
 	}
 	spin_unlock(&tcon->tc_lock);
 
+again:
 	rc = cifs_wait_for_server_reconnect(server, tcon->retry);
 	if (rc)
 		return rc;
@@ -110,8 +111,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
 	}
 	spin_unlock(&ses->chan_lock);
 
-	nls_codepage = load_nls_default();
-
+	mutex_lock(&ses->session_mutex);
 	/*
 	 * Recheck after acquire mutex. If another thread is negotiating
 	 * and the server never sends an answer the socket will be closed
@@ -120,29 +120,38 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
 	spin_lock(&server->srv_lock);
 	if (server->tcpStatus == CifsNeedReconnect) {
 		spin_unlock(&server->srv_lock);
+		mutex_lock(&ses->session_mutex);
+
+		if (tcon->retry)
+			goto again;
 		rc = -EHOSTDOWN;
 		goto out;
 	}
 	spin_unlock(&server->srv_lock);
 
+	nls_codepage = load_nls_default();
+
 	/*
 	 * need to prevent multiple threads trying to simultaneously
 	 * reconnect the same SMB session
 	 */
+	spin_lock(&ses->ses_lock);
 	spin_lock(&ses->chan_lock);
-	if (!cifs_chan_needs_reconnect(ses, server)) {
+	if (!cifs_chan_needs_reconnect(ses, server) &&
+	    ses->ses_status == SES_GOOD) {
 		spin_unlock(&ses->chan_lock);
+		spin_unlock(&ses->ses_lock);
 
 		/* this means that we only need to tree connect */
 		if (tcon->need_reconnect)
 			goto skip_sess_setup;
 
-		rc = -EHOSTDOWN;
+		mutex_unlock(&ses->session_mutex);
 		goto out;
 	}
 	spin_unlock(&ses->chan_lock);
+	spin_unlock(&ses->ses_lock);
 
-	mutex_lock(&ses->session_mutex);
 	rc = cifs_negotiate_protocol(0, ses, server);
 	if (!rc)
 		rc = cifs_setup_session(0, ses, server, nls_codepage);

From 09ba47b44d26b475bbdf9c80db9e0193d2b58956 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Wed, 29 Mar 2023 17:14:22 -0300
Subject: [PATCH 083/173] cifs: prevent infinite recursion in CIFSGetDFSRefer()

We can't call smb_init() in CIFSGetDFSRefer() as cifs_reconnect_tcon()
may end up calling CIFSGetDFSRefer() again to get new DFS referrals
and thus causing an infinite recursion.

Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Cc: stable@vger.kernel.org # 6.2
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifssmb.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c9d57ba84be4..0d30b17494e4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4382,8 +4382,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
 		return -ENODEV;
 
 getDFSRetry:
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB,
-		      (void **) &pSMBr);
+	/*
+	 * Use smb_init_no_reconnect() instead of smb_init() as
+	 * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus
+	 * causing an infinite recursion.
+	 */
+	rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc,
+				   (void **)&pSMB, (void **)&pSMBr);
 	if (rc)
 		return rc;
 

From e03677100707f849f01d8faf07ee58b4e56cdbf1 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Wed, 29 Mar 2023 17:14:23 -0300
Subject: [PATCH 084/173] cifs: get rid of dead check in smb2_reconnect()

The SMB2_IOCTL check in the switch statement will never be true as we
return earlier from smb2_reconnect() if @smb2_command == SMB2_IOCTL.

Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 6bd2aa6af18f..2b92132097dc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -310,7 +310,6 @@ out:
 	case SMB2_READ:
 	case SMB2_WRITE:
 	case SMB2_LOCK:
-	case SMB2_IOCTL:
 	case SMB2_QUERY_DIRECTORY:
 	case SMB2_CHANGE_NOTIFY:
 	case SMB2_QUERY_INFO:

From f9d2b1e146e0f82f3d04629afd92698522058361 Mon Sep 17 00:00:00 2001
From: Bobby Eshleman <bobby.eshleman@bytedance.com>
Date: Wed, 29 Mar 2023 16:51:58 +0000
Subject: [PATCH 085/173] virtio/vsock: fix leaks due to missing skb owner

This patch sets the skb owner in the recv and send path for virtio.

For the send path, this solves the leak caused when
virtio_transport_purge_skbs() finds skb->sk is always NULL and therefore
never matches it with the current socket. Setting the owner upon
allocation fixes this.

For the recv path, this ensures correctness of accounting and also
correct transfer of ownership in vsock_loopback (when skbs are sent from
one socket and received by another).

Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff")
Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
Link: https://lore.kernel.org/all/ZCCbATwov4U+GBUv@pop-os.localdomain/
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 37934dfe72f4..ee78b4082ef9 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -94,6 +94,11 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
 					 info->op,
 					 info->flags);
 
+	if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) {
+		WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n");
+		goto out;
+	}
+
 	return skb;
 
 out:
@@ -1303,6 +1308,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
 		goto free_pkt;
 	}
 
+	if (!skb_set_owner_sk_safe(skb, sk)) {
+		WARN_ONCE(1, "receiving vsock socket has sk_refcnt == 0\n");
+		goto free_pkt;
+	}
+
 	vsk = vsock_sk(sk);
 
 	lock_sock(sk);

From c7d624520c1bd4e42d8ceb8283d6505fc90acccb Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 29 Mar 2023 21:47:19 +0800
Subject: [PATCH 086/173] iommu/vt-d: Remove unnecessary locking in
 intel_irq_remapping_alloc()

The global rwsem dmar_global_lock was introduced by commit 3a5670e8ac932
("iommu/vt-d: Introduce a rwsem to protect global data structures"). It
is used to protect DMAR related global data from DMAR hotplug operations.

Using dmar_global_lock in intel_irq_remapping_alloc() is unnecessary as
the DMAR global data structures are not touched there. Remove it to avoid
below lockdep warning.

 ======================================================
 WARNING: possible circular locking dependency detected
 6.3.0-rc2 #468 Not tainted
 ------------------------------------------------------
 swapper/0/1 is trying to acquire lock:
 ff1db4cb40178698 (&domain->mutex){+.+.}-{3:3},
   at: __irq_domain_alloc_irqs+0x3b/0xa0

 but task is already holding lock:
 ffffffffa0c1cdf0 (dmar_global_lock){++++}-{3:3},
   at: intel_iommu_init+0x58e/0x880

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (dmar_global_lock){++++}-{3:3}:
        lock_acquire+0xd6/0x320
        down_read+0x42/0x180
        intel_irq_remapping_alloc+0xad/0x750
        mp_irqdomain_alloc+0xb8/0x2b0
        irq_domain_alloc_irqs_locked+0x12f/0x2d0
        __irq_domain_alloc_irqs+0x56/0xa0
        alloc_isa_irq_from_domain.isra.7+0xa0/0xe0
        mp_map_pin_to_irq+0x1dc/0x330
        setup_IO_APIC+0x128/0x210
        apic_intr_mode_init+0x67/0x110
        x86_late_time_init+0x24/0x40
        start_kernel+0x41e/0x7e0
        secondary_startup_64_no_verify+0xe0/0xeb

 -> #0 (&domain->mutex){+.+.}-{3:3}:
        check_prevs_add+0x160/0xef0
        __lock_acquire+0x147d/0x1950
        lock_acquire+0xd6/0x320
        __mutex_lock+0x9c/0xfc0
        __irq_domain_alloc_irqs+0x3b/0xa0
        dmar_alloc_hwirq+0x9e/0x120
        iommu_pmu_register+0x11d/0x200
        intel_iommu_init+0x5de/0x880
        pci_iommu_init+0x12/0x40
        do_one_initcall+0x65/0x350
        kernel_init_freeable+0x3ca/0x610
        kernel_init+0x1a/0x140
        ret_from_fork+0x29/0x50

 other info that might help us debug this:

 Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(dmar_global_lock);
                                lock(&domain->mutex);
                                lock(dmar_global_lock);
   lock(&domain->mutex);

                *** DEADLOCK ***

Fixes: 9dbb8e3452ab ("irqdomain: Switch to per-domain locking")
Reviewed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Tested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20230314051836.23817-1-baolu.lu@linux.intel.com
Link: https://lore.kernel.org/r/20230329134721.469447-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/irq_remapping.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 6d01fa078c36..df9e261af0b5 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -311,14 +311,12 @@ static int set_ioapic_sid(struct irte *irte, int apic)
 	if (!irte)
 		return -1;
 
-	down_read(&dmar_global_lock);
 	for (i = 0; i < MAX_IO_APICS; i++) {
 		if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
 			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
 			break;
 		}
 	}
-	up_read(&dmar_global_lock);
 
 	if (sid == 0) {
 		pr_warn("Failed to set source-id of IOAPIC (%d)\n", apic);
@@ -338,14 +336,12 @@ static int set_hpet_sid(struct irte *irte, u8 id)
 	if (!irte)
 		return -1;
 
-	down_read(&dmar_global_lock);
 	for (i = 0; i < MAX_HPET_TBS; i++) {
 		if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
 			sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
 			break;
 		}
 	}
-	up_read(&dmar_global_lock);
 
 	if (sid == 0) {
 		pr_warn("Failed to set source-id of HPET block (%d)\n", id);
@@ -1339,9 +1335,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
 	if (!data)
 		goto out_free_parent;
 
-	down_read(&dmar_global_lock);
 	index = alloc_irte(iommu, &data->irq_2_iommu, nr_irqs);
-	up_read(&dmar_global_lock);
 	if (index < 0) {
 		pr_warn("Failed to allocate IRTE\n");
 		kfree(data);

From bfd3c6b9fa4a1dc78139dd1621d5bea321ffa69d Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 29 Mar 2023 21:47:20 +0800
Subject: [PATCH 087/173] iommu/vt-d: Allow zero SAGAW if second-stage not
 supported

The VT-d spec states (in section 11.4.2) that hardware implementations
reporting second-stage translation support (SSTS) field as Clear also
report the SAGAW field as 0. Fix an inappropriate check in alloc_iommu().

Fixes: 792fb43ce2c9 ("iommu/vt-d: Enable Intel IOMMU scalable mode by default")
Suggested-by: Raghunathan Srinivasan <raghunathan.srinivasan@intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20230318024824.124542-1-baolu.lu@linux.intel.com
Link: https://lore.kernel.org/r/20230329134721.469447-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/dmar.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 6acfe879589c..23828d189c2a 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1071,7 +1071,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	}
 
 	err = -EINVAL;
-	if (cap_sagaw(iommu->cap) == 0) {
+	if (!cap_sagaw(iommu->cap) &&
+	    (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
 		pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
 			iommu->name);
 		drhd->ignored = 1;

From 16812c96550c30a8d5743167ef4e462d6fbe7472 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Wed, 29 Mar 2023 21:47:21 +0800
Subject: [PATCH 088/173] iommu/vt-d: Fix an IOMMU perfmon warning when CPU
 hotplug

A warning can be triggered when hotplug CPU 0.
$ echo 0 > /sys/devices/system/cpu/cpu0/online

 ------------[ cut here ]------------
 Voluntary context switch within RCU read-side critical section!
 WARNING: CPU: 0 PID: 19 at kernel/rcu/tree_plugin.h:318
          rcu_note_context_switch+0x4f4/0x580
 RIP: 0010:rcu_note_context_switch+0x4f4/0x580
 Call Trace:
  <TASK>
  ? perf_event_update_userpage+0x104/0x150
  __schedule+0x8d/0x960
  ? perf_event_set_state.part.82+0x11/0x50
  schedule+0x44/0xb0
  schedule_timeout+0x226/0x310
  ? __perf_event_disable+0x64/0x1a0
  ? _raw_spin_unlock+0x14/0x30
  wait_for_completion+0x94/0x130
  __wait_rcu_gp+0x108/0x130
  synchronize_rcu+0x67/0x70
  ? invoke_rcu_core+0xb0/0xb0
  ? __bpf_trace_rcu_stall_warning+0x10/0x10
  perf_pmu_migrate_context+0x121/0x370
  iommu_pmu_cpu_offline+0x6a/0xa0
  ? iommu_pmu_del+0x1e0/0x1e0
  cpuhp_invoke_callback+0x129/0x510
  cpuhp_thread_fun+0x94/0x150
  smpboot_thread_fn+0x183/0x220
  ? sort_range+0x20/0x20
  kthread+0xe6/0x110
  ? kthread_complete_and_exit+0x20/0x20
  ret_from_fork+0x1f/0x30
  </TASK>
 ---[ end trace 0000000000000000 ]---

The synchronize_rcu() will be invoked in the perf_pmu_migrate_context(),
when migrating a PMU to a new CPU. However, the current for_each_iommu()
is within RCU read-side critical section.

Two methods were considered to fix the issue.
- Use the dmar_global_lock to replace the RCU read lock when going
  through the drhd list. But it triggers a lockdep warning.
- Use the cpuhp_setup_state_multi() to set up a dedicated state for each
  IOMMU PMU. The lock can be avoided.

The latter method is implemented in this patch. Since each IOMMU PMU has
a dedicated state, add cpuhp_node and cpu in struct iommu_pmu to track
the state. The state can be dynamically allocated now. Remove the
CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE.

Fixes: 46284c6ceb5e ("iommu/vt-d: Support cpumask for IOMMU perfmon")
Reported-by: Ammy Yi <ammy.yi@intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lore.kernel.org/r/20230328182028.1366416-1-kan.liang@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20230329134721.469447-4-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.h   |  2 ++
 drivers/iommu/intel/perfmon.c | 68 ++++++++++++++++++++++-------------
 include/linux/cpuhotplug.h    |  1 -
 3 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index d6df3b865812..694ab9b7d3e9 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -641,6 +641,8 @@ struct iommu_pmu {
 	DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX);
 	struct perf_event	*event_list[IOMMU_PMU_IDX_MAX];
 	unsigned char		irq_name[16];
+	struct hlist_node	cpuhp_node;
+	int			cpu;
 };
 
 #define IOMMU_IRQ_ID_OFFSET_PRQ		(DMAR_UNITS_SUPPORTED)
diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c
index e17d9743a0d8..cf43e798eca4 100644
--- a/drivers/iommu/intel/perfmon.c
+++ b/drivers/iommu/intel/perfmon.c
@@ -773,19 +773,34 @@ static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
 	iommu->perf_irq = 0;
 }
 
-static int iommu_pmu_cpu_online(unsigned int cpu)
+static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
+	struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+
 	if (cpumask_empty(&iommu_pmu_cpu_mask))
 		cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
 
+	if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
+		iommu_pmu->cpu = cpu;
+
 	return 0;
 }
 
-static int iommu_pmu_cpu_offline(unsigned int cpu)
+static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 {
-	struct dmar_drhd_unit *drhd;
-	struct intel_iommu *iommu;
-	int target;
+	struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
+	int target = cpumask_first(&iommu_pmu_cpu_mask);
+
+	/*
+	 * The iommu_pmu_cpu_mask has been updated when offline the CPU
+	 * for the first iommu_pmu. Migrate the other iommu_pmu to the
+	 * new target.
+	 */
+	if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
+		perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+		iommu_pmu->cpu = target;
+		return 0;
+	}
 
 	if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
 		return 0;
@@ -795,45 +810,50 @@ static int iommu_pmu_cpu_offline(unsigned int cpu)
 	if (target < nr_cpu_ids)
 		cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
 	else
-		target = -1;
+		return 0;
 
-	rcu_read_lock();
-
-	for_each_iommu(iommu, drhd) {
-		if (!iommu->pmu)
-			continue;
-		perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target);
-	}
-	rcu_read_unlock();
+	perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
+	iommu_pmu->cpu = target;
 
 	return 0;
 }
 
 static int nr_iommu_pmu;
+static enum cpuhp_state iommu_cpuhp_slot;
 
 static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
 {
 	int ret;
 
-	if (nr_iommu_pmu++)
-		return 0;
+	if (!nr_iommu_pmu) {
+		ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+					      "driver/iommu/intel/perfmon:online",
+					      iommu_pmu_cpu_online,
+					      iommu_pmu_cpu_offline);
+		if (ret < 0)
+			return ret;
+		iommu_cpuhp_slot = ret;
+	}
 
-	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
-				"driver/iommu/intel/perfmon:online",
-				iommu_pmu_cpu_online,
-				iommu_pmu_cpu_offline);
-	if (ret)
-		nr_iommu_pmu = 0;
+	ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+	if (ret) {
+		if (!nr_iommu_pmu)
+			cpuhp_remove_multi_state(iommu_cpuhp_slot);
+		return ret;
+	}
+	nr_iommu_pmu++;
 
-	return ret;
+	return 0;
 }
 
 static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
 {
+	cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
+
 	if (--nr_iommu_pmu)
 		return;
 
-	cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE);
+	cpuhp_remove_multi_state(iommu_cpuhp_slot);
 }
 
 void iommu_pmu_register(struct intel_iommu *iommu)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index c6fab004104a..5b2f8147d1ae 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -218,7 +218,6 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_X86_CQM_ONLINE,
 	CPUHP_AP_PERF_X86_CSTATE_ONLINE,
 	CPUHP_AP_PERF_X86_IDXD_ONLINE,
-	CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
 	CPUHP_AP_PERF_S390_CF_ONLINE,
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,

From 44d807320000db0d0013372ad39b53e12d52f758 Mon Sep 17 00:00:00 2001
From: Ziyang Xuan <william.xuanziyang@huawei.com>
Date: Thu, 30 Mar 2023 09:25:32 +0800
Subject: [PATCH 089/173] net: qrtr: Fix a refcount bug in qrtr_recvmsg()

Syzbot reported a bug as following:

refcount_t: addition on 0; use-after-free.
...
RIP: 0010:refcount_warn_saturate+0x17c/0x1f0 lib/refcount.c:25
...
Call Trace:
 <TASK>
 __refcount_add include/linux/refcount.h:199 [inline]
 __refcount_inc include/linux/refcount.h:250 [inline]
 refcount_inc include/linux/refcount.h:267 [inline]
 kref_get include/linux/kref.h:45 [inline]
 qrtr_node_acquire net/qrtr/af_qrtr.c:202 [inline]
 qrtr_node_lookup net/qrtr/af_qrtr.c:398 [inline]
 qrtr_send_resume_tx net/qrtr/af_qrtr.c:1003 [inline]
 qrtr_recvmsg+0x85f/0x990 net/qrtr/af_qrtr.c:1070
 sock_recvmsg_nosec net/socket.c:1017 [inline]
 sock_recvmsg+0xe2/0x160 net/socket.c:1038
 qrtr_ns_worker+0x170/0x1700 net/qrtr/ns.c:688
 process_one_work+0x991/0x15c0 kernel/workqueue.c:2390
 worker_thread+0x669/0x1090 kernel/workqueue.c:2537

It occurs in the concurrent scenario of qrtr_recvmsg() and
qrtr_endpoint_unregister() as following:

	cpu0					cpu1
qrtr_recvmsg				qrtr_endpoint_unregister
qrtr_send_resume_tx			qrtr_node_release
qrtr_node_lookup			mutex_lock(&qrtr_node_lock)
spin_lock_irqsave(&qrtr_nodes_lock, )	refcount_dec_and_test(&node->ref) [node->ref == 0]
radix_tree_lookup [node != NULL]	__qrtr_node_release
qrtr_node_acquire			spin_lock_irqsave(&qrtr_nodes_lock, )
kref_get(&node->ref) [WARNING]		...
					mutex_unlock(&qrtr_node_lock)

Use qrtr_node_lock to protect qrtr_node_lookup() implementation, this
is actually improving the protection of node reference.

Fixes: 0a7e0d0ef054 ("net: qrtr: Migrate node lookup tree to spinlock")
Reported-by: syzbot+a7492efaa5d61b51db23@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=a7492efaa5d61b51db23
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/af_qrtr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c
index 5c2fb992803b..3a70255c8d02 100644
--- a/net/qrtr/af_qrtr.c
+++ b/net/qrtr/af_qrtr.c
@@ -393,10 +393,12 @@ static struct qrtr_node *qrtr_node_lookup(unsigned int nid)
 	struct qrtr_node *node;
 	unsigned long flags;
 
+	mutex_lock(&qrtr_node_lock);
 	spin_lock_irqsave(&qrtr_nodes_lock, flags);
 	node = radix_tree_lookup(&qrtr_nodes, nid);
 	node = qrtr_node_acquire(node);
 	spin_unlock_irqrestore(&qrtr_nodes_lock, flags);
+	mutex_unlock(&qrtr_node_lock);
 
 	return node;
 }

From 653a180957a85c3fc30320cc7e84f5dc913a64f8 Mon Sep 17 00:00:00 2001
From: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Date: Thu, 30 Mar 2023 17:14:02 +0800
Subject: [PATCH 090/173] net: phylink: add phylink_expects_phy() method

Provide phylink_expects_phy() to allow MAC drivers to check if it
is expecting a PHY to attach to. Since fixed-linked setups do not
need to attach to a PHY.

Provides a boolean value as to if the MAC should expect a PHY.
Returns true if a PHY is expected.

Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 19 +++++++++++++++++++
 include/linux/phylink.h   |  1 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 1a2f074685fa..30c166b33468 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1586,6 +1586,25 @@ void phylink_destroy(struct phylink *pl)
 }
 EXPORT_SYMBOL_GPL(phylink_destroy);
 
+/**
+ * phylink_expects_phy() - Determine if phylink expects a phy to be attached
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ *
+ * When using fixed-link mode, or in-band mode with 1000base-X or 2500base-X,
+ * no PHY is needed.
+ *
+ * Returns true if phylink will be expecting a PHY.
+ */
+bool phylink_expects_phy(struct phylink *pl)
+{
+	if (pl->cfg_link_an_mode == MLO_AN_FIXED ||
+	    (pl->cfg_link_an_mode == MLO_AN_INBAND &&
+	     phy_interface_mode_is_8023z(pl->link_config.interface)))
+		return false;
+	return true;
+}
+EXPORT_SYMBOL_GPL(phylink_expects_phy);
+
 static void phylink_phy_change(struct phy_device *phydev, bool up)
 {
 	struct phylink *pl = phydev->phylink;
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index c492c26202b5..637698ed5cb6 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -574,6 +574,7 @@ struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *,
 			       phy_interface_t iface,
 			       const struct phylink_mac_ops *mac_ops);
 void phylink_destroy(struct phylink *);
+bool phylink_expects_phy(struct phylink *pl);
 
 int phylink_connect_phy(struct phylink *, struct phy_device *);
 int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags);

From fe2cfbc9680356a3d9f8adde8a38e715831e32f5 Mon Sep 17 00:00:00 2001
From: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Date: Thu, 30 Mar 2023 17:14:03 +0800
Subject: [PATCH 091/173] net: stmmac: check if MAC needs to attach to a PHY

After the introduction of the fixed-link support, the MAC driver
no longer attempt to scan for a PHY to attach to. This causes the
non fixed-link setups to stop working.

Using the phylink_expects_phy() to check and determine if the MAC
should expect and attach a PHY.

Fixes: ab21cf920928 ("net: stmmac: make mdio register skips PHY scanning for fixed-link")
Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Signed-off-by: Lai Peter Jun Ann <peter.jun.ann.lai@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 17310ade88dd..d41a5f92aee7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1135,6 +1135,7 @@ static int stmmac_init_phy(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct fwnode_handle *fwnode;
+	bool phy_needed;
 	int ret;
 
 	fwnode = of_fwnode_handle(priv->plat->phylink_node);
@@ -1144,10 +1145,11 @@ static int stmmac_init_phy(struct net_device *dev)
 	if (fwnode)
 		ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0);
 
+	phy_needed = phylink_expects_phy(priv->phylink);
 	/* Some DT bindings do not set-up the PHY handle. Let's try to
 	 * manually parse it
 	 */
-	if (!fwnode || ret) {
+	if (!fwnode || phy_needed || ret) {
 		int addr = priv->plat->phy_addr;
 		struct phy_device *phydev;
 

From 6fc21a6ed5953b1dd3a41ce7be1ea57f5ef8c081 Mon Sep 17 00:00:00 2001
From: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Date: Thu, 30 Mar 2023 17:14:04 +0800
Subject: [PATCH 092/173] net: stmmac: remove redundant fixup to support
 fixed-link mode

Currently, intel_speed_mode_2500() will fix-up xpcs_an_inband
to 1 if the underlying controller has a max speed of 1000Mbps.
The value has been initialized and modified if it is
a fixed-linked setup earlier.

This patch removes the fix-up to allow for fixed-linked setup
support. In stmmac_phy_setup(), ovr_an_inband is set based on
the value of xpcs_an_inband. Which in turn will return an
error in phylink_parse_mode() where MLO_AN_FIXED and
ovr_an_inband are both set.

Fixes: c82386310d95 ("stmmac: intel: prepare to support 1000BASE-X phy interface setting")
Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 13aa919633b4..ab9f876b6df7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -251,7 +251,6 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data)
 		priv->plat->mdio_bus_data->xpcs_an_inband = false;
 	} else {
 		priv->plat->max_speed = 1000;
-		priv->plat->mdio_bus_data->xpcs_an_inband = true;
 	}
 }
 

From 154e07c164859fc90bf4e8143f2f6c1af9f3a35e Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Thu, 30 Mar 2023 11:54:42 +0200
Subject: [PATCH 093/173] l2tp: generate correct module alias strings

Commit 65b32f801bfb ("uapi: move IPPROTO_L2TP to in.h") moved the
definition of IPPROTO_L2TP from a define to an enum, but since
__stringify doesn't work properly with enums, we ended up breaking the
modalias strings for the l2tp modules:

 $ modinfo l2tp_ip l2tp_ip6 | grep alias
 alias:          net-pf-2-proto-IPPROTO_L2TP
 alias:          net-pf-2-proto-2-type-IPPROTO_L2TP
 alias:          net-pf-10-proto-IPPROTO_L2TP
 alias:          net-pf-10-proto-2-type-IPPROTO_L2TP

Use the resolved number directly in MODULE_ALIAS_*() macros (as we
already do with SOCK_DGRAM) to fix the alias strings:

$ modinfo l2tp_ip l2tp_ip6 | grep alias
alias:          net-pf-2-proto-115
alias:          net-pf-2-proto-115-type-2
alias:          net-pf-10-proto-115
alias:          net-pf-10-proto-115-type-2

Moreover, fix the ordering of the parameters passed to
MODULE_ALIAS_NET_PF_PROTO_TYPE() by switching proto and type.

Fixes: 65b32f801bfb ("uapi: move IPPROTO_L2TP to in.h")
Link: https://lore.kernel.org/lkml/ZCQt7hmodtUaBlCP@righiandr-XPS-13-7390
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Tested-by:  Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c  | 8 ++++----
 net/l2tp/l2tp_ip6.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4db5a554bdbd..41a74fc84ca1 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -677,8 +677,8 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP over IP");
 MODULE_VERSION("1.0");
 
-/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like
- * enums
+/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol,
+ * because __stringify doesn't like enums
  */
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
-MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 115, 2);
+MODULE_ALIAS_NET_PF_PROTO(PF_INET, 115);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 2478aa60145f..5137ea1861ce 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -806,8 +806,8 @@ MODULE_AUTHOR("Chris Elston <celston@katalix.com>");
 MODULE_DESCRIPTION("L2TP IP encapsulation for IPv6");
 MODULE_VERSION("1.0");
 
-/* Use the value of SOCK_DGRAM (2) directory, because __stringify doesn't like
- * enums
+/* Use the values of SOCK_DGRAM (2) as type and IPPROTO_L2TP (115) as protocol,
+ * because __stringify doesn't like enums
  */
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP);
-MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 115, 2);
+MODULE_ALIAS_NET_PF_PROTO(PF_INET6, 115);

From c5b959eeb7f9e40673b97c08c71cbfff5f5923f2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 31 Mar 2023 09:48:56 +0200
Subject: [PATCH 094/173] net: netcp: MAX_SKB_FRAGS is now 'int'

The type of MAX_SKB_FRAGS has changed recently, so the debug printk
needs to be updated:

drivers/net/ethernet/ti/netcp_core.c: In function 'netcp_create_interface':
drivers/net/ethernet/ti/netcp_core.c:2084:30: error: format '%ld' expects argument of type 'long int', but argument 3 has type 'int' [-Werror=format=]
 2084 |                 dev_err(dev, "tx-pool size too small, must be at least %ld\n",
      |                              ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/netcp_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 1bb596a9d8a2..dfdbcdeb991f 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2081,7 +2081,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
 	netcp->tx_pool_region_id = temp[1];
 
 	if (netcp->tx_pool_size < MAX_SKB_FRAGS) {
-		dev_err(dev, "tx-pool size too small, must be at least %ld\n",
+		dev_err(dev, "tx-pool size too small, must be at least %d\n",
 			MAX_SKB_FRAGS);
 		ret = -ENODEV;
 		goto quit;

From 362f0b6678ad1377c322a7dd237ea6785efc7342 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Fri, 31 Mar 2023 08:35:15 +0200
Subject: [PATCH 095/173] net: wwan: t7xx: do not compile with -Werror

When playing with various compilers or their versions, some choke on
the t7xx code. For example (with gcc 13):
 In file included from ./arch/s390/include/generated/asm/rwonce.h:1,
                  from ../include/linux/compiler.h:247,
                  from ../include/linux/build_bug.h:5,
                  from ../include/linux/bits.h:22,
                  from ../drivers/net/wwan/t7xx/t7xx_state_monitor.c:17:
 In function 'preempt_count',
     inlined from 't7xx_fsm_append_event' at ../drivers/net/wwan/t7xx/t7xx_state_monitor.c:439:43:
 ../include/asm-generic/rwonce.h:44:26: error: array subscript 0 is outside array bounds of 'const volatile int[0]' [-Werror=array-bounds=]

There is no reason for any code in the kernel to be built with -Werror
by default. Note that we have generic CONFIG_WERROR. So if anyone wants
-Werror, they can enable that.

Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/all/20230330232717.1f8bf5ea@kernel.org/
Cc: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
Cc: Intel Corporation <linuxwwan@intel.com>
Cc: Chiranjeevi Rapolu <chiranjeevi.rapolu@linux.intel.com>
Cc: Liu Haijun <haijun.liu@mediatek.com>
Cc: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Cc: Ricardo Martinez <ricardo.martinez@linux.intel.com>
Cc: Loic Poulain <loic.poulain@linaro.org>
Cc: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wwan/t7xx/Makefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wwan/t7xx/Makefile b/drivers/net/wwan/t7xx/Makefile
index 268ff9e87e5b..2652cd00504e 100644
--- a/drivers/net/wwan/t7xx/Makefile
+++ b/drivers/net/wwan/t7xx/Makefile
@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
-ccflags-y += -Werror
-
 obj-${CONFIG_MTK_T7XX} := mtk_t7xx.o
 mtk_t7xx-y:=	t7xx_pci.o \
 		t7xx_pcie_mac.o \

From ffa5395a7901e83a68d88207c4592962906641bd Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <avkrasnov@sberdevices.ru>
Date: Fri, 31 Mar 2023 10:56:41 +0300
Subject: [PATCH 096/173] vsock/vmci: convert VMCI error code to -ENOMEM on
 send

This adds conversion of VMCI specific error code to general -ENOMEM. It
is needed, because af_vsock.c passes error value returned from transport
to the user, which does not expect to get VMCI_ERROR_* values.

Fixes: c43170b7e157 ("vsock: return errors other than -ENOMEM to socket")
Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Vishnu Dasa <vdasa@vmware.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/vmci_transport.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 36eb16a40745..95cc4d79ba29 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1842,7 +1842,13 @@ static ssize_t vmci_transport_stream_enqueue(
 	struct msghdr *msg,
 	size_t len)
 {
-	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
+	ssize_t err;
+
+	err = vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0);
+	if (err < 0)
+		err = -ENOMEM;
+
+	return err;
 }
 
 static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)

From cb2239c198ad9fbd5aced22cf93e45562da781eb Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Thu, 30 Mar 2023 09:13:16 +0200
Subject: [PATCH 097/173] fs: drop peer group ids under namespace lock

When cleaning up peer group ids in the failure path we need to make sure
to hold on to the namespace lock. Otherwise another thread might just
turn the mount from a shared into a non-shared mount concurrently.

Link: https://lore.kernel.org/lkml/00000000000088694505f8132d77@google.com
Fixes: 2a1867219c7b ("fs: add mount_setattr()")
Reported-by: syzbot+8ac3859139c685c4f597@syzkaller.appspotmail.com
Cc: stable@vger.kernel.org # 5.12+
Message-Id: <20230330-vfs-mount_setattr-propagation-fix-v1-1-37548d91533b@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index bc0f15257b49..6836e937ee61 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4183,9 +4183,9 @@ out:
 	unlock_mount_hash();
 
 	if (kattr->propagation) {
-		namespace_unlock();
 		if (err)
 			cleanup_group_ids(mnt, NULL);
+		namespace_unlock();
 	}
 
 	return err;

From 9fdc1605c504204e0fdec7892b29c916579e06f3 Mon Sep 17 00:00:00 2001
From: Andy Chi <andy.chi@canonical.com>
Date: Fri, 31 Mar 2023 16:32:41 +0800
Subject: [PATCH 098/173] ALSA: hda/realtek: fix mute/micmute LEDs for a HP
 ProBook

There is a HP ProBook which using ALC236 codec and need the
ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mute LED and
micmute LED work.

Signed-off-by: Andy Chi <andy.chi@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20230331083242.58416-1-andy.chi@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a2706fd87b14..bd4e1a3a55b4 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9443,6 +9443,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
 	SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+	SND_PCI_QUIRK(0x103c, 0x8b66, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
 	SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b87, "HP", ALC236_FIXUP_HP_GPIO_LED),

From e4efa515d58f1363d8a27e548f9c5769d3121e03 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 20 Mar 2023 13:22:52 +0100
Subject: [PATCH 099/173] wifi: brcmfmac: Fix SDIO suspend/resume regression

After commit 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card
unless WOWL is used"), the wifi adapter by default is turned off on suspend
and then re-probed on resume.

In at least 2 model x86/acpi tablets with brcmfmac43430a1 wifi adapters,
the newly added re-probe on resume fails like this:

 brcmfmac: brcmf_sdio_bus_rxctl: resumed on timeout
 ieee80211 phy1: brcmf_bus_started: failed: -110
 ieee80211 phy1: brcmf_attach: dongle is not responding: err=-110
 brcmfmac: brcmf_sdio_firmware_callback: brcmf_attach failed

It seems this specific brcmfmac model does not like being reprobed without
it actually being turned off first.

And the adapter is not being turned off during suspend because of
commit f0992ace680c ("brcmfmac: prohibit ACPI power management for brcmfmac
driver").

Now that the driver is being reprobed on resume, the disabling of ACPI
pm is no longer necessary, except when WOWL is used (in which case there
is no-reprobe).

Move the dis-/en-abling of ACPI pm to brcmf_sdio_wowl_config(), this fixes
the brcmfmac43430a1 suspend/resume regression and should help save some
power when suspended.

This change means that the code now also may re-enable ACPI pm when WOWL
gets disabled. ACPI pm should only be re-enabled if it was enabled by
the ACPI core originally. Add a brcmf_sdiod_acpi_save_power_manageable()
to save the original state for this.

This has been tested on the following devices:

Asus T100TA                brcmfmac43241b4-sdio
Acer Iconia One 7 B1-750   brcmfmac43340-sdio
Chuwi Hi8                  brcmfmac43430a0-sdio
Chuwi Hi8                  brcmfmac43430a1-sdio

(the Asus T100TA is the device for which the prohibiting of ACPI pm
 was originally added)

Fixes: 92cadedd9d5f ("brcmfmac: Avoid keeping power to SDIO card unless WOWL is used")
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230320122252.240070-1-hdegoede@redhat.com
---
 .../broadcom/brcm80211/brcmfmac/bcmsdh.c      | 36 +++++++++++++------
 .../broadcom/brcm80211/brcmfmac/sdio.h        |  2 ++
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index b7c918f241c9..65d4799a5658 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -994,15 +994,34 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
 MODULE_DEVICE_TABLE(sdio, brcmf_sdmmc_ids);
 
 
-static void brcmf_sdiod_acpi_set_power_manageable(struct device *dev,
-						  int val)
+static void brcmf_sdiod_acpi_save_power_manageable(struct brcmf_sdio_dev *sdiodev)
 {
 #if IS_ENABLED(CONFIG_ACPI)
 	struct acpi_device *adev;
 
-	adev = ACPI_COMPANION(dev);
+	adev = ACPI_COMPANION(&sdiodev->func1->dev);
 	if (adev)
-		adev->flags.power_manageable = 0;
+		sdiodev->func1_power_manageable = adev->flags.power_manageable;
+
+	adev = ACPI_COMPANION(&sdiodev->func2->dev);
+	if (adev)
+		sdiodev->func2_power_manageable = adev->flags.power_manageable;
+#endif
+}
+
+static void brcmf_sdiod_acpi_set_power_manageable(struct brcmf_sdio_dev *sdiodev,
+						  int enable)
+{
+#if IS_ENABLED(CONFIG_ACPI)
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(&sdiodev->func1->dev);
+	if (adev)
+		adev->flags.power_manageable = enable ? sdiodev->func1_power_manageable : 0;
+
+	adev = ACPI_COMPANION(&sdiodev->func2->dev);
+	if (adev)
+		adev->flags.power_manageable = enable ? sdiodev->func2_power_manageable : 0;
 #endif
 }
 
@@ -1012,7 +1031,6 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
 	int err;
 	struct brcmf_sdio_dev *sdiodev;
 	struct brcmf_bus *bus_if;
-	struct device *dev;
 
 	brcmf_dbg(SDIO, "Enter\n");
 	brcmf_dbg(SDIO, "Class=%x\n", func->class);
@@ -1020,14 +1038,9 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
 	brcmf_dbg(SDIO, "sdio device ID: 0x%04x\n", func->device);
 	brcmf_dbg(SDIO, "Function#: %d\n", func->num);
 
-	dev = &func->dev;
-
 	/* Set MMC_QUIRK_LENIENT_FN0 for this card */
 	func->card->quirks |= MMC_QUIRK_LENIENT_FN0;
 
-	/* prohibit ACPI power management for this device */
-	brcmf_sdiod_acpi_set_power_manageable(dev, 0);
-
 	/* Consume func num 1 but dont do anything with it. */
 	if (func->num == 1)
 		return 0;
@@ -1059,6 +1072,7 @@ static int brcmf_ops_sdio_probe(struct sdio_func *func,
 	dev_set_drvdata(&sdiodev->func1->dev, bus_if);
 	sdiodev->dev = &sdiodev->func1->dev;
 
+	brcmf_sdiod_acpi_save_power_manageable(sdiodev);
 	brcmf_sdiod_change_state(sdiodev, BRCMF_SDIOD_DOWN);
 
 	brcmf_dbg(SDIO, "F2 found, calling brcmf_sdiod_probe...\n");
@@ -1124,6 +1138,8 @@ void brcmf_sdio_wowl_config(struct device *dev, bool enabled)
 
 	if (sdiodev->settings->bus.sdio.oob_irq_supported ||
 	    pm_caps & MMC_PM_WAKE_SDIO_IRQ) {
+		/* Stop ACPI from turning off the device when wowl is enabled */
+		brcmf_sdiod_acpi_set_power_manageable(sdiodev, !enabled);
 		sdiodev->wowl_enabled = enabled;
 		brcmf_dbg(SDIO, "Configuring WOWL, enabled=%d\n", enabled);
 		return;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
index b76d34d36bde..0d18ed15b403 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.h
@@ -188,6 +188,8 @@ struct brcmf_sdio_dev {
 	char nvram_name[BRCMF_FW_NAME_LEN];
 	char clm_name[BRCMF_FW_NAME_LEN];
 	bool wowl_enabled;
+	bool func1_power_manageable;
+	bool func2_power_manageable;
 	enum brcmf_sdiod_state state;
 	struct brcmf_sdiod_freezer *freezer;
 	const struct firmware *clm_fw;

From 2ceb76f734e37833824b7fab6af17c999eb48d2b Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Wed, 22 Mar 2023 17:37:17 +0100
Subject: [PATCH 100/173] wifi: mt76: mt7921: Fix use-after-free in fw features
 query.

Stop referencing 'features' memory after release_firmware is called.

Fixes this crash:

RIP: 0010:mt7921_check_offload_capability+0x17d
mt7921_pci_probe+0xca/0x4b0
...

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/51fd8f76494348aa9ecbf0abc471ebe47a983dfd.1679502607.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index 80c71acfe159..cc94531185da 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -171,12 +171,12 @@ mt7921_mac_init_band(struct mt7921_dev *dev, u8 band)
 
 u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm)
 {
-	struct mt7921_fw_features *features = NULL;
 	const struct mt76_connac2_fw_trailer *hdr;
 	struct mt7921_realease_info *rel_info;
 	const struct firmware *fw;
 	int ret, i, offset = 0;
 	const u8 *data, *end;
+	u8 offload_caps = 0;
 
 	ret = request_firmware(&fw, fw_wm, dev);
 	if (ret)
@@ -208,7 +208,10 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm)
 		data += sizeof(*rel_info);
 
 		if (rel_info->tag == MT7921_FW_TAG_FEATURE) {
+			struct mt7921_fw_features *features;
+
 			features = (struct mt7921_fw_features *)data;
+			offload_caps = features->data;
 			break;
 		}
 
@@ -218,7 +221,7 @@ u8 mt7921_check_offload_capability(struct device *dev, const char *fw_wm)
 out:
 	release_firmware(fw);
 
-	return features ? features->data : 0;
+	return offload_caps;
 }
 EXPORT_SYMBOL_GPL(mt7921_check_offload_capability);
 

From eb85df0a5643612285f61f38122564498d0c49f7 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 28 Mar 2023 12:01:17 +0200
Subject: [PATCH 101/173] wifi: mt76: mt7921: fix fw used for offload check for
 mt7922

Fix the firmware version used for offload capability check used by 0x0616
devices. This path enables offload capabilities for 0x0616 devices.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=217245
Fixes: 034ae28b56f1 ("wifi: mt76: mt7921: introduce remain_on_channel support")
Cc: stable@vger.kernel.org
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/632d8f0c9781c9902d7160e2c080aa7e9232d50d.1679997487.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index cb72ded37256..5c23c827abe4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -20,7 +20,7 @@ static const struct pci_device_id mt7921_pci_device_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0608),
 		.driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM },
 	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x0616),
-		.driver_data = (kernel_ulong_t)MT7921_FIRMWARE_WM },
+		.driver_data = (kernel_ulong_t)MT7922_FIRMWARE_WM },
 	{ },
 };
 

From f1594bc676579133a3cd906d7d27733289edfb86 Mon Sep 17 00:00:00 2001
From: Anh Tuan Phan <tuananhlfc@gmail.com>
Date: Fri, 24 Mar 2023 09:14:15 +0700
Subject: [PATCH 102/173] selftests mount: Fix mount_setattr_test builds failed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When compiling selftests with target mount_setattr I encountered some errors with the below messages:
mount_setattr_test.c: In function ‘mount_setattr_thread’:
mount_setattr_test.c:343:16: error: variable ‘attr’ has initializer but incomplete type
  343 |         struct mount_attr attr = {
      |                ^~~~~~~~~~

These errors might be because of linux/mount.h is not included. This patch resolves that issue.

Signed-off-by: Anh Tuan Phan <tuananhlfc@gmail.com>
Acked-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/mount_setattr/mount_setattr_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 582669ca38e9..c6a8c732b802 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -18,6 +18,7 @@
 #include <grp.h>
 #include <stdbool.h>
 #include <stdarg.h>
+#include <linux/mount.h>
 
 #include "../kselftest_harness.h"
 

From 52882b9c7a761b2b4e44717d6fbd1ed94c601b7f Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Wed, 4 May 2022 17:48:07 +1000
Subject: [PATCH 103/173] KVM: PPC: Make KVM_CAP_IRQFD_RESAMPLE platform
 dependent

When introduced, IRQFD resampling worked on POWER8 with XICS. However
KVM on POWER9 has never implemented it - the compatibility mode code
("XICS-on-XIVE") misses the kvm_notify_acked_irq() call and the native
XIVE mode does not handle INTx in KVM at all.

This moved the capability support advertising to platforms and stops
advertising it on XIVE, i.e. POWER9 and later.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Acked-by: Anup Patel <anup@brainfault.org>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
Message-Id: <20220504074807.3616813-1-aik@ozlabs.ru>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/kvm/arm.c       | 1 +
 arch/powerpc/kvm/powerpc.c | 6 ++++++
 arch/s390/kvm/kvm-s390.c   | 1 +
 arch/x86/kvm/x86.c         | 1 +
 virt/kvm/kvm_main.c        | 1 -
 5 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3bd732eaf087..3f6a5efdbcf0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -220,6 +220,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_VCPU_ATTRIBUTES:
 	case KVM_CAP_PTP_KVM:
 	case KVM_CAP_ARM_SYSTEM_SUSPEND:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c5405fc5538..d23e25e8432d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -576,6 +576,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 #endif
 
+#ifdef CONFIG_HAVE_KVM_IRQFD
+	case KVM_CAP_IRQFD_RESAMPLE:
+		r = !xive_enabled();
+		break;
+#endif
+
 	case KVM_CAP_PPC_ALLOC_HTAB:
 		r = hv_enabled;
 		break;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 39b36562c043..1eeb9ae57879 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -573,6 +573,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_VCPU_RESETS:
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_S390_DIAG318:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7d6f98b7635f..3d852ce84920 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4432,6 +4432,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_VAPIC:
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_EXIT_HYPERCALL:
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d255964ec331..b1679d08a216 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4479,7 +4479,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
 	case KVM_CAP_IRQFD:
-	case KVM_CAP_IRQFD_RESAMPLE:
 #endif
 	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
 	case KVM_CAP_CHECK_EXTENSION_VM:

From 52f91e51944808d83dfe2d5582601b5e84e472cc Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 31 Mar 2023 19:31:48 +0200
Subject: [PATCH 104/173] platform/x86: gigabyte-wmi: add support for X570S
 AORUS ELITE

Add "X570S AORUS ELITE" to known working boards

Reported-by: Brandon Nielsen <nielsenb@jetfuse.net>
Link: https://lore.kernel.org/r/20230331014902.7864-1-nielsenb@jetfuse.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 5e5b17c50eb6..2a426040f749 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -161,6 +161,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570S AORUS ELITE"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"),
 	{ }
 };

From e352d685fde427a8fc9beb2ba30888f5d6f2e5e6 Mon Sep 17 00:00:00 2001
From: weiliang1503 <weiliang1503@gmail.com>
Date: Thu, 30 Mar 2023 19:49:43 +0800
Subject: [PATCH 105/173] platform/x86: asus-nb-wmi: Add quirk_asus_tablet_mode
 to other ROG Flow X13 models

Make quirk_asus_tablet_mode apply on other ROG Flow X13 devices,
which only affects the GV301Q model before.

Signed-off-by: weiliang1503 <weiliang1503@gmail.com>
Link: https://lore.kernel.org/r/20230330114943.15057-1-weiliang1503@gmail.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/asus-nb-wmi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index cb15acdf14a3..e2c9a68d12df 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -464,7 +464,8 @@ static const struct dmi_system_id asus_quirks[] = {
 		.ident = "ASUS ROG FLOW X13",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_MATCH(DMI_PRODUCT_NAME, "GV301Q"),
+			/* Match GV301** */
+			DMI_MATCH(DMI_PRODUCT_NAME, "GV301"),
 		},
 		.driver_data = &quirk_asus_tablet_mode,
 	},

From e3271a5917d1501089b1a224d702aa053e2877f4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 30 Mar 2023 21:46:44 +0200
Subject: [PATCH 106/173] platform/x86: ideapad-laptop: Stop sending
 KEY_TOUCHPAD_TOGGLE

Commit 5829f8a897e4 ("platform/x86: ideapad-laptop: Send
KEY_TOUCHPAD_TOGGLE on some models") made ideapad-laptop send
KEY_TOUCHPAD_TOGGLE when we receive an ACPI notify with VPC event bit 5 set
and the touchpad-state has not been changed by the EC itself already.

This was done under the assumption that this would be good to do to make
the touchpad-toggle hotkey work on newer models where the EC does not
toggle the touchpad on/off itself (because it is not routed through
the PS/2 controller, but uses I2C).

But it turns out that at least some models, e.g. the Yoga 7-15ITL5 the EC
triggers an ACPI notify with VPC event bit 5 set on resume, which would
now cause a spurious KEY_TOUCHPAD_TOGGLE on resume to which the desktop
environment responds by disabling the touchpad in software, breaking
the touchpad (until manually re-enabled) on resume.

It was never confirmed that sending KEY_TOUCHPAD_TOGGLE actually improves
things on new models and at least some new models like the Yoga 7-15ITL5
don't have a touchpad on/off toggle hotkey at all, while still sending
ACPI notify events with VPC event bit 5 set.

So it seems best to revert the change to send KEY_TOUCHPAD_TOGGLE when
receiving an ACPI notify events with VPC event bit 5 and the touchpad
state as reported by the EC has not changed.

Note this is not a full revert the code to cache the last EC touchpad
state is kept to avoid sending spurious KEY_TOUCHPAD_ON / _OFF events
on resume.

Fixes: 5829f8a897e4 ("platform/x86: ideapad-laptop: Send KEY_TOUCHPAD_TOGGLE on some models")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217234
Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20230330194644.64628-1-hdegoede@redhat.com
---
 drivers/platform/x86/ideapad-laptop.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 0eb5bfdd823a..959ec3c5f376 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1170,7 +1170,6 @@ static const struct key_entry ideapad_keymap[] = {
 	{ KE_KEY,  65, { KEY_PROG4 } },
 	{ KE_KEY,  66, { KEY_TOUCHPAD_OFF } },
 	{ KE_KEY,  67, { KEY_TOUCHPAD_ON } },
-	{ KE_KEY,  68, { KEY_TOUCHPAD_TOGGLE } },
 	{ KE_KEY, 128, { KEY_ESC } },
 
 	/*
@@ -1526,18 +1525,16 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_
 	if (priv->features.ctrl_ps2_aux_port)
 		i8042_command(&param, value ? I8042_CMD_AUX_ENABLE : I8042_CMD_AUX_DISABLE);
 
-	if (send_events) {
-		/*
-		 * On older models the EC controls the touchpad and toggles it
-		 * on/off itself, in this case we report KEY_TOUCHPAD_ON/_OFF.
-		 * If the EC did not toggle, report KEY_TOUCHPAD_TOGGLE.
-		 */
-		if (value != priv->r_touchpad_val) {
-			ideapad_input_report(priv, value ? 67 : 66);
-			sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad");
-		} else {
-			ideapad_input_report(priv, 68);
-		}
+	/*
+	 * On older models the EC controls the touchpad and toggles it on/off
+	 * itself, in this case we report KEY_TOUCHPAD_ON/_OFF. Some models do
+	 * an acpi-notify with VPC bit 5 set on resume, so this function get
+	 * called with send_events=true on every resume. Therefor if the EC did
+	 * not toggle, do nothing to avoid sending spurious KEY_TOUCHPAD_TOGGLE.
+	 */
+	if (send_events && value != priv->r_touchpad_val) {
+		ideapad_input_report(priv, value ? 67 : 66);
+		sysfs_notify(&priv->platform_device->dev.kobj, NULL, "touchpad");
 	}
 
 	priv->r_touchpad_val = value;

From 36d4d213c6d4fffae2645a601e8ae996de4c3645 Mon Sep 17 00:00:00 2001
From: Jeremy Soller <jeremy@system76.com>
Date: Fri, 31 Mar 2023 10:23:17 -0600
Subject: [PATCH 107/173] ALSA: hda/realtek: Add quirk for Clevo X370SNW

Fixes speaker output and headset detection on Clevo X370SNW.

Signed-off-by: Jeremy Soller <jeremy@system76.com>
Signed-off-by: Tim Crawford <tcrawford@system76.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20230331162317.14992-1-tcrawford@system76.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index bd4e1a3a55b4..26187f5d56b5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2624,6 +2624,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
 	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
+	SND_PCI_QUIRK(0x1558, 0x3702, "Clevo X370SN[VW]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
 	SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),

From 804d8e0a6e54427268790472781e03bc243f4ee3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 31 Mar 2023 16:31:19 -0400
Subject: [PATCH 108/173] NFSD: Avoid calling OPDESC() with ops->opnum ==
 OP_ILLEGAL

OPDESC() simply indexes into nfsd4_ops[] by the op's operation
number, without range checking that value. It assumes callers are
careful to avoid calling it with an out-of-bounds opnum value.

nfsd4_decode_compound() is not so careful, and can invoke OPDESC()
with opnum set to OP_ILLEGAL, which is 10044 -- well beyond the end
of nfsd4_ops[].

Reported-by: Jeff Layton <jlayton@kernel.org>
Fixes: f4f9ef4a1b0a ("nfsd4: opdesc will be useful outside nfs4proc.c")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4xdr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 97edb32be77f..67bbd2d6334c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2476,10 +2476,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	for (i = 0; i < argp->opcnt; i++) {
 		op = &argp->ops[i];
 		op->replay = NULL;
+		op->opdesc = NULL;
 
 		if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
 			return false;
 		if (nfsd4_opnum_in_range(argp, op)) {
+			op->opdesc = OPDESC(op);
 			op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
 			if (op->status != nfs_ok)
 				trace_nfsd_compound_decode_err(argp->rqstp,
@@ -2490,7 +2492,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 			op->opnum = OP_ILLEGAL;
 			op->status = nfserr_op_illegal;
 		}
-		op->opdesc = OPDESC(op);
+
 		/*
 		 * We'll try to cache the result in the DRC if any one
 		 * op in the compound wants to be cached:

From 15a8b55dbb1ba154d82627547c5761cac884d810 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 27 Mar 2023 06:21:37 -0400
Subject: [PATCH 109/173] nfsd: call op_release, even when op_func returns an
 error

For ops with "trivial" replies, nfsd4_encode_operation will shortcut
most of the encoding work and skip to just marshalling up the status.
One of the things it skips is calling op_release. This could cause a
memory leak in the layoutget codepath if there is an error at an
inopportune time.

Have the compound processing engine always call op_release, even when
op_func sets an error in op->status. With this change, we also need
nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
on error to avoid a double free.

Reported-by: Zhi Li <yieli@redhat.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
Fixes: 34b1744c91cc ("nfsd4: define ->op_release for compound ops")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/blocklayout.c |  1 +
 fs/nfsd/nfs4xdr.c     | 11 +++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 04697f8dc37d..01d7fd108cf3 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
 
 out_free_dev:
 	kfree(dev);
+	gdp->gd_device = NULL;
 	return ret;
 }
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 67bbd2d6334c..7799835c2196 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5400,10 +5400,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	__be32 *p;
 
 	p = xdr_reserve_space(xdr, 8);
-	if (!p) {
-		WARN_ON_ONCE(1);
-		return;
-	}
+	if (!p)
+		goto release;
 	*p++ = cpu_to_be32(op->opnum);
 	post_err_offset = xdr->buf->len;
 
@@ -5418,8 +5416,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	op->status = encoder(resp, op->status, &op->u);
 	if (op->status)
 		trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
-	if (opdesc && opdesc->op_release)
-		opdesc->op_release(&op->u);
 	xdr_commit_encode(xdr);
 
 	/* nfsd4_check_resp_size guarantees enough room for error status */
@@ -5460,6 +5456,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	}
 status:
 	*p = op->status;
+release:
+	if (opdesc && opdesc->op_release)
+		opdesc->op_release(&op->u);
 }
 
 /* 

From 7b50567bdcad8925ca1e075feb7171c12015afd1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 7 Feb 2023 17:13:12 +0100
Subject: [PATCH 110/173] media: i2c: imx290: fix conditional function
 defintions

The runtime suspend/resume functions are only referenced from the
dev_pm_ops, but they use the old SET_RUNTIME_PM_OPS() helper that
requires a __maybe_unused annotation to avoid a warning:

  drivers/media/i2c/imx290.c:1082:12: error: unused function 'imx290_runtime_resume' [-Werror,-Wunused-function]
  static int imx290_runtime_resume(struct device *dev)
             ^
  drivers/media/i2c/imx290.c:1090:12: error: unused function 'imx290_runtime_suspend' [-Werror,-Wunused-function]
  static int imx290_runtime_suspend(struct device *dev)
             ^

Convert this to the new RUNTIME_PM_OPS() helper that so this is not
required.  To improve this further, also use the pm_ptr() helper that
lets the dev_pm_ops get dropped entirely when CONFIG_PM is disabled.

A related mistake happened in the of_match_ptr() macro here, which like
SET_RUNTIME_PM_OPS() requires the match table to be marked as
__maybe_unused, though I could not reproduce building this without
CONFIG_OF.  Remove the of_match_ptr() here as there is no point in
dropping the match table in configurations without CONFIG_OF.

Fixes: 02852c01f654 ("media: i2c: imx290: Initialize runtime PM before subdev")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Reported-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/media/i2c/imx290.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c
index 49d6c8bdec41..48ae2e0adf9e 100644
--- a/drivers/media/i2c/imx290.c
+++ b/drivers/media/i2c/imx290.c
@@ -1098,7 +1098,7 @@ static int imx290_runtime_suspend(struct device *dev)
 }
 
 static const struct dev_pm_ops imx290_pm_ops = {
-	SET_RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL)
+	RUNTIME_PM_OPS(imx290_runtime_suspend, imx290_runtime_resume, NULL)
 };
 
 /* ----------------------------------------------------------------------------
@@ -1362,8 +1362,8 @@ static struct i2c_driver imx290_i2c_driver = {
 	.remove = imx290_remove,
 	.driver = {
 		.name  = "imx290",
-		.pm = &imx290_pm_ops,
-		.of_match_table = of_match_ptr(imx290_of_match),
+		.pm = pm_ptr(&imx290_pm_ops),
+		.of_match_table = imx290_of_match,
 	},
 };
 

From 7f67aa097e875c87fba024e850cf405342300059 Mon Sep 17 00:00:00 2001
From: Karol Herbst <kherbst@redhat.com>
Date: Fri, 31 Mar 2023 00:39:38 +0200
Subject: [PATCH 111/173] drm/nouveau/disp: Support more modes by checking with
 lower bpc

This allows us to advertise more modes especially on HDR displays.

Fixes using 4K@60 modes on my TV and main display both using a HDMI to DP
adapter. Also fixes similar issues for users running into this.

Cc: stable@vger.kernel.org # 5.10+
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230330223938.4025569-1-kherbst@redhat.com
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 32 +++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_dp.c    |  8 ++++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index ed9d374147b8..5bb777ff1313 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -363,6 +363,35 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder,
 	return 0;
 }
 
+static void
+nv50_outp_atomic_fix_depth(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state)
+{
+	struct nv50_head_atom *asyh = nv50_head_atom(crtc_state);
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_display_mode *mode = &asyh->state.adjusted_mode;
+	unsigned int max_rate, mode_rate;
+
+	switch (nv_encoder->dcb->type) {
+	case DCB_OUTPUT_DP:
+		max_rate = nv_encoder->dp.link_nr * nv_encoder->dp.link_bw;
+
+		/* we don't support more than 10 anyway */
+		asyh->or.bpc = min_t(u8, asyh->or.bpc, 10);
+
+		/* reduce the bpc until it works out */
+		while (asyh->or.bpc > 6) {
+			mode_rate = DIV_ROUND_UP(mode->clock * asyh->or.bpc * 3, 8);
+			if (mode_rate <= max_rate)
+				break;
+
+			asyh->or.bpc -= 2;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
 static int
 nv50_outp_atomic_check(struct drm_encoder *encoder,
 		       struct drm_crtc_state *crtc_state,
@@ -381,6 +410,9 @@ nv50_outp_atomic_check(struct drm_encoder *encoder,
 	if (crtc_state->mode_changed || crtc_state->connectors_changed)
 		asyh->or.bpc = connector->display_info.bpc;
 
+	/* We might have to reduce the bpc */
+	nv50_outp_atomic_fix_depth(encoder, crtc_state);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index e00876f92aee..d49b4875fc3c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -263,8 +263,6 @@ nouveau_dp_irq(struct work_struct *work)
 }
 
 /* TODO:
- * - Use the minimum possible BPC here, once we add support for the max bpc
- *   property.
  * - Validate against the DP caps advertised by the GPU (we don't check these
  *   yet)
  */
@@ -276,7 +274,11 @@ nv50_dp_mode_valid(struct drm_connector *connector,
 {
 	const unsigned int min_clock = 25000;
 	unsigned int max_rate, mode_rate, ds_max_dotclock, clock = mode->clock;
-	const u8 bpp = connector->display_info.bpc * 3;
+	/* Check with the minmum bpc always, so we can advertise better modes.
+	 * In particlar not doing this causes modes to be dropped on HDR
+	 * displays as we might check with a bpc of 16 even.
+	 */
+	const u8 bpp = 6 * 3;
 
 	if (mode->flags & DRM_MODE_FLAG_INTERLACE && !outp->caps.dp_interlace)
 		return MODE_NO_INTERLACE;

From adef41b03b35839e5677aace628d02597f04a616 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 31 Mar 2023 21:16:22 -0700
Subject: [PATCH 112/173] Revert "net: netcp: MAX_SKB_FRAGS is now 'int'"

This reverts commit c5b959eeb7f9e40673b97c08c71cbfff5f5923f2.

Reverted change is required after commit 3948b05950fd ("net: introduce
a config option to tweak MAX_SKB_FRAGS") which does not exist
in this tree, yet. It's only present in -next trees at the time
of writing.

Reported-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/all/20230331214444.GA1426512@dev-arch.thelio-3990X/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/netcp_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index dfdbcdeb991f..1bb596a9d8a2 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2081,7 +2081,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
 	netcp->tx_pool_region_id = temp[1];
 
 	if (netcp->tx_pool_size < MAX_SKB_FRAGS) {
-		dev_err(dev, "tx-pool size too small, must be at least %d\n",
+		dev_err(dev, "tx-pool size too small, must be at least %ld\n",
 			MAX_SKB_FRAGS);
 		ret = -ENODEV;
 		goto quit;

From 7d63b67125382ff0ffdfca434acbc94a38bd092b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 30 Mar 2023 17:45:02 +0000
Subject: [PATCH 113/173] icmp: guard against too small mtu

syzbot was able to trigger a panic [1] in icmp_glue_bits(), or
more exactly in skb_copy_and_csum_bits()

There is no repro yet, but I think the issue is that syzbot
manages to lower device mtu to a small value, fooling __icmp_send()

__icmp_send() must make sure there is enough room for the
packet to include at least the headers.

We might in the future refactor skb_copy_and_csum_bits() and its
callers to no longer crash when something bad happens.

[1]
kernel BUG at net/core/skbuff.c:3343 !
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 15766 Comm: syz-executor.0 Not tainted 6.3.0-rc4-syzkaller-00039-gffe78bbd5121 #0
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014
RIP: 0010:skb_copy_and_csum_bits+0x798/0x860 net/core/skbuff.c:3343
Code: f0 c1 c8 08 41 89 c6 e9 73 ff ff ff e8 61 48 d4 f9 e9 41 fd ff ff 48 8b 7c 24 48 e8 52 48 d4 f9 e9 c3 fc ff ff e8 c8 27 84 f9 <0f> 0b 48 89 44 24 28 e8 3c 48 d4 f9 48 8b 44 24 28 e9 9d fb ff ff
RSP: 0018:ffffc90000007620 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 00000000000001e8 RCX: 0000000000000100
RDX: ffff8880276f6280 RSI: ffffffff87fdd138 RDI: 0000000000000005
RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000000
R10: 00000000000001e8 R11: 0000000000000001 R12: 000000000000003c
R13: 0000000000000000 R14: ffff888028244868 R15: 0000000000000b0e
FS: 00007fbc81f1c700(0000) GS:ffff88802ca00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001b2df43000 CR3: 00000000744db000 CR4: 0000000000150ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<IRQ>
icmp_glue_bits+0x7b/0x210 net/ipv4/icmp.c:353
__ip_append_data+0x1d1b/0x39f0 net/ipv4/ip_output.c:1161
ip_append_data net/ipv4/ip_output.c:1343 [inline]
ip_append_data+0x115/0x1a0 net/ipv4/ip_output.c:1322
icmp_push_reply+0xa8/0x440 net/ipv4/icmp.c:370
__icmp_send+0xb80/0x1430 net/ipv4/icmp.c:765
ipv4_send_dest_unreach net/ipv4/route.c:1239 [inline]
ipv4_link_failure+0x5a9/0x9e0 net/ipv4/route.c:1246
dst_link_failure include/net/dst.h:423 [inline]
arp_error_report+0xcb/0x1c0 net/ipv4/arp.c:296
neigh_invalidate+0x20d/0x560 net/core/neighbour.c:1079
neigh_timer_handler+0xc77/0xff0 net/core/neighbour.c:1166
call_timer_fn+0x1a0/0x580 kernel/time/timer.c:1700
expire_timers+0x29b/0x4b0 kernel/time/timer.c:1751
__run_timers kernel/time/timer.c:2022 [inline]

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+d373d60fddbdc915e666@syzkaller.appspotmail.com
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230330174502.1915328-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/icmp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8cebb476b3ab..b8607763d113 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -749,6 +749,11 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
 		room = 576;
 	room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
 	room -= sizeof(struct icmphdr);
+	/* Guard against tiny mtu. We need to include at least one
+	 * IP network header for this message to make any sense.
+	 */
+	if (room <= (int)sizeof(struct iphdr))
+		goto ende;
 
 	icmp_param.data_len = skb_in->len - icmp_param.offset;
 	if (icmp_param.data_len > room)

From f785f5ee968f7045268b8be6b0abc850c4a4277c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 31 Mar 2023 16:22:17 +0200
Subject: [PATCH 114/173] ALSA: hda/hdmi: Preserve the previous PCM device upon
 re-enablement

When a DRM driver turns on or off the screen with the audio
capability, it notifies the ELD to HD-audio HDMI codec driver via
component ops.  HDMI codec driver, in turn, attaches or detaches the
PCM stream for the given port on the fly.

The problem is that, since the recent code change, the HDMI driver
always treats the PCM stream assignment dynamically; this ended up the
confusion of the PCM device appearance.  e.g. when a screen goes once
off and on again, it may appear on a different PCM device before the
screen-off.  Although the application should treat such a change, it
doesn't seem working gracefully with the current pipewire (maybe
PulseAudio, too).

As a workaround, this patch changes the HDMI codec driver behavior
slightly to be more consistent.  Now it remembers the previous PCM
slot for the given port and try to assign to it.  That is, if a port
is re-enabled, the driver tries to use the same PCM slot that was
assigned to that port previously.  If it conflicts, a new slot is
searched and used like before, instead.

Note that multiple monitor connections are the only typical case where
the PCM slot preservation is effective.  As long as only a single
monitor is connected, the behavior isn't changed, and the first PCM
slot is still assigned always.

Fixes: ef6f5494faf6 ("ALSA: hda/hdmi: Use only dynamic PCM device allocation")
Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217259
Link: https://lore.kernel.org/r/20230331142217.19791-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_hdmi.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 9ea633fe9339..4ffa3a59f419 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -81,6 +81,7 @@ struct hdmi_spec_per_pin {
 	struct delayed_work work;
 	struct hdmi_pcm *pcm; /* pointer to spec->pcm_rec[n] dynamically*/
 	int pcm_idx; /* which pcm is attached. -1 means no pcm is attached */
+	int prev_pcm_idx; /* previously assigned pcm index */
 	int repoll_count;
 	bool setup; /* the stream has been set up by prepare callback */
 	bool silent_stream;
@@ -1380,9 +1381,17 @@ static void hdmi_attach_hda_pcm(struct hdmi_spec *spec,
 	/* pcm already be attached to the pin */
 	if (per_pin->pcm)
 		return;
+	/* try the previously used slot at first */
+	idx = per_pin->prev_pcm_idx;
+	if (idx >= 0) {
+		if (!test_bit(idx, &spec->pcm_bitmap))
+			goto found;
+		per_pin->prev_pcm_idx = -1; /* no longer valid, clear it */
+	}
 	idx = hdmi_find_pcm_slot(spec, per_pin);
 	if (idx == -EBUSY)
 		return;
+ found:
 	per_pin->pcm_idx = idx;
 	per_pin->pcm = get_hdmi_pcm(spec, idx);
 	set_bit(idx, &spec->pcm_bitmap);
@@ -1398,6 +1407,7 @@ static void hdmi_detach_hda_pcm(struct hdmi_spec *spec,
 		return;
 	idx = per_pin->pcm_idx;
 	per_pin->pcm_idx = -1;
+	per_pin->prev_pcm_idx = idx; /* remember the previous index */
 	per_pin->pcm = NULL;
 	if (idx >= 0 && idx < spec->pcm_used)
 		clear_bit(idx, &spec->pcm_bitmap);
@@ -1924,6 +1934,7 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid)
 
 		per_pin->pcm = NULL;
 		per_pin->pcm_idx = -1;
+		per_pin->prev_pcm_idx = -1;
 		per_pin->pin_nid = pin_nid;
 		per_pin->pin_nid_idx = spec->num_nids;
 		per_pin->dev_id = i;

From 275b471e3d2daf1472ae8fa70dc1b50c9e0b9e75 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 30 Mar 2023 19:21:44 -0700
Subject: [PATCH 115/173] net: don't let netpoll invoke NAPI if in xmit context

Commit 0db3dc73f7a3 ("[NETPOLL]: tx lock deadlock fix") narrowed
down the region under netif_tx_trylock() inside netpoll_send_skb().
(At that point in time netif_tx_trylock() would lock all queues of
the device.) Taking the tx lock was problematic because driver's
cleanup method may take the same lock. So the change made us hold
the xmit lock only around xmit, and expected the driver to take
care of locking within ->ndo_poll_controller().

Unfortunately this only works if netpoll isn't itself called with
the xmit lock already held. Netpoll code is careful and uses
trylock(). The drivers, however, may be using plain lock().
Printing while holding the xmit lock is going to result in rare
deadlocks.

Luckily we record the xmit lock owners, so we can scan all the queues,
the same way we scan NAPI owners. If any of the xmit locks is held
by the local CPU we better not attempt any polling.

It would be nice if we could narrow down the check to only the NAPIs
and the queue we're trying to use. I don't see a way to do that now.

Reported-by: Roman Gushchin <roman.gushchin@linux.dev>
Fixes: 0db3dc73f7a3 ("[NETPOLL]: tx lock deadlock fix")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a089b704b986..e6a739b1afa9 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -137,6 +137,20 @@ static void queue_process(struct work_struct *work)
 	}
 }
 
+static int netif_local_xmit_active(struct net_device *dev)
+{
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+		if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
+			return 1;
+	}
+
+	return 0;
+}
+
 static void poll_one_napi(struct napi_struct *napi)
 {
 	int work;
@@ -183,7 +197,10 @@ void netpoll_poll_dev(struct net_device *dev)
 	if (!ni || down_trylock(&ni->dev_lock))
 		return;
 
-	if (!netif_running(dev)) {
+	/* Some drivers will take the same locks in poll and xmit,
+	 * we can't poll if local CPU is already in xmit.
+	 */
+	if (!netif_running(dev) || netif_local_xmit_active(dev)) {
 		up(&ni->dev_lock);
 		return;
 	}

From 089b91a0155c4de1209a07ff2a7dd299ff3ece47 Mon Sep 17 00:00:00 2001
From: Gustav Ekelund <gustaek@axis.com>
Date: Fri, 31 Mar 2023 10:40:13 +0200
Subject: [PATCH 116/173] net: dsa: mv88e6xxx: Reset mv88e6393x force WD event
 bit

The force watchdog event bit is not cleared during SW reset in the
mv88e6393x switch. This is a different behavior compared to mv886390 which
clears the force WD event bit as advertised. This causes a force WD event
to be handled over and over again as the SW reset following the event never
clears the force WD event bit.

Explicitly clear the watchdog event register to 0 in irq_action when
handling an event to prevent the switch from sending continuous interrupts.
Marvell aren't aware of any other stuck bits apart from the force WD
bit.

Fixes: de776d0d316f ("net: dsa: mv88e6xxx: add support for mv88e6393x family"
Signed-off-by: Gustav Ekelund <gustaek@axis.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c    |  2 +-
 drivers/net/dsa/mv88e6xxx/global2.c | 20 ++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/global2.h |  1 +
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 0de7b3611202..7108f745fbf0 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -5601,7 +5601,7 @@ static const struct mv88e6xxx_ops mv88e6393x_ops = {
 	 * .port_set_upstream_port method.
 	 */
 	.set_egress_port = mv88e6393x_set_egress_port,
-	.watchdog_ops = &mv88e6390_watchdog_ops,
+	.watchdog_ops = &mv88e6393x_watchdog_ops,
 	.mgmt_rsvd2cpu = mv88e6393x_port_mgmt_rsvd2cpu,
 	.pot_clear = mv88e6xxx_g2_pot_clear,
 	.reset = mv88e6352_g1_reset,
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index ed3b2f88e783..a7af3cebae97 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -943,6 +943,26 @@ const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {
 	.irq_free = mv88e6390_watchdog_free,
 };
 
+static int mv88e6393x_watchdog_action(struct mv88e6xxx_chip *chip, int irq)
+{
+	mv88e6390_watchdog_action(chip, irq);
+
+	/* Fix for clearing the force WD event bit.
+	 * Unreleased erratum on mv88e6393x.
+	 */
+	mv88e6xxx_g2_write(chip, MV88E6390_G2_WDOG_CTL,
+			   MV88E6390_G2_WDOG_CTL_UPDATE |
+			   MV88E6390_G2_WDOG_CTL_PTR_EVENT);
+
+	return IRQ_HANDLED;
+}
+
+const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops = {
+	.irq_action = mv88e6393x_watchdog_action,
+	.irq_setup = mv88e6390_watchdog_setup,
+	.irq_free = mv88e6390_watchdog_free,
+};
+
 static irqreturn_t mv88e6xxx_g2_watchdog_thread_fn(int irq, void *dev_id)
 {
 	struct mv88e6xxx_chip *chip = dev_id;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index e973114d6890..7e091965582b 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -369,6 +369,7 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
 extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops;
 extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops;
+extern const struct mv88e6xxx_irq_ops mv88e6393x_watchdog_ops;
 
 extern const struct mv88e6xxx_avb_ops mv88e6165_avb_ops;
 extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops;

From e669ce46740a9815953bb4452a6bc5a7fdc21a50 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 31 Mar 2023 14:49:59 +0200
Subject: [PATCH 117/173] net: ethernet: mtk_eth_soc: fix remaining throughput
 regression

Based on further tests, it seems that the QDMA shaper is not able to
perform shaping close to the MAC link rate without throughput loss.
This cannot be compensated by increasing the shaping rate, so it seems
to be an internal limit.

Fix the remaining throughput regression by detecting that condition and
limiting shaping to ports with lower link speed.

This patch intentionally ignores link speed gain from TRGMII, because
even on such links, shaping to 1000 Mbit/s incurs some throughput
degradation.

Fixes: f63959c7eec3 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues")
Tested-By: Frank Wunderlich <frank-w@public-files.de>
Reported-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 282f9435d5ff..e14050e17862 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -753,6 +753,7 @@ static void mtk_mac_link_up(struct phylink_config *config,
 		 MAC_MCR_FORCE_RX_FC);
 
 	/* Configure speed */
+	mac->speed = speed;
 	switch (speed) {
 	case SPEED_2500:
 	case SPEED_1000:
@@ -3235,6 +3236,9 @@ found:
 	if (dp->index >= MTK_QDMA_NUM_QUEUES)
 		return NOTIFY_DONE;
 
+	if (mac->speed > 0 && mac->speed <= s.base.speed)
+		s.base.speed = 0;
+
 	mtk_set_queue_speed(eth, dp->index + 3, s.base.speed);
 
 	return NOTIFY_DONE;

From 2584024b23552c00d95b50255e47bd18d306d31a Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 1 Apr 2023 19:09:57 -0400
Subject: [PATCH 118/173] sctp: check send stream number after wait_for_sndbuf

This patch fixes a corner case where the asoc out stream count may change
after wait_for_sndbuf.

When the main thread in the client starts a connection, if its out stream
count is set to N while the in stream count in the server is set to N - 2,
another thread in the client keeps sending the msgs with stream number
N - 1, and waits for sndbuf before processing INIT_ACK.

However, after processing INIT_ACK, the out stream count in the client is
shrunk to N - 2, the same to the in stream count in the server. The crash
occurs when the thread waiting for sndbuf is awake and sends the msg in a
non-existing stream(N - 1), the call trace is as below:

  KASAN: null-ptr-deref in range [0x0000000000000038-0x000000000000003f]
  Call Trace:
   <TASK>
   sctp_cmd_send_msg net/sctp/sm_sideeffect.c:1114 [inline]
   sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1777 [inline]
   sctp_side_effects net/sctp/sm_sideeffect.c:1199 [inline]
   sctp_do_sm+0x197d/0x5310 net/sctp/sm_sideeffect.c:1170
   sctp_primitive_SEND+0x9f/0xc0 net/sctp/primitive.c:163
   sctp_sendmsg_to_asoc+0x10eb/0x1a30 net/sctp/socket.c:1868
   sctp_sendmsg+0x8d4/0x1d90 net/sctp/socket.c:2026
   inet_sendmsg+0x9d/0xe0 net/ipv4/af_inet.c:825
   sock_sendmsg_nosec net/socket.c:722 [inline]
   sock_sendmsg+0xde/0x190 net/socket.c:745

The fix is to add an unlikely check for the send stream number after the
thread wakes up from the wait_for_sndbuf.

Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
Reported-by: syzbot+47c24ca20a2fa01f082e@syzkaller.appspotmail.com
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b91616f819de..218e0982c370 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1830,6 +1830,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
 		if (err)
 			goto err;
+		if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) {
+			err = -EINVAL;
+			goto err;
+		}
 	}
 
 	if (sctp_state(asoc, CLOSED)) {

From f95b8ea79c47c0ad3d18f45ad538f9970e414d1f Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Tue, 7 Feb 2023 11:22:54 +0100
Subject: [PATCH 119/173] Revert "venus: firmware: Correct non-pix start and
 end addresses"

This reverts commit a837e5161cff, which broke probing of the venus
driver, at least on the SC7180 SoC HP X2 Chromebook:

  qcom-venus aa00000.video-codec: Adding to iommu group 11
  qcom-venus aa00000.video-codec: non legacy binding
  qcom-venus aa00000.video-codec: failed to reset venus core
  qcom-venus: probe of aa00000.video-codec failed with error -110

Matthias Kaehlcke also reported that the same change caused a regression
in SC7180 and sc7280, that prevents AOSS from entering sleep mode during
system suspend.  So let's revert this commit for now to fix both issues.

Fixes: a837e5161cff ("venus: firmware: Correct non-pix start and end addresses")
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/media/platform/qcom/venus/firmware.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c
index 61ff20a7e935..cfb11c551167 100644
--- a/drivers/media/platform/qcom/venus/firmware.c
+++ b/drivers/media/platform/qcom/venus/firmware.c
@@ -38,8 +38,8 @@ static void venus_reset_cpu(struct venus_core *core)
 	writel(fw_size, wrapper_base + WRAPPER_FW_END_ADDR);
 	writel(0, wrapper_base + WRAPPER_CPA_START_ADDR);
 	writel(fw_size, wrapper_base + WRAPPER_CPA_END_ADDR);
-	writel(0, wrapper_base + WRAPPER_NONPIX_START_ADDR);
-	writel(0, wrapper_base + WRAPPER_NONPIX_END_ADDR);
+	writel(fw_size, wrapper_base + WRAPPER_NONPIX_START_ADDR);
+	writel(fw_size, wrapper_base + WRAPPER_NONPIX_END_ADDR);
 
 	if (IS_V6(core)) {
 		/* Bring XTSS out of reset */

From 7e364e56293bb98cae1b55fd835f5991c4e96e7d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 2 Apr 2023 14:29:29 -0700
Subject: [PATCH 120/173] Linux 6.3-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index da2586d4c728..ef4e96b9cd5b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 3
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*

From 764a2ab9eb56e1200083e771aab16186836edf1d Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Fri, 21 May 2021 11:38:11 +0200
Subject: [PATCH 121/173] drm/panfrost: Fix the panfrost_mmu_map_fault_addr()
 error path

Make sure all bo->base.pages entries are either NULL or pointing to a
valid page before calling drm_gem_shmem_put_pages().

Reported-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Cc: <stable@vger.kernel.org>
Fixes: 187d2929206e ("drm/panfrost: Add support for GPU heap allocations")
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210521093811.1018992-1-boris.brezillon@collabora.com
---
 drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 666a5e53fe19..e961fa27702c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -504,6 +504,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
 		if (IS_ERR(pages[i])) {
 			mutex_unlock(&bo->base.pages_lock);
 			ret = PTR_ERR(pages[i]);
+			pages[i] = NULL;
 			goto err_pages;
 		}
 	}

From f1b17f429f066f920a6a1056332e66f8a5b92256 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 30 Jan 2023 12:06:31 +0000
Subject: [PATCH 122/173] drm/i915/ttm: fix sparse warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sparse complains with:

drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1066:21: sparse:
	expected restricted vm_fault_t [assigned] [usertype] ret
drivers/gpu/drm/i915/gem/i915_gem_ttm.c:1066:21: sparse: got int

Fixes: 516198d317d8 ("drm/i915: audit bo->resource usage v3")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230130101230.25347-1-matthew.auld@intel.com
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Acked-by: Christian König <ckoenig.leichtzumerken@gmail.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
(cherry picked from commit fde789e8339c60c8c58e5a71fa819fcfe52d839e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 7420276827a5..4758f21c91e1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1067,11 +1067,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 			.interruptible = true,
 			.no_wait_gpu = true, /* should be idle already */
 		};
+		int err;
 
 		GEM_BUG_ON(!bo->ttm || !(bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED));
 
-		ret = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
-		if (ret) {
+		err = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
+		if (err) {
 			dma_resv_unlock(bo->base.resv);
 			return VM_FAULT_SIGBUS;
 		}

From c74237496fbc799257b091179dd01a3200f7314d Mon Sep 17 00:00:00 2001
From: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Date: Mon, 13 Mar 2023 13:55:56 -0700
Subject: [PATCH 123/173] drm/i915/huc: Cancel HuC delayed load timer on reset.

In the rare case where we do a full GT reset after starting the HuC
load and before it completes (which basically boils down to i915 hanging
during init), we need to cancel the delayed load fence, as it will be
re-initialized in the post-reset recovery.

Fixes: 27536e03271d ("drm/i915/huc: track delayed HuC load with a fence")
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230313205556.1174503-1-daniele.ceraolospurio@intel.com
(cherry picked from commit cdf7911f7dbcb37228409a63bf75630776c45a15)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_huc.c | 7 +++++++
 drivers/gpu/drm/i915/gt/uc/intel_huc.h | 7 +------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 410905da8e97..0c103ca160d1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -235,6 +235,13 @@ static void delayed_huc_load_fini(struct intel_huc *huc)
 	i915_sw_fence_fini(&huc->delayed_load.fence);
 }
 
+int intel_huc_sanitize(struct intel_huc *huc)
+{
+	delayed_huc_load_complete(huc);
+	intel_uc_fw_sanitize(&huc->fw);
+	return 0;
+}
+
 static bool vcs_supported(struct intel_gt *gt)
 {
 	intel_engine_mask_t mask = gt->info.engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index 52db03620c60..db555b3c1f56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -41,6 +41,7 @@ struct intel_huc {
 	} delayed_load;
 };
 
+int intel_huc_sanitize(struct intel_huc *huc);
 void intel_huc_init_early(struct intel_huc *huc);
 int intel_huc_init(struct intel_huc *huc);
 void intel_huc_fini(struct intel_huc *huc);
@@ -54,12 +55,6 @@ bool intel_huc_is_authenticated(struct intel_huc *huc);
 void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
 void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
 
-static inline int intel_huc_sanitize(struct intel_huc *huc)
-{
-	intel_uc_fw_sanitize(&huc->fw);
-	return 0;
-}
-
 static inline bool intel_huc_is_supported(struct intel_huc *huc)
 {
 	return intel_uc_fw_is_supported(&huc->fw);

From 95d939bb97ff9be101ae4ceeb322535589da2190 Mon Sep 17 00:00:00 2001
From: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Date: Mon, 27 Mar 2023 09:42:17 +0300
Subject: [PATCH 124/173] drm/i915: Use compressed bpp when calculating m/n
 value for DP MST DSC

For obvious reasons, we use compressed bpp instead of pipe bpp for
DSC DP SST case. Lets be consistent and use compressed bpp instead of
pipe bpp, also in DP MST DSC case.

Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Reviewed-by: Vinod Govindapillai <vinod.govindapillai@intel.com>
Fixes: d51f25eb479a ("drm/i915: Add DSC support to MST path")
Link: https://patchwork.freedesktop.org/patch/msgid/20230327064217.24033-1-stanislav.lisovskiy@intel.com
(cherry picked from commit ea1deabc6f11575eb3375b454457eaa3c9837abc)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 2106b3de225a..7c9b328bc2d7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -232,7 +232,7 @@ static int intel_dp_dsc_mst_compute_link_config(struct intel_encoder *encoder,
 			return slots;
 	}
 
-	intel_link_compute_m_n(crtc_state->pipe_bpp,
+	intel_link_compute_m_n(crtc_state->dsc.compressed_bpp,
 			       crtc_state->lane_count,
 			       adjusted_mode->crtc_clock,
 			       crtc_state->port_clock,

From dc30c011469165d57af9adac5baff7d767d20e5c Mon Sep 17 00:00:00 2001
From: Min Li <lm0963hack@gmail.com>
Date: Tue, 28 Mar 2023 17:36:27 +0800
Subject: [PATCH 125/173] drm/i915: fix race condition UAF in
 i915_perf_add_config_ioctl

Userspace can guess the id value and try to race oa_config object creation
with config remove, resulting in a use-after-free if we dereference the
object after unlocking the metrics_lock.  For that reason, unlocking the
metrics_lock must be done after we are done dereferencing the object.

Signed-off-by: Min Li <lm0963hack@gmail.com>
Fixes: f89823c21224 ("drm/i915/perf: Implement I915_PERF_ADD/REMOVE_CONFIG interface")
Cc: <stable@vger.kernel.org> # v4.14+
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230328093627.5067-1-lm0963hack@gmail.com
[tursulin: Manually added stable tag.]
(cherry picked from commit 49f6f6483b652108bcb73accd0204a464b922395)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 283a4a3c6862..004074936300 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4638,13 +4638,13 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		err = oa_config->id;
 		goto sysfs_err;
 	}
-
-	mutex_unlock(&perf->metrics_lock);
+	id = oa_config->id;
 
 	drm_dbg(&perf->i915->drm,
 		"Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+	mutex_unlock(&perf->metrics_lock);
 
-	return oa_config->id;
+	return id;
 
 sysfs_err:
 	mutex_unlock(&perf->metrics_lock);

From dc3421560a67361442f33ec962fc6dd48895a0df Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Mon, 20 Mar 2023 15:14:23 +0000
Subject: [PATCH 126/173] drm/i915: Fix context runtime accounting

When considering whether to mark one context as stopped and another as
started we need to look at whether the previous and new _contexts_ are
different and not just requests. Otherwise the software tracked context
start time was incorrectly updated to the most recent lite-restore time-
stamp, which was in some cases resulting in active time going backward,
until the context switch (typically the heartbeat pulse) would synchronise
with the hardware tracked context runtime. Easiest use case to observe
this behaviour was with a full screen clients with close to 100% engine
load.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: bb6287cb1886 ("drm/i915: Track context current active time")
Cc: <stable@vger.kernel.org> # v5.19+
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230320151423.1708436-1-tvrtko.ursulin@linux.intel.com
[tursulin: Fix spelling in commit msg.]
(cherry picked from commit b3e70051879c665acdd3a1ab50d0ed58d6a8001f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 1bbe6708d0a7..750326434677 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2018,6 +2018,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
 	 * inspecting the queue to see if we need to resumbit.
 	 */
 	if (*prev != *execlists->active) { /* elide lite-restores */
+		struct intel_context *prev_ce = NULL, *active_ce = NULL;
+
 		/*
 		 * Note the inherent discrepancy between the HW runtime,
 		 * recorded as part of the context switch, and the CPU
@@ -2029,9 +2031,15 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
 		 * and correct overselves later when updating from HW.
 		 */
 		if (*prev)
-			lrc_runtime_stop((*prev)->context);
+			prev_ce = (*prev)->context;
 		if (*execlists->active)
-			lrc_runtime_start((*execlists->active)->context);
+			active_ce = (*execlists->active)->context;
+		if (prev_ce != active_ce) {
+			if (prev_ce)
+				lrc_runtime_stop(prev_ce);
+			if (active_ce)
+				lrc_runtime_start(active_ce);
+		}
 		new_timeslice(execlists);
 	}
 

From ad651d68cee75e9ac20002254c4e5d09ee67a84b Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Sun, 2 Apr 2023 12:44:37 +0100
Subject: [PATCH 127/173] net: sfp: add quirk enabling 2500Base-x for HG
 MXPD-483II

The HG MXPD-483II 1310nm SFP module is meant to operate with 2500Base-X,
however, in their EEPROM they incorrectly specify:
    Transceiver type                          : Ethernet: 1000BASE-LX
    ...
    BR, Nominal                               : 2600MBd

Use sfp_quirk_2500basex for this module to allow 2500Base-X mode anyway.

https://forum.banana-pi.org/t/bpi-r3-sfp-module-compatibility/14573/60

Reported-by: chowtom <chowtom@gmail.com>
Tested-by: chowtom <chowtom@gmail.com>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index fb98db61e06c..8af10bb53e57 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -387,6 +387,10 @@ static const struct sfp_quirk sfp_quirks[] = {
 
 	SFP_QUIRK_F("HALNy", "HL-GSFP", sfp_fixup_halny_gsfp),
 
+	// HG MXPD-483II-F 2.5G supports 2500Base-X, but incorrectly reports
+	// 2600MBd in their EERPOM
+	SFP_QUIRK_M("HG GENUINE", "MXPD-483II", sfp_quirk_2500basex),
+
 	// Huawei MA5671A can operate at 2500base-X, but report 1.2GBd NRZ in
 	// their EEPROM
 	SFP_QUIRK("HUAWEI", "MA5671A", sfp_quirk_2500basex,

From 839349d13905927d8a567ca4d21d88c82028e31d Mon Sep 17 00:00:00 2001
From: Sricharan Ramabadhran <quic_srichara@quicinc.com>
Date: Mon, 3 Apr 2023 12:28:51 +0530
Subject: [PATCH 128/173] net: qrtr: Do not do DEL_SERVER broadcast after
 DEL_CLIENT

On the remote side, when QRTR socket is removed, af_qrtr will call
qrtr_port_remove() which broadcasts the DEL_CLIENT packet to all neighbours
including local NS. NS upon receiving the DEL_CLIENT packet, will remove
the lookups associated with the node:port and broadcasts the DEL_SERVER
packet.

But on the host side, due to the arrival of the DEL_CLIENT packet, the NS
would've already deleted the server belonging to that port. So when the
remote's NS again broadcasts the DEL_SERVER for that port, it throws below
error message on the host:

"failed while handling packet from 2:-2"

So fix this error by not broadcasting the DEL_SERVER packet when the
DEL_CLIENT packet gets processed."

Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace")
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Signed-off-by: Ram Kumar Dharuman <quic_ramd@quicinc.com>
Signed-off-by: Sricharan Ramabadhran <quic_srichara@quicinc.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/ns.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 722936f7dd98..0f25a386138c 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -274,7 +274,7 @@ err:
 	return NULL;
 }
 
-static int server_del(struct qrtr_node *node, unsigned int port)
+static int server_del(struct qrtr_node *node, unsigned int port, bool bcast)
 {
 	struct qrtr_lookup *lookup;
 	struct qrtr_server *srv;
@@ -287,7 +287,7 @@ static int server_del(struct qrtr_node *node, unsigned int port)
 	radix_tree_delete(&node->servers, port);
 
 	/* Broadcast the removal of local servers */
-	if (srv->node == qrtr_ns.local_node)
+	if (srv->node == qrtr_ns.local_node && bcast)
 		service_announce_del(&qrtr_ns.bcast_sq, srv);
 
 	/* Announce the service's disappearance to observers */
@@ -373,7 +373,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
 		}
 		slot = radix_tree_iter_resume(slot, &iter);
 		rcu_read_unlock();
-		server_del(node, srv->port);
+		server_del(node, srv->port, true);
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
@@ -459,10 +459,13 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
 		kfree(lookup);
 	}
 
-	/* Remove the server belonging to this port */
+	/* Remove the server belonging to this port but don't broadcast
+	 * DEL_SERVER. Neighbours would've already removed the server belonging
+	 * to this port due to the DEL_CLIENT broadcast from qrtr_port_remove().
+	 */
 	node = node_get(node_id);
 	if (node)
-		server_del(node, port);
+		server_del(node, port, false);
 
 	/* Advertise the removal of this client to all local servers */
 	local_node = node_get(qrtr_ns.local_node);
@@ -567,7 +570,7 @@ static int ctrl_cmd_del_server(struct sockaddr_qrtr *from,
 	if (!node)
 		return -ENOENT;
 
-	return server_del(node, port);
+	return server_del(node, port, true);
 }
 
 static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,

From ea30388baebcce37fd594d425a65037ca35e59e8 Mon Sep 17 00:00:00 2001
From: Ziyang Xuan <william.xuanziyang@huawei.com>
Date: Mon, 3 Apr 2023 15:34:17 +0800
Subject: [PATCH 129/173] ipv6: Fix an uninit variable access bug in
 __ip6_make_skb()

Syzbot reported a bug as following:

=====================================================
BUG: KMSAN: uninit-value in arch_atomic64_inc arch/x86/include/asm/atomic64_64.h:88 [inline]
BUG: KMSAN: uninit-value in arch_atomic_long_inc include/linux/atomic/atomic-long.h:161 [inline]
BUG: KMSAN: uninit-value in atomic_long_inc include/linux/atomic/atomic-instrumented.h:1429 [inline]
BUG: KMSAN: uninit-value in __ip6_make_skb+0x2f37/0x30f0 net/ipv6/ip6_output.c:1956
 arch_atomic64_inc arch/x86/include/asm/atomic64_64.h:88 [inline]
 arch_atomic_long_inc include/linux/atomic/atomic-long.h:161 [inline]
 atomic_long_inc include/linux/atomic/atomic-instrumented.h:1429 [inline]
 __ip6_make_skb+0x2f37/0x30f0 net/ipv6/ip6_output.c:1956
 ip6_finish_skb include/net/ipv6.h:1122 [inline]
 ip6_push_pending_frames+0x10e/0x550 net/ipv6/ip6_output.c:1987
 rawv6_push_pending_frames+0xb12/0xb90 net/ipv6/raw.c:579
 rawv6_sendmsg+0x297e/0x2e60 net/ipv6/raw.c:922
 inet_sendmsg+0x101/0x180 net/ipv4/af_inet.c:827
 sock_sendmsg_nosec net/socket.c:714 [inline]
 sock_sendmsg net/socket.c:734 [inline]
 ____sys_sendmsg+0xa8e/0xe70 net/socket.c:2476
 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2530
 __sys_sendmsg net/socket.c:2559 [inline]
 __do_sys_sendmsg net/socket.c:2568 [inline]
 __se_sys_sendmsg net/socket.c:2566 [inline]
 __x64_sys_sendmsg+0x367/0x540 net/socket.c:2566
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

Uninit was created at:
 slab_post_alloc_hook mm/slab.h:766 [inline]
 slab_alloc_node mm/slub.c:3452 [inline]
 __kmem_cache_alloc_node+0x71f/0xce0 mm/slub.c:3491
 __do_kmalloc_node mm/slab_common.c:967 [inline]
 __kmalloc_node_track_caller+0x114/0x3b0 mm/slab_common.c:988
 kmalloc_reserve net/core/skbuff.c:492 [inline]
 __alloc_skb+0x3af/0x8f0 net/core/skbuff.c:565
 alloc_skb include/linux/skbuff.h:1270 [inline]
 __ip6_append_data+0x51c1/0x6bb0 net/ipv6/ip6_output.c:1684
 ip6_append_data+0x411/0x580 net/ipv6/ip6_output.c:1854
 rawv6_sendmsg+0x2882/0x2e60 net/ipv6/raw.c:915
 inet_sendmsg+0x101/0x180 net/ipv4/af_inet.c:827
 sock_sendmsg_nosec net/socket.c:714 [inline]
 sock_sendmsg net/socket.c:734 [inline]
 ____sys_sendmsg+0xa8e/0xe70 net/socket.c:2476
 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2530
 __sys_sendmsg net/socket.c:2559 [inline]
 __do_sys_sendmsg net/socket.c:2568 [inline]
 __se_sys_sendmsg net/socket.c:2566 [inline]
 __x64_sys_sendmsg+0x367/0x540 net/socket.c:2566
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

It is because icmp6hdr does not in skb linear region under the scenario
of SOCK_RAW socket. Access icmp6_hdr(skb)->icmp6_type directly will
trigger the uninit variable access bug.

Use a local variable icmp6_type to carry the correct value in different
scenarios.

Fixes: 14878f75abd5 ("[IPV6]: Add ICMPMsgStats MIB (RFC 4293) [rev 2]")
Reported-by: syzbot+8257f4dcef79de670baf@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?id=3d605ec1d0a7f2a269a1a6936ac7f2b85975ee9c
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c314fdde0097..95a55c6630ad 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1965,8 +1965,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	if (proto == IPPROTO_ICMPV6) {
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+		u8 icmp6_type;
 
-		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+		if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+			icmp6_type = fl6->fl6_icmp_type;
+		else
+			icmp6_type = icmp6_hdr(skb)->icmp6_type;
+		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
 	}
 

From a3c4c053014585dcf20f4df954791b74d8a8afcd Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Fri, 31 Mar 2023 23:33:19 +0200
Subject: [PATCH 130/173] platform/x86: think-lmi: Fix memory leak when showing
 current settings

When retriving a item string with tlmi_setting(), the result has to be
freed using kfree(). In current_value_show() however, malformed
item strings are not freed, causing a memory leak.
Fix this by eliminating the early return responsible for this.

Reported-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Link: https://lore.kernel.org/platform-driver-x86/01e920bc-5882-ba0c-dd15-868bf0eca0b8@alu.unizg.hr/T/#t
Tested-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Fixes: 0fdf10e5fc96 ("platform/x86: think-lmi: Split current_value to reflect only the value")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20230331213319.41040-1-W_Armin@gmx.de
Tested-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index c816646eb661..6034df6d577d 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -930,10 +930,12 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a
 	/* validate and split from `item,value` -> `value` */
 	value = strpbrk(item, ",");
 	if (!value || value == item || !strlen(value + 1))
-		return -EINVAL;
+		ret = -EINVAL;
+	else
+		ret = sysfs_emit(buf, "%s\n", value + 1);
 
-	ret = sysfs_emit(buf, "%s\n", value + 1);
 	kfree(item);
+
 	return ret;
 }
 

From e7d796fccdc8d17c2d21817ebe4c7bf5bbfe5433 Mon Sep 17 00:00:00 2001
From: Mark Pearson <mpearson-lenovo@squebb.ca>
Date: Sun, 2 Apr 2023 21:31:19 -0400
Subject: [PATCH 131/173] platform/x86: think-lmi: Fix memory leaks when
 parsing ThinkStation WMI strings

My previous commit introduced a memory leak where the item allocated
from tlmi_setting was not freed.
This commit also renames it to avoid confusion with the similarly name
variable in the same function.

Fixes: 8a02d70679fc ("platform/x86: think-lmi: Add possible_values for ThinkStation")
Reported-by: Mirsad Todorovac <mirsad.todorovac@alu.unizg.hr>
Link: https://lore.kernel.org/lkml/df26ff45-8933-f2b3-25f4-6ee51ccda7d8@gmx.de/T/
Signed-off-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Link: https://lore.kernel.org/r/20230403013120.2105-1-mpearson-lenovo@squebb.ca
Tested-by: Mario Limonciello <mario.limonciello@amd.com>
Tested-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 6034df6d577d..87f832142d8d 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -1459,10 +1459,10 @@ static int tlmi_analyze(void)
 			 * name string.
 			 * Try and pull that out if it's available.
 			 */
-			char *item, *optstart, *optend;
+			char *optitem, *optstart, *optend;
 
-			if (!tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID)) {
-				optstart = strstr(item, "[Optional:");
+			if (!tlmi_setting(setting->index, &optitem, LENOVO_BIOS_SETTING_GUID)) {
+				optstart = strstr(optitem, "[Optional:");
 				if (optstart) {
 					optstart += strlen("[Optional:");
 					optend = strstr(optstart, "]");
@@ -1471,6 +1471,7 @@ static int tlmi_analyze(void)
 							kstrndup(optstart, optend - optstart,
 									GFP_KERNEL);
 				}
+				kfree(optitem);
 			}
 		}
 		/*

From 7065655216d4d034d71164641f3bec0b189ad6fa Mon Sep 17 00:00:00 2001
From: Mark Pearson <mpearson-lenovo@squebb.ca>
Date: Sun, 2 Apr 2023 21:31:20 -0400
Subject: [PATCH 132/173] platform/x86: think-lmi: Clean up display of
 current_value on Thinkstation

On ThinkStations on retrieving the attribute value the BIOS appends the
possible values to the string.
Clean up the display in the current_value_show function so the options
part is not displayed.

Fixes: a40cd7ef22fb ("platform/x86: think-lmi: Add WMI interface support on Lenovo platforms")
Reported by Mario Limoncello <Mario.Limonciello@amd.com>
Link: https://github.com/fwupd/fwupd/issues/5077#issuecomment-1488730526
Signed-off-by: Mark Pearson <mpearson-lenovo@squebb.ca>
Link: https://lore.kernel.org/r/20230403013120.2105-2-mpearson-lenovo@squebb.ca
Tested-by: Mario Limonciello <mario.limonciello@amd.com>
Tested-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/think-lmi.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 87f832142d8d..78dc82bda4dd 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -920,7 +920,7 @@ static ssize_t display_name_show(struct kobject *kobj, struct kobj_attribute *at
 static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 {
 	struct tlmi_attr_setting *setting = to_tlmi_attr_setting(kobj);
-	char *item, *value;
+	char *item, *value, *p;
 	int ret;
 
 	ret = tlmi_setting(setting->index, &item, LENOVO_BIOS_SETTING_GUID);
@@ -931,9 +931,12 @@ static ssize_t current_value_show(struct kobject *kobj, struct kobj_attribute *a
 	value = strpbrk(item, ",");
 	if (!value || value == item || !strlen(value + 1))
 		ret = -EINVAL;
-	else
+	else {
+		/* On Workstations remove the Options part after the value */
+		p = strchrnul(value, ';');
+		*p = '\0';
 		ret = sysfs_emit(buf, "%s\n", value + 1);
-
+	}
 	kfree(item);
 
 	return ret;

From 9a469c6dfab38326f99f105386db84230be09ee3 Mon Sep 17 00:00:00 2001
From: Benjamin Asbach <asbachb.kernel@impl.it>
Date: Sat, 1 Apr 2023 01:24:47 +0200
Subject: [PATCH 133/173] platform/x86: thinkpad_acpi: Add missing T14s Gen1
 type to s2idle quirk list

From the commit message adding the first s2idle quirks:

> Lenovo laptops that contain NVME SSDs across a variety of generations have
> trouble resuming from suspend to idle when the IOMMU translation layer is
> active for the NVME storage device.
>
> This generally manifests as a large resume delay or page faults. These
> delays and page faults occur as a result of a Lenovo BIOS specific SMI
> that runs during the D3->D0 transition on NVME devices.

Add the DMI ids for another variant of the T14s Gen1, which also needs
the s2idle quirk.

Link: https://lore.kernel.org/all/20220503183420.348-1-mario.limonciello@amd.com/
Link: https://bbs.archlinux.org/viewtopic.php?pid=2084655#p2084655
Signed-off-by: Benjamin Asbach <asbachb.kernel@impl.it>
Tested-by: Benjamin Asbach <asbachb.kernel@impl.it>
Link: https://lore.kernel.org/r/20230331232447.37204-1-asbachb.kernel@impl.it
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 32c10457399e..7191ff2625b1 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -4478,6 +4478,14 @@ static const struct dmi_system_id fwbug_list[] __initconst = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "20UH"),
 		}
 	},
+	{
+		.ident = "T14s Gen1 AMD",
+		.driver_data = &quirk_s2idle_bug,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"),
+		}
+	},
 	{
 		.ident = "P14s Gen1 AMD",
 		.driver_data = &quirk_s2idle_bug,

From cf5fa3ca0552f1b7ba8490de40700bbfb6979b17 Mon Sep 17 00:00:00 2001
From: Kalle Valo <quic_kvalo@quicinc.com>
Date: Wed, 29 Mar 2023 19:20:38 +0300
Subject: [PATCH 134/173] wifi: ath11k: reduce the MHI timeout to 20s

Currently ath11k breaks after hibernation, the reason being that ath11k expects
that the wireless device will have power during suspend and the firmware will
continue running. But of course during hibernation the power from the device is
cut off and firmware is not running when resuming, so ath11k will fail.

(The reason why ath11k needs the firmware running is the interaction between
mac80211 and MHI stack, it's a long story and more info in the bugzilla report.)

In SUSE kernels the watchdog timeout is reduced from the default 120 to 60 seconds:

CONFIG_DPM_WATCHDOG_TIMEOUT=60

But as the ath11k MHI timeout is 90 seconds the kernel will crash before will
ath11k will recover in resume callback. To avoid the crash reduce the MHI
timeout to just 20 seconds.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.9

Link: https://bugzilla.kernel.org/show_bug.cgi?id=214649
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230329162038.8637-1-kvalo@kernel.org
---
 drivers/net/wireless/ath/ath11k/mhi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 86995e8dc913..a62ee05c5409 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -16,7 +16,7 @@
 #include "pci.h"
 #include "pcic.h"
 
-#define MHI_TIMEOUT_DEFAULT_MS	90000
+#define MHI_TIMEOUT_DEFAULT_MS	20000
 #define RDDM_DUMP_SIZE	0x420000
 
 static struct mhi_channel_config ath11k_mhi_channels_qca6390[] = {

From e6db67fa871dee37d22701daba806bfcd4d9df49 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 30 Mar 2023 11:12:59 +0200
Subject: [PATCH 135/173] wifi: mt76: ignore key disable commands

This helps avoid cleartext leakage of already queued or powersave buffered
packets, when a reassoc triggers the key deletion.

Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://lore.kernel.org/r/20230330091259.61378-1-nbd@nbd.name
---
 .../net/wireless/mediatek/mt76/mt7603/main.c  | 10 +--
 .../net/wireless/mediatek/mt76/mt7615/mac.c   | 70 ++++++-------------
 .../net/wireless/mediatek/mt76/mt7615/main.c  | 15 ++--
 .../wireless/mediatek/mt76/mt7615/mt7615.h    |  6 +-
 .../net/wireless/mediatek/mt76/mt76x02_util.c | 18 ++---
 .../net/wireless/mediatek/mt76/mt7915/main.c  | 13 ++--
 .../net/wireless/mediatek/mt76/mt7921/main.c  | 13 ++--
 .../net/wireless/mediatek/mt76/mt7996/main.c  | 13 ++--
 8 files changed, 62 insertions(+), 96 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
index ca50feb0b3a9..1b1358c6bb46 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c
@@ -512,15 +512,15 @@ mt7603_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	    !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE))
 		return -EOPNOTSUPP;
 
-	if (cmd == SET_KEY) {
-		key->hw_key_idx = wcid->idx;
-		wcid->hw_key_idx = idx;
-	} else {
+	if (cmd != SET_KEY) {
 		if (idx == wcid->hw_key_idx)
 			wcid->hw_key_idx = -1;
 
-		key = NULL;
+		return 0;
 	}
+
+	key->hw_key_idx = wcid->idx;
+	wcid->hw_key_idx = idx;
 	mt76_wcid_key_setup(&dev->mt76, wcid, key);
 
 	return mt7603_wtbl_set_key(dev, wcid->idx, key);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index a95602473359..51a968a6afdc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1193,8 +1193,7 @@ EXPORT_SYMBOL_GPL(mt7615_mac_enable_rtscts);
 static int
 mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			   struct ieee80211_key_conf *key,
-			   enum mt76_cipher_type cipher, u16 cipher_mask,
-			   enum set_key_cmd cmd)
+			   enum mt76_cipher_type cipher, u16 cipher_mask)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx) + 30 * 4;
 	u8 data[32] = {};
@@ -1203,27 +1202,18 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 		return -EINVAL;
 
 	mt76_rr_copy(dev, addr, data, sizeof(data));
-	if (cmd == SET_KEY) {
-		if (cipher == MT_CIPHER_TKIP) {
-			/* Rx/Tx MIC keys are swapped */
-			memcpy(data, key->key, 16);
-			memcpy(data + 16, key->key + 24, 8);
-			memcpy(data + 24, key->key + 16, 8);
-		} else {
-			if (cipher_mask == BIT(cipher))
-				memcpy(data, key->key, key->keylen);
-			else if (cipher != MT_CIPHER_BIP_CMAC_128)
-				memcpy(data, key->key, 16);
-			if (cipher == MT_CIPHER_BIP_CMAC_128)
-				memcpy(data + 16, key->key, 16);
-		}
+	if (cipher == MT_CIPHER_TKIP) {
+		/* Rx/Tx MIC keys are swapped */
+		memcpy(data, key->key, 16);
+		memcpy(data + 16, key->key + 24, 8);
+		memcpy(data + 24, key->key + 16, 8);
 	} else {
+		if (cipher_mask == BIT(cipher))
+			memcpy(data, key->key, key->keylen);
+		else if (cipher != MT_CIPHER_BIP_CMAC_128)
+			memcpy(data, key->key, 16);
 		if (cipher == MT_CIPHER_BIP_CMAC_128)
-			memset(data + 16, 0, 16);
-		else if (cipher_mask)
-			memset(data, 0, 16);
-		if (!cipher_mask)
-			memset(data, 0, sizeof(data));
+			memcpy(data + 16, key->key, 16);
 	}
 
 	mt76_wr_copy(dev, addr, data, sizeof(data));
@@ -1234,7 +1224,7 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 static int
 mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			  enum mt76_cipher_type cipher, u16 cipher_mask,
-			  int keyidx, enum set_key_cmd cmd)
+			  int keyidx)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx), w0, w1;
 
@@ -1253,9 +1243,7 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 	else
 		w0 &= ~MT_WTBL_W0_RX_IK_VALID;
 
-	if (cmd == SET_KEY &&
-	    (cipher != MT_CIPHER_BIP_CMAC_128 ||
-	     cipher_mask == BIT(cipher))) {
+	if (cipher != MT_CIPHER_BIP_CMAC_128 || cipher_mask == BIT(cipher)) {
 		w0 &= ~MT_WTBL_W0_KEY_IDX;
 		w0 |= FIELD_PREP(MT_WTBL_W0_KEY_IDX, keyidx);
 	}
@@ -1272,19 +1260,10 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 static void
 mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			      enum mt76_cipher_type cipher, u16 cipher_mask,
-			      enum set_key_cmd cmd)
+			      enum mt76_cipher_type cipher, u16 cipher_mask)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx);
 
-	if (!cipher_mask) {
-		mt76_clear(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE);
-		return;
-	}
-
-	if (cmd != SET_KEY)
-		return;
-
 	if (cipher == MT_CIPHER_BIP_CMAC_128 &&
 	    cipher_mask & ~BIT(MT_CIPHER_BIP_CMAC_128))
 		return;
@@ -1295,8 +1274,7 @@ mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 			      struct mt76_wcid *wcid,
-			      struct ieee80211_key_conf *key,
-			      enum set_key_cmd cmd)
+			      struct ieee80211_key_conf *key)
 {
 	enum mt76_cipher_type cipher;
 	u16 cipher_mask = wcid->cipher;
@@ -1306,19 +1284,14 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 	if (cipher == MT_CIPHER_NONE)
 		return -EOPNOTSUPP;
 
-	if (cmd == SET_KEY)
-		cipher_mask |= BIT(cipher);
-	else
-		cipher_mask &= ~BIT(cipher);
-
-	mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask, cmd);
-	err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask,
-					 cmd);
+	cipher_mask |= BIT(cipher);
+	mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cipher_mask);
+	err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cipher_mask);
 	if (err < 0)
 		return err;
 
 	err = mt7615_mac_wtbl_update_pk(dev, wcid, cipher, cipher_mask,
-					key->keyidx, cmd);
+					key->keyidx);
 	if (err < 0)
 		return err;
 
@@ -1329,13 +1302,12 @@ int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 
 int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 			    struct mt76_wcid *wcid,
-			    struct ieee80211_key_conf *key,
-			    enum set_key_cmd cmd)
+			    struct ieee80211_key_conf *key)
 {
 	int err;
 
 	spin_lock_bh(&dev->mt76.lock);
-	err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
+	err = __mt7615_mac_wtbl_set_key(dev, wcid, key);
 	spin_unlock_bh(&dev->mt76.lock);
 
 	return err;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index ab4c1b4478aa..dadb13f2ca09 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -391,18 +391,17 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 
 	if (cmd == SET_KEY)
 		*wcid_keyidx = idx;
-	else if (idx == *wcid_keyidx)
-		*wcid_keyidx = -1;
-	else
+	else {
+		if (idx == *wcid_keyidx)
+			*wcid_keyidx = -1;
 		goto out;
+	}
 
-	mt76_wcid_key_setup(&dev->mt76, wcid,
-			    cmd == SET_KEY ? key : NULL);
-
+	mt76_wcid_key_setup(&dev->mt76, wcid, key);
 	if (mt76_is_mmio(&dev->mt76))
-		err = mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
+		err = mt7615_mac_wtbl_set_key(dev, wcid, key);
 	else
-		err = __mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
+		err = __mt7615_mac_wtbl_set_key(dev, wcid, key);
 
 out:
 	mt7615_mutex_release(dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 43591b4c1d9a..9e58f6924493 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -490,11 +490,9 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 void mt7615_mac_set_timing(struct mt7615_phy *phy);
 int __mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 			      struct mt76_wcid *wcid,
-			      struct ieee80211_key_conf *key,
-			      enum set_key_cmd cmd);
+			      struct ieee80211_key_conf *key);
 int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			    struct ieee80211_key_conf *key,
-			    enum set_key_cmd cmd);
+			    struct ieee80211_key_conf *key);
 void mt7615_mac_reset_work(struct work_struct *work);
 u32 mt7615_mac_get_sta_tid_sn(struct mt7615_dev *dev, int wcid, u8 tid);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index 7451a63206a5..dcbb5c605dfe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -454,20 +454,20 @@ int mt76x02_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	msta = sta ? (struct mt76x02_sta *)sta->drv_priv : NULL;
 	wcid = msta ? &msta->wcid : &mvif->group_wcid;
 
-	if (cmd == SET_KEY) {
-		key->hw_key_idx = wcid->idx;
-		wcid->hw_key_idx = idx;
-		if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) {
-			key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
-			wcid->sw_iv = true;
-		}
-	} else {
+	if (cmd != SET_KEY) {
 		if (idx == wcid->hw_key_idx) {
 			wcid->hw_key_idx = -1;
 			wcid->sw_iv = false;
 		}
 
-		key = NULL;
+		return 0;
+	}
+
+	key->hw_key_idx = wcid->idx;
+	wcid->hw_key_idx = idx;
+	if (key->flags & IEEE80211_KEY_FLAG_RX_MGMT) {
+		key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
+		wcid->sw_iv = true;
 	}
 	mt76_wcid_key_setup(&dev->mt76, wcid, key);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 3bbccbdfc5eb..784191ec4802 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -410,16 +410,15 @@ static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		mt7915_mcu_add_bss_info(phy, vif, true);
 	}
 
-	if (cmd == SET_KEY)
+	if (cmd == SET_KEY) {
 		*wcid_keyidx = idx;
-	else if (idx == *wcid_keyidx)
-		*wcid_keyidx = -1;
-	else
+	} else {
+		if (idx == *wcid_keyidx)
+			*wcid_keyidx = -1;
 		goto out;
+	}
 
-	mt76_wcid_key_setup(&dev->mt76, wcid,
-			    cmd == SET_KEY ? key : NULL);
-
+	mt76_wcid_key_setup(&dev->mt76, wcid, key);
 	err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip,
 				      key, MCU_EXT_CMD(STA_REC_UPDATE),
 				      &msta->wcid, cmd);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index 75eaf86c6a78..42933a6b7334 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -569,16 +569,15 @@ static int mt7921_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 
 	mt7921_mutex_acquire(dev);
 
-	if (cmd == SET_KEY)
+	if (cmd == SET_KEY) {
 		*wcid_keyidx = idx;
-	else if (idx == *wcid_keyidx)
-		*wcid_keyidx = -1;
-	else
+	} else {
+		if (idx == *wcid_keyidx)
+			*wcid_keyidx = -1;
 		goto out;
+	}
 
-	mt76_wcid_key_setup(&dev->mt76, wcid,
-			    cmd == SET_KEY ? key : NULL);
-
+	mt76_wcid_key_setup(&dev->mt76, wcid, key);
 	err = mt76_connac_mcu_add_key(&dev->mt76, vif, &msta->bip,
 				      key, MCU_UNI_CMD(STA_REC_UPDATE),
 				      &msta->wcid, cmd);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
index 3e4da0350d96..1ba22d147949 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
@@ -351,16 +351,15 @@ static int mt7996_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 		mt7996_mcu_add_bss_info(phy, vif, true);
 	}
 
-	if (cmd == SET_KEY)
+	if (cmd == SET_KEY) {
 		*wcid_keyidx = idx;
-	else if (idx == *wcid_keyidx)
-		*wcid_keyidx = -1;
-	else
+	} else {
+		if (idx == *wcid_keyidx)
+			*wcid_keyidx = -1;
 		goto out;
+	}
 
-	mt76_wcid_key_setup(&dev->mt76, wcid,
-			    cmd == SET_KEY ? key : NULL);
-
+	mt76_wcid_key_setup(&dev->mt76, wcid, key);
 	err = mt7996_mcu_add_key(&dev->mt76, vif, &msta->bip,
 				 key, MCU_WMWA_UNI_CMD(STA_REC_UPDATE),
 				 &msta->wcid, cmd);

From 4ccf11c4e8a8e051499d53a12f502196c97a758e Mon Sep 17 00:00:00 2001
From: Tze-nan Wu <Tze-nan.Wu@mediatek.com>
Date: Tue, 21 Mar 2023 19:04:43 +0800
Subject: [PATCH 136/173] tracing/synthetic: Fix races on freeing last_cmd

Currently, the "last_cmd" variable can be accessed by multiple processes
asynchronously when multiple users manipulate synthetic_events node
at the same time, it could lead to use-after-free or double-free.

This patch add "lastcmd_mutex" to prevent "last_cmd" from being accessed
asynchronously.

================================================================

It's easy to reproduce in the KASAN environment by running the two
scripts below in different shells.

script 1:
        while :
        do
                echo -n -e '\x88' > /sys/kernel/tracing/synthetic_events
        done

script 2:
        while :
        do
                echo -n -e '\xb0' > /sys/kernel/tracing/synthetic_events
        done

================================================================
double-free scenario:

    process A                       process B
-------------------               ---------------
1.kstrdup last_cmd
                                  2.free last_cmd
3.free last_cmd(double-free)

================================================================
use-after-free scenario:

    process A                       process B
-------------------               ---------------
1.kstrdup last_cmd
                                  2.free last_cmd
3.tracing_log_err(use-after-free)

================================================================

Appendix 1. KASAN report double-free:

BUG: KASAN: double-free in kfree+0xdc/0x1d4
Free of addr ***** by task sh/4879
Call trace:
        ...
        kfree+0xdc/0x1d4
        create_or_delete_synth_event+0x60/0x1e8
        trace_parse_run_command+0x2bc/0x4b8
        synth_events_write+0x20/0x30
        vfs_write+0x200/0x830
        ...

Allocated by task 4879:
        ...
        kstrdup+0x5c/0x98
        create_or_delete_synth_event+0x6c/0x1e8
        trace_parse_run_command+0x2bc/0x4b8
        synth_events_write+0x20/0x30
        vfs_write+0x200/0x830
        ...

Freed by task 5464:
        ...
        kfree+0xdc/0x1d4
        create_or_delete_synth_event+0x60/0x1e8
        trace_parse_run_command+0x2bc/0x4b8
        synth_events_write+0x20/0x30
        vfs_write+0x200/0x830
        ...

================================================================
Appendix 2. KASAN report use-after-free:

BUG: KASAN: use-after-free in strlen+0x5c/0x7c
Read of size 1 at addr ***** by task sh/5483
sh: CPU: 7 PID: 5483 Comm: sh
        ...
        __asan_report_load1_noabort+0x34/0x44
        strlen+0x5c/0x7c
        tracing_log_err+0x60/0x444
        create_or_delete_synth_event+0xc4/0x204
        trace_parse_run_command+0x2bc/0x4b8
        synth_events_write+0x20/0x30
        vfs_write+0x200/0x830
        ...

Allocated by task 5483:
        ...
        kstrdup+0x5c/0x98
        create_or_delete_synth_event+0x80/0x204
        trace_parse_run_command+0x2bc/0x4b8
        synth_events_write+0x20/0x30
        vfs_write+0x200/0x830
        ...

Freed by task 5480:
        ...
        kfree+0xdc/0x1d4
        create_or_delete_synth_event+0x74/0x204
        trace_parse_run_command+0x2bc/0x4b8
        synth_events_write+0x20/0x30
        vfs_write+0x200/0x830
        ...

Link: https://lore.kernel.org/linux-trace-kernel/20230321110444.1587-1-Tze-nan.Wu@mediatek.com

Fixes: 27c888da9867 ("tracing: Remove size restriction on synthetic event cmd error logging")
Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Cc: "Tom Zanussi" <zanussi@kernel.org>
Signed-off-by: Tze-nan Wu <Tze-nan.Wu@mediatek.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 46d0abb32d0f..f0ff730125bf 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -44,14 +44,21 @@ enum { ERRORS };
 
 static const char *err_text[] = { ERRORS };
 
+DEFINE_MUTEX(lastcmd_mutex);
 static char *last_cmd;
 
 static int errpos(const char *str)
 {
-	if (!str || !last_cmd)
-		return 0;
+	int ret = 0;
 
-	return err_pos(last_cmd, str);
+	mutex_lock(&lastcmd_mutex);
+	if (!str || !last_cmd)
+		goto out;
+
+	ret = err_pos(last_cmd, str);
+ out:
+	mutex_unlock(&lastcmd_mutex);
+	return ret;
 }
 
 static void last_cmd_set(const char *str)
@@ -59,18 +66,22 @@ static void last_cmd_set(const char *str)
 	if (!str)
 		return;
 
+	mutex_lock(&lastcmd_mutex);
 	kfree(last_cmd);
-
 	last_cmd = kstrdup(str, GFP_KERNEL);
+	mutex_unlock(&lastcmd_mutex);
 }
 
 static void synth_err(u8 err_type, u16 err_pos)
 {
+	mutex_lock(&lastcmd_mutex);
 	if (!last_cmd)
-		return;
+		goto out;
 
 	tracing_log_err(NULL, "synthetic_events", last_cmd, err_text,
 			err_type, err_pos);
+ out:
+	mutex_unlock(&lastcmd_mutex);
 }
 
 static int create_synth_event(const char *raw_command);

From 6455b6163d8c680366663cdb8c679514d55fc30c Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Sat, 25 Mar 2023 10:12:47 +0800
Subject: [PATCH 137/173] ring-buffer: Fix race while reader and writer are on
 the same page

When user reads file 'trace_pipe', kernel keeps printing following logs
that warn at "cpu_buffer->reader_page->read > rb_page_size(reader)" in
rb_get_reader_page(). It just looks like there's an infinite loop in
tracing_read_pipe(). This problem occurs several times on arm64 platform
when testing v5.10 and below.

  Call trace:
   rb_get_reader_page+0x248/0x1300
   rb_buffer_peek+0x34/0x160
   ring_buffer_peek+0xbc/0x224
   peek_next_entry+0x98/0xbc
   __find_next_entry+0xc4/0x1c0
   trace_find_next_entry_inc+0x30/0x94
   tracing_read_pipe+0x198/0x304
   vfs_read+0xb4/0x1e0
   ksys_read+0x74/0x100
   __arm64_sys_read+0x24/0x30
   el0_svc_common.constprop.0+0x7c/0x1bc
   do_el0_svc+0x2c/0x94
   el0_svc+0x20/0x30
   el0_sync_handler+0xb0/0xb4
   el0_sync+0x160/0x180

Then I dump the vmcore and look into the problematic per_cpu ring_buffer,
I found that tail_page/commit_page/reader_page are on the same page while
reader_page->read is obviously abnormal:
  tail_page == commit_page == reader_page == {
    .write = 0x100d20,
    .read = 0x8f9f4805,  // Far greater than 0xd20, obviously abnormal!!!
    .entries = 0x10004c,
    .real_end = 0x0,
    .page = {
      .time_stamp = 0x857257416af0,
      .commit = 0xd20,  // This page hasn't been full filled.
      // .data[0...0xd20] seems normal.
    }
 }

The root cause is most likely the race that reader and writer are on the
same page while reader saw an event that not fully committed by writer.

To fix this, add memory barriers to make sure the reader can see the
content of what is committed. Since commit a0fcaaed0c46 ("ring-buffer: Fix
race between reset page and reading page") has added the read barrier in
rb_get_reader_page(), here we just need to add the write barrier.

Link: https://lore.kernel.org/linux-trace-kernel/20230325021247.2923907-1-zhengyejian1@huawei.com

Cc: stable@vger.kernel.org
Fixes: 77ae365eca89 ("ring-buffer: make lockless")
Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c6f47b6cfd5f..76a2d91eecad 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3098,6 +3098,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 		if (RB_WARN_ON(cpu_buffer,
 			       rb_is_reader_page(cpu_buffer->tail_page)))
 			return;
+		/*
+		 * No need for a memory barrier here, as the update
+		 * of the tail_page did it for this page.
+		 */
 		local_set(&cpu_buffer->commit_page->page->commit,
 			  rb_page_write(cpu_buffer->commit_page));
 		rb_inc_page(&cpu_buffer->commit_page);
@@ -3107,6 +3111,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 	while (rb_commit_index(cpu_buffer) !=
 	       rb_page_write(cpu_buffer->commit_page)) {
 
+		/* Make sure the readers see the content of what is committed. */
+		smp_wmb();
 		local_set(&cpu_buffer->commit_page->page->commit,
 			  rb_page_write(cpu_buffer->commit_page));
 		RB_WARN_ON(cpu_buffer,
@@ -4684,7 +4690,12 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 
 	/*
 	 * Make sure we see any padding after the write update
-	 * (see rb_reset_tail())
+	 * (see rb_reset_tail()).
+	 *
+	 * In addition, a writer may be writing on the reader page
+	 * if the page has not been fully filled, so the read barrier
+	 * is also needed to make sure we see the content of what is
+	 * committed by the writer (see rb_set_commit_to_write()).
 	 */
 	smp_rmb();
 

From ea65b41807a26495ff2a73dd8b1bab2751940887 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Mon, 27 Mar 2023 18:36:46 +0100
Subject: [PATCH 138/173] ftrace: Mark get_lock_parent_ip() __always_inline

If the compiler decides not to inline this function then preemption
tracing will always show an IP inside the preemption disabling path and
never the function actually calling preempt_{enable,disable}.

Link: https://lore.kernel.org/linux-trace-kernel/20230327173647.1690849-1-john@metanate.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: stable@vger.kernel.org
Fixes: f904f58263e1d ("sched/debug: Fix preempt_disable_ip recording for preempt_disable()")
Signed-off-by: John Keeping <john@metanate.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 366c730beaa3..402fc061de75 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -980,7 +980,7 @@ static inline void __ftrace_enabled_restore(int enabled)
 #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
 #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
 
-static inline unsigned long get_lock_parent_ip(void)
+static __always_inline unsigned long get_lock_parent_ip(void)
 {
 	unsigned long addr = CALLER_ADDR0;
 

From b9f451a9029a16eb7913ace09b92493d00f2e564 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Wed, 29 Mar 2023 17:50:15 +0200
Subject: [PATCH 139/173] tracing/timerlat: Notify new max thread latency

timerlat is not reporting a new tracing_max_latency for the thread
latency. The reason is that it is not calling notify_new_max_latency()
function after the new thread latency is sampled.

Call notify_new_max_latency() after computing the thread latency.

Link: https://lkml.kernel.org/r/16e18d61d69073d0192ace07bf61e405cca96e9c.1680104184.git.bristot@kernel.org

Cc: stable@vger.kernel.org
Fixes: dae181349f1e ("tracing/osnoise: Support a list of trace_array *tr")
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 9176bb7a9bb4..e8116094bed8 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1738,6 +1738,8 @@ static int timerlat_main(void *data)
 
 		trace_timerlat_sample(&s);
 
+		notify_new_max_latency(diff);
+
 		timerlat_dump_stack(time_to_us(diff));
 
 		tlat->tracing_thread = false;

From d3cba7f02cd82118c32651c73374d8a5a459d9a6 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Wed, 29 Mar 2023 17:50:16 +0200
Subject: [PATCH 140/173] tracing/osnoise: Fix notify new tracing_max_latency

osnoise/timerlat tracers are reporting new max latency on instances
where the tracing is off, creating inconsistencies between the max
reported values in the trace and in the tracing_max_latency. Thus
only report new tracing_max_latency on active tracing instances.

Link: https://lkml.kernel.org/r/ecd109fde4a0c24ab0f00ba1e9a144ac19a91322.1680104184.git.bristot@kernel.org

Cc: stable@vger.kernel.org
Fixes: dae181349f1e ("tracing/osnoise: Support a list of trace_array *tr")
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index e8116094bed8..4496975f2029 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1296,7 +1296,7 @@ static void notify_new_max_latency(u64 latency)
 	rcu_read_lock();
 	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
 		tr = inst->tr;
-		if (tr->max_latency < latency) {
+		if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
 			tr->max_latency = latency;
 			latency_fsnotify(tr);
 		}

From f82e7ca019dfad3b006fd3b772f7ac569672db55 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Thu, 9 Mar 2023 22:13:02 -0500
Subject: [PATCH 141/173] tracing: Error if a trace event has an array for a
 __field()

A __field() in the TRACE_EVENT() macro is used to set up the fields of the
trace event data. It is for single storage units (word, char, int,
pointer, etc) and not for complex structures or arrays. Unfortunately,
there's nothing preventing the build from accepting:

    __field(int, arr[5]);

from building. It will turn into a array value. This use to work fine, as
the offset and size use to be determined by the macro using the field name,
but things have changed and the offset and size are now determined by the
type. So the above would only be size 4, and the next field will be
located 4 bytes from it (instead of 20).

The proper way to declare static arrays is to use the __array() macro.

Instead of __field(int, arr[5]) it should be __array(int, arr, 5).

Add some macro tricks to the building of a trace event from the
TRACE_EVENT() macro such that __field(int, arr[5]) will fail to build. A
comment by the failure will explain why the build failed.

Link: https://lore.kernel.org/lkml/20230306122549.236561-1-douglas.raillard@arm.com/
Link: https://lore.kernel.org/linux-trace-kernel/20230309221302.642e82d9@gandalf.local.home

Reported-by: Douglas RAILLARD <douglas.raillard@arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 include/trace/stages/stage5_get_offsets.h | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h
index ac5c24d3beeb..e30a13be46ba 100644
--- a/include/trace/stages/stage5_get_offsets.h
+++ b/include/trace/stages/stage5_get_offsets.h
@@ -9,17 +9,30 @@
 #undef __entry
 #define __entry entry
 
+/*
+ * Fields should never declare an array: i.e. __field(int, arr[5])
+ * If they do, it will cause issues in parsing and possibly corrupt the
+ * events. To prevent that from happening, test the sizeof() a fictitious
+ * type called "struct _test_no_array_##item" which will fail if "item"
+ * contains array elements (like "arr[5]").
+ *
+ * If you hit this, use __array(int, arr, 5) instead.
+ */
 #undef __field
-#define __field(type, item)
+#define __field(type, item)					\
+	{ (void)sizeof(struct _test_no_array_##item *); }
 
 #undef __field_ext
-#define __field_ext(type, item, filter_type)
+#define __field_ext(type, item, filter_type)			\
+	{ (void)sizeof(struct _test_no_array_##item *); }
 
 #undef __field_struct
-#define __field_struct(type, item)
+#define __field_struct(type, item)				\
+	{ (void)sizeof(struct _test_no_array_##item *); }
 
 #undef __field_struct_ext
-#define __field_struct_ext(type, item, filter_type)
+#define __field_struct_ext(type, item, filter_type)		\
+	{ (void)sizeof(struct _test_no_array_##item *); }
 
 #undef __array
 #define __array(type, item, len)

From c6b486fb33680ad5a3a6390ce693c835caaae3f7 Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli@ti.com>
Date: Mon, 3 Apr 2023 14:33:21 +0530
Subject: [PATCH 142/173] net: ethernet: ti: am65-cpsw: Fix mdio cleanup in
 probe

In the am65_cpsw_nuss_probe() function's cleanup path, the call to
of_platform_device_destroy() for the common->mdio_dev device is invoked
unconditionally. It is possible that either the MDIO node is not present
in the device-tree, or the MDIO node is disabled in the device-tree. In
both these cases, the MDIO device is not created, resulting in a NULL
pointer dereference when the of_platform_device_destroy() function is
invoked on the common->mdio_dev device on the cleanup path.

Fix this by ensuring that the common->mdio_dev device exists, before
attempting to invoke of_platform_device_destroy().

Fixes: a45cfcc69a25 ("net: ethernet: ti: am65-cpsw-nuss: use of_platform_device_create() for mdio")
Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Link: https://lore.kernel.org/r/20230403090321.835877-1-s-vadapalli@ti.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 4e3861c47708..bcea87b7151c 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2926,7 +2926,8 @@ err_free_phylink:
 	am65_cpsw_nuss_phylink_cleanup(common);
 	am65_cpts_release(common->cpts);
 err_of_clear:
-	of_platform_device_destroy(common->mdio_dev, NULL);
+	if (common->mdio_dev)
+		of_platform_device_destroy(common->mdio_dev, NULL);
 err_pm_clear:
 	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
@@ -2956,7 +2957,8 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev)
 	am65_cpts_release(common->cpts);
 	am65_cpsw_disable_serdes_phy(common);
 
-	of_platform_device_destroy(common->mdio_dev, NULL);
+	if (common->mdio_dev)
+		of_platform_device_destroy(common->mdio_dev, NULL);
 
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);

From e4395701330fc4aee530905039516fe770b81417 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 31 Mar 2023 12:32:24 -0300
Subject: [PATCH 143/173] iommufd: Check for uptr overflow

syzkaller found that setting up a map with a user VA that wraps past zero
can trigger WARN_ONs, particularly from pin_user_pages weirdly returning 0
due to invalid arguments.

Prevent creating a pages with a uptr and size that would math overflow.

  WARNING: CPU: 0 PID: 518 at drivers/iommu/iommufd/pages.c:793 pfn_reader_user_pin+0x2e6/0x390
  Modules linked in:
  CPU: 0 PID: 518 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
  RIP: 0010:pfn_reader_user_pin+0x2e6/0x390
  Code: b1 11 e9 25 fe ff ff e8 28 e4 0f ff 31 ff 48 89 de e8 2e e6 0f ff 48 85 db 74 0a e8 14 e4 0f ff e9 4d ff ff ff e8 0a e4 0f ff <0f> 0b bb f2 ff ff ff e9 3c ff ff ff e8 f9 e3 0f ff ba 01 00 00 00
  RSP: 0018:ffffc90000f9fa30 EFLAGS: 00010246
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff821e2b72
  RDX: 0000000000000000 RSI: ffff888014184680 RDI: 0000000000000002
  RBP: ffffc90000f9fa78 R08: 00000000000000ff R09: 0000000079de6f4e
  R10: ffffc90000f9f790 R11: ffff888014185418 R12: ffffc90000f9fc60
  R13: 0000000000000002 R14: ffff888007879800 R15: 0000000000000000
  FS:  00007f4227555740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000020000043 CR3: 000000000e748005 CR4: 0000000000770ef0
  PKRU: 55555554
  Call Trace:
   <TASK>
   pfn_reader_next+0x14a/0x7b0
   ? interval_tree_double_span_iter_update+0x11a/0x140
   pfn_reader_first+0x140/0x1b0
   iopt_pages_rw_slow+0x71/0x280
   ? __this_cpu_preempt_check+0x20/0x30
   iopt_pages_rw_access+0x2b2/0x5b0
   iommufd_access_rw+0x19f/0x2f0
   iommufd_test+0xd11/0x16f0
   ? write_comp_data+0x2f/0x90
   iommufd_fops_ioctl+0x206/0x330
   __x64_sys_ioctl+0x10e/0x160
   ? __pfx_iommufd_fops_ioctl+0x10/0x10
   do_syscall_64+0x3b/0x90
   entry_SYSCALL_64_after_hwframe+0x72/0xdc

Cc: <stable@vger.kernel.org>
Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage")
Link: https://lore.kernel.org/r/1-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reported-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/pages.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index f8d92c9bb65b..400ec7c91ed7 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -1142,6 +1142,7 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
 				    bool writable)
 {
 	struct iopt_pages *pages;
+	unsigned long end;
 
 	/*
 	 * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP
@@ -1150,6 +1151,9 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
 	if (length > SIZE_MAX - PAGE_SIZE || length == 0)
 		return ERR_PTR(-EINVAL);
 
+	if (check_add_overflow((unsigned long)uptr, length, &end))
+		return ERR_PTR(-EOVERFLOW);
+
 	pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT);
 	if (!pages)
 		return ERR_PTR(-ENOMEM);

From 727c28c1cef2bc013d2c8bb6c50e410a3882a04e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 31 Mar 2023 12:32:25 -0300
Subject: [PATCH 144/173] iommufd: Fix unpinning of pages when an access is
 present

syzkaller found that the calculation of batch_last_index should use
'start_index' since at input to this function the batch is either empty or
it has already been adjusted to cross any accesses so it will start at the
point we are unmapping from.

Getting this wrong causes the unmap to run over the end of the pages
which corrupts pages that were never mapped. In most cases this triggers
the num pinned debugging:

  WARNING: CPU: 0 PID: 557 at drivers/iommu/iommufd/pages.c:294 __iopt_area_unfill_domain+0x152/0x560
  Modules linked in:
  CPU: 0 PID: 557 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
  RIP: 0010:__iopt_area_unfill_domain+0x152/0x560
  Code: d2 0f ff 44 8b 64 24 54 48 8b 44 24 48 31 ff 44 89 e6 48 89 44 24 38 e8 fc d3 0f ff 45 85 e4 0f 85 eb 01 00 00 e8 0e d2 0f ff <0f> 0b e8 07 d2 0f ff 48 8b 44 24 38 89 5c 24 58 89 18 8b 44 24 54
  RSP: 0018:ffffc9000108baf0 EFLAGS: 00010246
  RAX: 0000000000000000 RBX: 00000000ffffffff RCX: ffffffff821e3f85
  RDX: 0000000000000000 RSI: ffff88800faf0000 RDI: 0000000000000002
  RBP: ffffc9000108bd18 R08: 000000000003ca25 R09: 0000000000000014
  R10: 000000000003ca00 R11: 0000000000000024 R12: 0000000000000004
  R13: 0000000000000801 R14: 00000000000007ff R15: 0000000000000800
  FS:  00007f3499ce1740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000020000243 CR3: 00000000179c2001 CR4: 0000000000770ef0
  PKRU: 55555554
  Call Trace:
   <TASK>
   iopt_area_unfill_domain+0x32/0x40
   iopt_table_remove_domain+0x23f/0x4c0
   iommufd_device_selftest_detach+0x3a/0x90
   iommufd_selftest_destroy+0x55/0x70
   iommufd_object_destroy_user+0xce/0x130
   iommufd_destroy+0xa2/0xc0
   iommufd_fops_ioctl+0x206/0x330
   __x64_sys_ioctl+0x10e/0x160
   do_syscall_64+0x3b/0x90
   entry_SYSCALL_64_after_hwframe+0x72/0xdc

Also add some useful WARN_ON sanity checks.

Cc: <stable@vger.kernel.org>
Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage")
Link: https://lore.kernel.org/r/2-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reported-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/pages.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 400ec7c91ed7..b11aace83654 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -1207,13 +1207,21 @@ iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area,
 			unsigned long start =
 				max(start_index, *unmapped_end_index);
 
+			if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+			    batch->total_pfns)
+				WARN_ON(*unmapped_end_index -
+						batch->total_pfns !=
+					start_index);
 			batch_from_domain(batch, domain, area, start,
 					  last_index);
-			batch_last_index = start + batch->total_pfns - 1;
+			batch_last_index = start_index + batch->total_pfns - 1;
 		} else {
 			batch_last_index = last_index;
 		}
 
+		if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+			WARN_ON(batch_last_index > real_last_index);
+
 		/*
 		 * unmaps must always 'cut' at a place where the pfns are not
 		 * contiguous to pair with the maps that always install

From 13a0d1ae7ee6b438f5537711a8c60cba00554943 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 31 Mar 2023 12:32:26 -0300
Subject: [PATCH 145/173] iommufd: Do not corrupt the pfn list when doing batch
 carry

If batch->end is 0 then setting npfns[0] before computing the new value of
pfns will fail to adjust the pfn and result in various page accounting
corruptions. It should be ordered after.

This seems to result in various kinds of page meta-data corruption related
failures:

  WARNING: CPU: 1 PID: 527 at mm/gup.c:75 try_grab_folio+0x503/0x740
  Modules linked in:
  CPU: 1 PID: 527 Comm: repro Not tainted 6.3.0-rc2-eeac8ede1755+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
  RIP: 0010:try_grab_folio+0x503/0x740
  Code: e3 01 48 89 de e8 6d c1 dd ff 48 85 db 0f 84 7c fe ff ff e8 4f bf dd ff 49 8d 47 ff 48 89 45 d0 e9 73 fe ff ff e8 3d bf dd ff <0f> 0b 31 db e9 d0 fc ff ff e8 2f bf dd ff 48 8b 5d c8 31 ff 48 89
  RSP: 0018:ffffc90000f37908 EFLAGS: 00010046
  RAX: 0000000000000000 RBX: 00000000fffffc02 RCX: ffffffff81504c26
  RDX: 0000000000000000 RSI: ffff88800d030000 RDI: 0000000000000002
  RBP: ffffc90000f37948 R08: 000000000003ca24 R09: 0000000000000008
  R10: 000000000003ca00 R11: 0000000000000023 R12: ffffea000035d540
  R13: 0000000000000001 R14: 0000000000000000 R15: ffffea000035d540
  FS:  00007fecbf659740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00000000200011c3 CR3: 000000000ef66006 CR4: 0000000000770ee0
  PKRU: 55555554
  Call Trace:
   <TASK>
   internal_get_user_pages_fast+0xd32/0x2200
   pin_user_pages_fast+0x65/0x90
   pfn_reader_user_pin+0x376/0x390
   pfn_reader_next+0x14a/0x7b0
   pfn_reader_first+0x140/0x1b0
   iopt_area_fill_domain+0x74/0x210
   iopt_table_add_domain+0x30e/0x6e0
   iommufd_device_selftest_attach+0x7f/0x140
   iommufd_test+0x10ff/0x16f0
   iommufd_fops_ioctl+0x206/0x330
   __x64_sys_ioctl+0x10e/0x160
   do_syscall_64+0x3b/0x90
   entry_SYSCALL_64_after_hwframe+0x72/0xdc

Cc: <stable@vger.kernel.org>
Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages")
Link: https://lore.kernel.org/r/3-v1-ceab6a4d7d7a+94-iommufd_syz_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reported-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/pages.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index b11aace83654..3c47846cc5ef 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -294,9 +294,9 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
 			batch->npfns[batch->end - 1] < keep_pfns);
 
 	batch->total_pfns = keep_pfns;
-	batch->npfns[0] = keep_pfns;
 	batch->pfns[0] = batch->pfns[batch->end - 1] +
 			 (batch->npfns[batch->end - 1] - keep_pfns);
+	batch->npfns[0] = keep_pfns;
 	batch->end = 0;
 }
 

From 218c597325f4faf7b7a6049233a30d7842b5b2dc Mon Sep 17 00:00:00 2001
From: Corinna Vinschen <vinschen@redhat.com>
Date: Mon, 3 Apr 2023 14:11:20 +0200
Subject: [PATCH 146/173] net: stmmac: fix up RX flow hash indirection table
 when setting channels

stmmac_reinit_queues() fails to fix up the RX hash.  Even if the number
of channels gets restricted, the output of `ethtool -x' indicates that
all RX queues are used:

  $ ethtool -l enp0s29f2
  Channel parameters for enp0s29f2:
  Pre-set maximums:
  RX:		8
  TX:		8
  Other:		n/a
  Combined:	n/a
  Current hardware settings:
  RX:		8
  TX:		8
  Other:		n/a
  Combined:	n/a
  $ ethtool -x enp0s29f2
  RX flow hash indirection table for enp0s29f2 with 8 RX ring(s):
      0:      0     1     2     3     4     5     6     7
      8:      0     1     2     3     4     5     6     7
  [...]
  $ ethtool -L enp0s29f2 rx 3
  $ ethtool -x enp0s29f2
  RX flow hash indirection table for enp0s29f2 with 3 RX ring(s):
      0:      0     1     2     3     4     5     6     7
      8:      0     1     2     3     4     5     6     7
  [...]

Fix this by setting the indirection table according to the number
of specified queues.  The result is now as expected:

  $ ethtool -L enp0s29f2 rx 3
  $ ethtool -x enp0s29f2
  RX flow hash indirection table for enp0s29f2 with 3 RX ring(s):
      0:      0     1     2     0     1     2     0     1
      8:      2     0     1     2     0     1     2     0
  [...]

Tested on Intel Elkhart Lake.

Fixes: 0366f7e06a6b ("net: stmmac: add ethtool support for get/set channels")
Signed-off-by: Corinna Vinschen <vinschen@redhat.com>
Link: https://lore.kernel.org/r/20230403121120.489138-1-vinschen@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d41a5f92aee7..59cbf3597eb4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6950,7 +6950,7 @@ static void stmmac_napi_del(struct net_device *dev)
 int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	int ret = 0;
+	int ret = 0, i;
 
 	if (netif_running(dev))
 		stmmac_release(dev);
@@ -6959,6 +6959,10 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
 
 	priv->plat->rx_queues_to_use = rx_cnt;
 	priv->plat->tx_queues_to_use = tx_cnt;
+	if (!netif_is_rxfh_configured(dev))
+		for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+			priv->rss.table[i] = ethtool_rxfh_indir_default(i,
+									rx_cnt);
 
 	stmmac_napi_add(dev);
 

From 5085e41f9e83a1bec51da1f20b54f2ec3a13a3fe Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Thu, 30 Mar 2023 14:24:27 -0400
Subject: [PATCH 147/173] sunrpc: only free unix grouplist after RCU settles

While the unix_gid object is rcu-freed, the group_info list that it
contains is not. Ensure that we only put the group list reference once
we are really freeing the unix_gid object.

Reported-by: Zhi Li <yieli@redhat.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2183056
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Fixes: fd5d2f78261b ("SUNRPC: Make server side AUTH_UNIX use lockless lookups")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/svcauth_unix.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 983c5891cb56..4246363cb095 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -416,14 +416,23 @@ static int unix_gid_hash(kuid_t uid)
 	return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS);
 }
 
+static void unix_gid_free(struct rcu_head *rcu)
+{
+	struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu);
+	struct cache_head *item = &ug->h;
+
+	if (test_bit(CACHE_VALID, &item->flags) &&
+	    !test_bit(CACHE_NEGATIVE, &item->flags))
+		put_group_info(ug->gi);
+	kfree(ug);
+}
+
 static void unix_gid_put(struct kref *kref)
 {
 	struct cache_head *item = container_of(kref, struct cache_head, ref);
 	struct unix_gid *ug = container_of(item, struct unix_gid, h);
-	if (test_bit(CACHE_VALID, &item->flags) &&
-	    !test_bit(CACHE_NEGATIVE, &item->flags))
-		put_group_info(ug->gi);
-	kfree_rcu(ug, rcu);
+
+	call_rcu(&ug->rcu, unix_gid_free);
 }
 
 static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)

From 8be8f170e8383fd1421e8b87950e90d7dd45be07 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 28 Mar 2023 13:47:58 -0400
Subject: [PATCH 148/173] NFS: Remove "select RPCSEC_GSS_KRB5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_CRYPTO=n (e.g. arm/shmobile_defconfig):

   WARNING: unmet direct dependencies detected for RPCSEC_GSS_KRB5
     Depends on [n]: NETWORK_FILESYSTEMS [=y] && SUNRPC [=y] && CRYPTO [=n]
     Selected by [y]:
     - NFS_V4 [=y] && NETWORK_FILESYSTEMS [=y] && NFS_FS [=y]

As NFSv4 can work without crypto enabled, remove the RPCSEC_GSS_KRB5
dependency altogether.

Trond says:
> It is possible to use the NFSv4.1 client with just AUTH_SYS, and
> in fact there are plenty of people out there using only that. The
> fact that RFC5661 gets its knickers in a twist about RPCSEC_GSS
> support is largely irrelevant to those people.
>
> The other issue is that ’select’ enforces the strict dependency
> that if the NFS client is compiled into the kernel, then the
> RPCSEC_GSS and kerberos code needs to be compiled in as well: they
> cannot exist as modules.

Fixes: e57d06527738 ("NFS & NFSD: Update GSS dependencies")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Niklas Söderlund <niklas.soderlund@ragnatech.se>
Suggested-by: Trond Myklebust <trondmy@hammerspace.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 450d6c3bc05e..c1c7ed2fd860 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -75,7 +75,6 @@ config NFS_V3_ACL
 config NFS_V4
 	tristate "NFS client support for NFS version 4"
 	depends on NFS_FS
-	select RPCSEC_GSS_KRB5
 	select KEYS
 	help
 	  This option enables support for version 4 of the NFS protocol

From 7de82c2f36fb26aa78440bbf0efcf360b691d98b Mon Sep 17 00:00:00 2001
From: Dai Ngo <dai.ngo@oracle.com>
Date: Sat, 1 Apr 2023 13:22:08 -0700
Subject: [PATCH 149/173] NFSD: callback request does not use correct
 credential for AUTH_SYS

Currently callback request does not use the credential specified in
CREATE_SESSION if the security flavor for the back channel is AUTH_SYS.

Problem was discovered by pynfs 4.1 DELEG5 and DELEG7 test with error:
DELEG5   st_delegation.testCBSecParms     : FAILURE
           expected callback with uid, gid == 17, 19, got 0, 0

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Fixes: 8276c902bbe9 ("SUNRPC: remove uid and gid from struct auth_cred")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4callback.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 2a815f5a52c4..4039ffcf90ba 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -946,8 +946,8 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r
 		if (!kcred)
 			return NULL;
 
-		kcred->uid = ses->se_cb_sec.uid;
-		kcred->gid = ses->se_cb_sec.gid;
+		kcred->fsuid = ses->se_cb_sec.uid;
+		kcred->fsgid = ses->se_cb_sec.gid;
 		return kcred;
 	}
 }

From b4a01ace20f5c93c724abffc0a83ec84f514b98d Mon Sep 17 00:00:00 2001
From: Simei Su <simei.su@intel.com>
Date: Wed, 22 Mar 2023 10:24:15 +0800
Subject: [PATCH 150/173] ice: fix wrong fallback logic for FDIR

When adding a FDIR filter, if ice_vc_fdir_set_irq_ctx returns failure,
the inserted fdir entry will not be removed and if ice_vc_fdir_write_fltr
returns failure, the fdir context info for irq handler will not be cleared
which may lead to inconsistent or memory leak issue. This patch refines
failure cases to resolve this issue.

Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF")
Signed-off-by: Simei Su <simei.su@intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index 5fd75e75772e..4d007d8c2540 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -1871,7 +1871,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
 		v_ret = VIRTCHNL_STATUS_SUCCESS;
 		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
 		dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
-		goto err_free_conf;
+		goto err_rem_entry;
 	}
 
 	ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
@@ -1880,15 +1880,16 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
 		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
 		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
 			vf->vf_id, ret);
-		goto err_rem_entry;
+		goto err_clr_irq;
 	}
 
 exit:
 	kfree(stat);
 	return ret;
 
-err_rem_entry:
+err_clr_irq:
 	ice_vc_fdir_clear_irq_ctx(vf);
+err_rem_entry:
 	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
 err_free_conf:
 	devm_kfree(dev, conf);

From 83c911dc5e0e8e6eaa6431c06972a8f159bfe2fc Mon Sep 17 00:00:00 2001
From: Lingyu Liu <lingyu.liu@intel.com>
Date: Tue, 28 Mar 2023 10:49:11 +0000
Subject: [PATCH 151/173] ice: Reset FDIR counter in FDIR init stage

Reset the FDIR counters when FDIR inits. Without this patch,
when VF initializes or resets, all the FDIR counters are not
cleaned, which may cause unexpected behaviors for future FDIR
rule create (e.g., rule conflict).

Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF")
Signed-off-by: Junfeng Guo <junfeng.guo@intel.com>
Signed-off-by: Lingyu Liu <lingyu.liu@intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/ice_virtchnl_fdir.c   | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
index 4d007d8c2540..daa6a1e894cf 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -541,6 +541,21 @@ static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
 	}
 }
 
+/**
+ * ice_vc_fdir_reset_cnt_all - reset all FDIR counters for this VF FDIR
+ * @fdir: pointer to the VF FDIR structure
+ */
+static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir)
+{
+	enum ice_fltr_ptype flow;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+	     flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		fdir->fdir_fltr_cnt[flow][0] = 0;
+		fdir->fdir_fltr_cnt[flow][1] = 0;
+	}
+}
+
 /**
  * ice_vc_fdir_has_prof_conflict
  * @vf: pointer to the VF structure
@@ -1998,6 +2013,7 @@ void ice_vf_fdir_init(struct ice_vf *vf)
 	spin_lock_init(&fdir->ctx_lock);
 	fdir->ctx_irq.flags = 0;
 	fdir->ctx_done.flags = 0;
+	ice_vc_fdir_reset_cnt_all(fdir);
 }
 
 /**

From d564fa1ff19e893e2971d66e5c8f49dc1cdc8ffc Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 9 Jan 2023 15:11:52 +0200
Subject: [PATCH 152/173] asm-generic/io.h: suppress endianness warnings for
 readq() and writeq()

Commit c1d55d50139b ("asm-generic/io.h: Fix sparse warnings on
big-endian architectures") missed fixing the 64-bit accessors.

Arnd explains in the attached link why the casts are necessary, even if
__raw_readq() and __raw_writeq() do not take endian-specific types.

Link: https://lore.kernel.org/lkml/9105d6fc-880b-4734-857d-e3d30b87ccf6@app.fastmail.com/
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/io.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 4c44a29b5e8e..d78c3056c98f 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -236,7 +236,7 @@ static inline u64 readq(const volatile void __iomem *addr)
 
 	log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
 	__io_br();
-	val = __le64_to_cpu(__raw_readq(addr));
+	val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
 	__io_ar(val);
 	log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
 	return val;
@@ -287,7 +287,7 @@ static inline void writeq(u64 value, volatile void __iomem *addr)
 {
 	log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
 	__io_bw();
-	__raw_writeq(__cpu_to_le64(value), addr);
+	__raw_writeq((u64 __force)__cpu_to_le64(value), addr);
 	__io_aw();
 	log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
 }

From 05d3855b4d21ef3c2df26be1cbba9d2c68915fcb Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 9 Jan 2023 15:11:53 +0200
Subject: [PATCH 153/173] asm-generic/io.h: suppress endianness warnings for
 relaxed accessors

Copy the forced type casts from the normal MMIO accessors to suppress
the sparse warnings that point out __raw_readl() returns a native endian
word (just like readl()).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/io.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index d78c3056c98f..587e7e9b9a37 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -319,7 +319,7 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr)
 	u16 val;
 
 	log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
-	val = __le16_to_cpu(__raw_readw(addr));
+	val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
 	log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
@@ -332,7 +332,7 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr)
 	u32 val;
 
 	log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
-	val = __le32_to_cpu(__raw_readl(addr));
+	val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
 	log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
@@ -345,7 +345,7 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr)
 	u64 val;
 
 	log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
-	val = __le64_to_cpu(__raw_readq(addr));
+	val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
 	log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
@@ -366,7 +366,7 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
 static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
 {
 	log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
-	__raw_writew(cpu_to_le16(value), addr);
+	__raw_writew((u16 __force)cpu_to_le16(value), addr);
 	log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
@@ -376,7 +376,7 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
 static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
 {
 	log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
-	__raw_writel(__cpu_to_le32(value), addr);
+	__raw_writel((u32 __force)__cpu_to_le32(value), addr);
 	log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
@@ -386,7 +386,7 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
 static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
 {
 	log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
-	__raw_writeq(__cpu_to_le64(value), addr);
+	__raw_writeq((u64 __force)__cpu_to_le64(value), addr);
 	log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
 }
 #endif

From 656e9007ef5862746cdf7ac16267c8e06e7b0989 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 2 Mar 2023 09:53:31 +0100
Subject: [PATCH 154/173] asm-generic: avoid __generic_cmpxchg_local warnings

Code that passes a 32-bit constant into cmpxchg() produces a harmless
sparse warning because of the truncation in the branch that is not taken:

fs/erofs/zdata.c: note: in included file (through /home/arnd/arm-soc/arch/arm/include/asm/cmpxchg.h, /home/arnd/arm-soc/arch/arm/include/asm/atomic.h, /home/arnd/arm-soc/include/linux/atomic.h, ...):
include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe)
include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe)
include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe)
include/asm-generic/cmpxchg-local.h:30:42: warning: cast truncates bits from constant value (5f0edead becomes ad)
include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe)
include/asm-generic/cmpxchg-local.h:34:44: warning: cast truncates bits from constant value (5f0edead becomes dead)

This was reported as a regression to Matt's recent __generic_cmpxchg_local
patch, though this patch only added more warnings on top of the ones
that were already there.

Rewording the truncation to use an explicit bitmask instead of a cast
to a smaller type avoids the warning but otherwise leaves the code
unchanged.

I had another look at why the cast is even needed for atomic_cmpxchg(),
and as Matt describes the problem here is that atomic_t contains a
signed 'int', but cmpxchg() takes an 'unsigned long' argument, and
converting between the two leads to a 64-bit sign-extension of
negative 32-bit atomics.

I checked the other implementations of arch_cmpxchg() and did not find
any others that run into the same problem as __generic_cmpxchg_local(),
but it's easy to be on the safe side here and always convert the
signed int into an unsigned int when calling arch_cmpxchg(), as this
will work even when any of the arch_cmpxchg() implementations run
into the same problem.

Fixes: 624654152284 ("locking/atomic: cmpxchg: Make __generic_cmpxchg_local compare against zero-extended 'old' value")
Reviewed-by: Matt Evans <mev@rivosinc.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/atomic.h        |  4 ++--
 include/asm-generic/cmpxchg-local.h | 12 ++++++------
 include/asm-generic/cmpxchg.h       |  6 +++---
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 04b8be9f1a77..e271d6708c87 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -130,7 +130,7 @@ ATOMIC_OP(xor, ^)
 #define arch_atomic_read(v)			READ_ONCE((v)->counter)
 #define arch_atomic_set(v, i)			WRITE_ONCE(((v)->counter), (i))
 
-#define arch_atomic_xchg(ptr, v)		(arch_xchg(&(ptr)->counter, (v)))
-#define arch_atomic_cmpxchg(v, old, new)	(arch_cmpxchg(&((v)->counter), (old), (new)))
+#define arch_atomic_xchg(ptr, v)		(arch_xchg(&(ptr)->counter, (u32)(v)))
+#define arch_atomic_cmpxchg(v, old, new)	(arch_cmpxchg(&((v)->counter), (u32)(old), (u32)(new)))
 
 #endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index c3e7315b7c1d..3df9f59a544e 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -26,16 +26,16 @@ static inline unsigned long __generic_cmpxchg_local(volatile void *ptr,
 	raw_local_irq_save(flags);
 	switch (size) {
 	case 1: prev = *(u8 *)ptr;
-		if (prev == (u8)old)
-			*(u8 *)ptr = (u8)new;
+		if (prev == (old & 0xffu))
+			*(u8 *)ptr = (new & 0xffu);
 		break;
 	case 2: prev = *(u16 *)ptr;
-		if (prev == (u16)old)
-			*(u16 *)ptr = (u16)new;
+		if (prev == (old & 0xffffu))
+			*(u16 *)ptr = (new & 0xffffu);
 		break;
 	case 4: prev = *(u32 *)ptr;
-		if (prev == (u32)old)
-			*(u32 *)ptr = (u32)new;
+		if (prev == (old & 0xffffffffffu))
+			*(u32 *)ptr = (new & 0xffffffffu);
 		break;
 	case 8: prev = *(u64 *)ptr;
 		if (prev == old)
diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h
index dca4419922a9..848de25fc4bf 100644
--- a/include/asm-generic/cmpxchg.h
+++ b/include/asm-generic/cmpxchg.h
@@ -32,7 +32,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
 #else
 		local_irq_save(flags);
 		ret = *(volatile u8 *)ptr;
-		*(volatile u8 *)ptr = x;
+		*(volatile u8 *)ptr = (x & 0xffu);
 		local_irq_restore(flags);
 		return ret;
 #endif /* __xchg_u8 */
@@ -43,7 +43,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
 #else
 		local_irq_save(flags);
 		ret = *(volatile u16 *)ptr;
-		*(volatile u16 *)ptr = x;
+		*(volatile u16 *)ptr = (x & 0xffffu);
 		local_irq_restore(flags);
 		return ret;
 #endif /* __xchg_u16 */
@@ -54,7 +54,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
 #else
 		local_irq_save(flags);
 		ret = *(volatile u32 *)ptr;
-		*(volatile u32 *)ptr = x;
+		*(volatile u32 *)ptr = (x & 0xffffffffu);
 		local_irq_restore(flags);
 		return ret;
 #endif /* __xchg_u32 */

From fb5015bc8b733323b58f015b88e4f316010ec856 Mon Sep 17 00:00:00 2001
From: Takahiro Itazuri <itazur@amazon.com>
Date: Fri, 31 Mar 2023 10:31:16 +0100
Subject: [PATCH 155/173] docs: kvm: x86: Fix broken field list

Add a missing ":" to fix a broken field list.

Signed-off-by: Takahiro Itazuri <itazur@amazon.com>
Fixes: ba7bb663f554 ("KVM: x86: Provide per VM capability for disabling PMU virtualization")
Message-Id: <20230331093116.99820-1-itazur@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 62de0768d6aa..a5c803f39832 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8296,11 +8296,11 @@ ENOSYS for the others.
 8.35 KVM_CAP_PMU_CAPABILITY
 ---------------------------
 
-:Capability KVM_CAP_PMU_CAPABILITY
+:Capability: KVM_CAP_PMU_CAPABILITY
 :Architectures: x86
 :Type: vm
 :Parameters: arg[0] is bitmask of PMU virtualization capabilities.
-:Returns 0 on success, -EINVAL when arg[0] contains invalid bits
+:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits
 
 This capability alters PMU virtualization in KVM.
 

From 0a78cf7264d29abeca098eae0b188a10aabc8a32 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 3 Apr 2023 12:49:58 -0700
Subject: [PATCH 156/173] raw: Fix NULL deref in raw_get_next().

Dae R. Jeong reported a NULL deref in raw_get_next() [0].

It seems that the repro was running these sequences in parallel so
that one thread was iterating on a socket that was being freed in
another netns.

  unshare(0x40060200)
  r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00')
  socket$inet_icmp_raw(0x2, 0x3, 0x1)
  pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f)

After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we
use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW
sockets.  However, we should use spinlock for slow paths to avoid
the NULL deref.

Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object
is not reused during iteration in the grace period.  In fact, the
lockless readers do not check the nulls marker with get_nulls_value().
So, SOCK_RAW should use hlist instead of hlist_nulls.

Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(),
let's convert hlist_nulls to hlist and use sk_for_each_rcu() for
fast paths and sk_for_each() and spinlock for /proc/net/raw.

[0]:
general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS:  00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225
 seq_read+0x224/0x320 fs/seq_file.c:162
 pde_read fs/proc/inode.c:316 [inline]
 proc_reg_read+0x23f/0x330 fs/proc/inode.c:328
 vfs_read+0x31e/0xd30 fs/read_write.c:468
 ksys_pread64 fs/read_write.c:665 [inline]
 __do_sys_pread64 fs/read_write.c:675 [inline]
 __se_sys_pread64 fs/read_write.c:672 [inline]
 __x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x478d29
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011
RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29
RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003
RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000
R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740
R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50
 </TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS:  00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Dae R. Jeong <threeearcat@gmail.com>
Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/raw.h   |  4 ++--
 net/ipv4/raw.c      | 36 +++++++++++++++++++-----------------
 net/ipv4/raw_diag.c | 10 ++++------
 net/ipv6/raw.c      | 10 ++++------
 4 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/include/net/raw.h b/include/net/raw.h
index 2c004c20ed99..3af5289fdead 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -37,7 +37,7 @@ int raw_rcv(struct sock *, struct sk_buff *);
 struct raw_hashinfo {
 	spinlock_t lock;
 
-	struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
+	struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
 };
 
 static inline u32 raw_hashfunc(const struct net *net, u32 proto)
@@ -51,7 +51,7 @@ static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
 
 	spin_lock_init(&hashinfo->lock);
 	for (i = 0; i < RAW_HTABLE_SIZE; i++)
-		INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i);
+		INIT_HLIST_HEAD(&hashinfo->ht[i]);
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 94df935ee0c5..8088a5011e7d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -91,12 +91,12 @@ EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
 int raw_hash_sk(struct sock *sk)
 {
 	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
-	struct hlist_nulls_head *hlist;
+	struct hlist_head *hlist;
 
 	hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
 
 	spin_lock(&h->lock);
-	__sk_nulls_add_node_rcu(sk, hlist);
+	sk_add_node_rcu(sk, hlist);
 	sock_set_flag(sk, SOCK_RCU_FREE);
 	spin_unlock(&h->lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -110,7 +110,7 @@ void raw_unhash_sk(struct sock *sk)
 	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
 
 	spin_lock(&h->lock);
-	if (__sk_nulls_del_node_init_rcu(sk))
+	if (sk_del_node_init_rcu(sk))
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 	spin_unlock(&h->lock);
 }
@@ -163,16 +163,15 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
 static int raw_v4_input(struct net *net, struct sk_buff *skb,
 			const struct iphdr *iph, int hash)
 {
-	struct hlist_nulls_head *hlist;
-	struct hlist_nulls_node *hnode;
 	int sdif = inet_sdif(skb);
+	struct hlist_head *hlist;
 	int dif = inet_iif(skb);
 	int delivered = 0;
 	struct sock *sk;
 
 	hlist = &raw_v4_hashinfo.ht[hash];
 	rcu_read_lock();
-	sk_nulls_for_each(sk, hnode, hlist) {
+	sk_for_each_rcu(sk, hlist) {
 		if (!raw_v4_match(net, sk, iph->protocol,
 				  iph->saddr, iph->daddr, dif, sdif))
 			continue;
@@ -264,10 +263,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 {
 	struct net *net = dev_net(skb->dev);
-	struct hlist_nulls_head *hlist;
-	struct hlist_nulls_node *hnode;
 	int dif = skb->dev->ifindex;
 	int sdif = inet_sdif(skb);
+	struct hlist_head *hlist;
 	const struct iphdr *iph;
 	struct sock *sk;
 	int hash;
@@ -276,7 +274,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 	hlist = &raw_v4_hashinfo.ht[hash];
 
 	rcu_read_lock();
-	sk_nulls_for_each(sk, hnode, hlist) {
+	sk_for_each_rcu(sk, hlist) {
 		iph = (const struct iphdr *)skb->data;
 		if (!raw_v4_match(net, sk, iph->protocol,
 				  iph->daddr, iph->saddr, dif, sdif))
@@ -950,14 +948,13 @@ static struct sock *raw_get_first(struct seq_file *seq, int bucket)
 {
 	struct raw_hashinfo *h = pde_data(file_inode(seq->file));
 	struct raw_iter_state *state = raw_seq_private(seq);
-	struct hlist_nulls_head *hlist;
-	struct hlist_nulls_node *hnode;
+	struct hlist_head *hlist;
 	struct sock *sk;
 
 	for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE;
 			++state->bucket) {
 		hlist = &h->ht[state->bucket];
-		sk_nulls_for_each(sk, hnode, hlist) {
+		sk_for_each(sk, hlist) {
 			if (sock_net(sk) == seq_file_net(seq))
 				return sk;
 		}
@@ -970,7 +967,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
 	struct raw_iter_state *state = raw_seq_private(seq);
 
 	do {
-		sk = sk_nulls_next(sk);
+		sk = sk_next(sk);
 	} while (sk && sock_net(sk) != seq_file_net(seq));
 
 	if (!sk)
@@ -989,9 +986,12 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 void *raw_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
+	__acquires(&h->lock)
 {
-	rcu_read_lock();
+	struct raw_hashinfo *h = pde_data(file_inode(seq->file));
+
+	spin_lock(&h->lock);
+
 	return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 EXPORT_SYMBOL_GPL(raw_seq_start);
@@ -1010,9 +1010,11 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 EXPORT_SYMBOL_GPL(raw_seq_next);
 
 void raw_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
+	__releases(&h->lock)
 {
-	rcu_read_unlock();
+	struct raw_hashinfo *h = pde_data(file_inode(seq->file));
+
+	spin_unlock(&h->lock);
 }
 EXPORT_SYMBOL_GPL(raw_seq_stop);
 
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index 999321834b94..da3591a66a16 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -57,8 +57,7 @@ static bool raw_lookup(struct net *net, struct sock *sk,
 static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
 {
 	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
-	struct hlist_nulls_head *hlist;
-	struct hlist_nulls_node *hnode;
+	struct hlist_head *hlist;
 	struct sock *sk;
 	int slot;
 
@@ -68,7 +67,7 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2
 	rcu_read_lock();
 	for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
 		hlist = &hashinfo->ht[slot];
-		sk_nulls_for_each(sk, hnode, hlist) {
+		sk_for_each_rcu(sk, hlist) {
 			if (raw_lookup(net, sk, r)) {
 				/*
 				 * Grab it and keep until we fill
@@ -142,9 +141,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
 	struct net *net = sock_net(skb->sk);
 	struct inet_diag_dump_data *cb_data;
-	struct hlist_nulls_head *hlist;
-	struct hlist_nulls_node *hnode;
 	int num, s_num, slot, s_slot;
+	struct hlist_head *hlist;
 	struct sock *sk = NULL;
 	struct nlattr *bc;
 
@@ -161,7 +159,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		num = 0;
 
 		hlist = &hashinfo->ht[slot];
-		sk_nulls_for_each(sk, hnode, hlist) {
+		sk_for_each_rcu(sk, hlist) {
 			struct inet_sock *inet = inet_sk(sk);
 
 			if (!net_eq(sock_net(sk), net))
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index bac9ba747bde..a327aa481df4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -141,10 +141,9 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister);
 static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 {
 	struct net *net = dev_net(skb->dev);
-	struct hlist_nulls_head *hlist;
-	struct hlist_nulls_node *hnode;
 	const struct in6_addr *saddr;
 	const struct in6_addr *daddr;
+	struct hlist_head *hlist;
 	struct sock *sk;
 	bool delivered = false;
 	__u8 hash;
@@ -155,7 +154,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	hash = raw_hashfunc(net, nexthdr);
 	hlist = &raw_v6_hashinfo.ht[hash];
 	rcu_read_lock();
-	sk_nulls_for_each(sk, hnode, hlist) {
+	sk_for_each_rcu(sk, hlist) {
 		int filtered;
 
 		if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
@@ -333,15 +332,14 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 		u8 type, u8 code, int inner_offset, __be32 info)
 {
 	struct net *net = dev_net(skb->dev);
-	struct hlist_nulls_head *hlist;
-	struct hlist_nulls_node *hnode;
+	struct hlist_head *hlist;
 	struct sock *sk;
 	int hash;
 
 	hash = raw_hashfunc(net, nexthdr);
 	hlist = &raw_v6_hashinfo.ht[hash];
 	rcu_read_lock();
-	sk_nulls_for_each(sk, hnode, hlist) {
+	sk_for_each_rcu(sk, hlist) {
 		/* Note: ipv6_hdr(skb) != skb->data */
 		const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
 

From ab5fb73ffa01072b4d8031cc05801fa1cb653bee Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 3 Apr 2023 12:49:59 -0700
Subject: [PATCH 157/173] ping: Fix potentail NULL deref for /proc/net/icmp.

After commit dbca1596bbb0 ("ping: convert to RCU lookups, get rid
of rwlock"), we use RCU for ping sockets, but we should use spinlock
for /proc/net/icmp to avoid a potential NULL deref mentioned in
the previous patch.

Let's go back to using spinlock there.

Note we can convert ping sockets to use hlist instead of hlist_nulls
because we do not use SLAB_TYPESAFE_BY_RCU for ping sockets.

Fixes: dbca1596bbb0 ("ping: convert to RCU lookups, get rid of rwlock")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/ping.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 409ec2a1f95b..5178a3f3cb53 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1089,13 +1089,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family)
-	__acquires(RCU)
+	__acquires(ping_table.lock)
 {
 	struct ping_iter_state *state = seq->private;
 	state->bucket = 0;
 	state->family = family;
 
-	rcu_read_lock();
+	spin_lock(&ping_table.lock);
 
 	return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
 }
@@ -1121,9 +1121,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 EXPORT_SYMBOL_GPL(ping_seq_next);
 
 void ping_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
+	__releases(ping_table.lock)
 {
-	rcu_read_unlock();
+	spin_unlock(&ping_table.lock);
 }
 EXPORT_SYMBOL_GPL(ping_seq_stop);
 

From e847c7675e19ef344913724dc68f83df31ad6a17 Mon Sep 17 00:00:00 2001
From: Andy Roulin <aroulin@nvidia.com>
Date: Mon, 3 Apr 2023 14:20:53 -0700
Subject: [PATCH 158/173] ethtool: reset #lanes when lanes is omitted

If the number of lanes was forced and then subsequently the user
omits this parameter, the ksettings->lanes is reset. The driver
should then reset the number of lanes to the device's default
for the specified speed.

However, although the ksettings->lanes is set to 0, the mod variable
is not set to true to indicate the driver and userspace should be
notified of the changes.

The consequence is that the same ethtool operation will produce
different results based on the initial state.

If the initial state is:
$ ethtool swp1 | grep -A 3 'Speed: '
        Speed: 500000Mb/s
        Lanes: 2
        Duplex: Full
        Auto-negotiation: on

then executing 'ethtool -s swp1 speed 50000 autoneg off' will yield:
$ ethtool swp1 | grep -A 3 'Speed: '
        Speed: 500000Mb/s
        Lanes: 2
        Duplex: Full
        Auto-negotiation: off

While if the initial state is:
$ ethtool swp1 | grep -A 3 'Speed: '
        Speed: 500000Mb/s
        Lanes: 1
        Duplex: Full
        Auto-negotiation: off

executing the same 'ethtool -s swp1 speed 50000 autoneg off' results in:
$ ethtool swp1 | grep -A 3 'Speed: '
        Speed: 500000Mb/s
        Lanes: 1
        Duplex: Full
        Auto-negotiation: off

This patch fixes this behavior. Omitting lanes will always results in
the driver choosing the default lane width for the chosen speed. In this
scenario, regardless of the initial state, the end state will be, e.g.,

$ ethtool swp1 | grep -A 3 'Speed: '
        Speed: 500000Mb/s
        Lanes: 2
        Duplex: Full
        Auto-negotiation: off

Fixes: 012ce4dd3102 ("ethtool: Extend link modes settings uAPI with lanes")
Signed-off-by: Andy Roulin <aroulin@nvidia.com>
Reviewed-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Link: https://lore.kernel.org/r/ac238d6b-8726-8156-3810-6471291dbc7f@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ethtool/linkmodes.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index fab66c169b9f..20165e07ef90 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -270,11 +270,12 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
 					    "lanes configuration not supported by device");
 			return -EOPNOTSUPP;
 		}
-	} else if (!lsettings->autoneg) {
-		/* If autoneg is off and lanes parameter is not passed from user,
-		 * set the lanes parameter to 0.
+	} else if (!lsettings->autoneg && ksettings->lanes) {
+		/* If autoneg is off and lanes parameter is not passed from user but
+		 * it was defined previously then set the lanes parameter to 0.
 		 */
 		ksettings->lanes = 0;
+		*mod = true;
 	}
 
 	ret = ethnl_update_bitset(ksettings->link_modes.advertising,

From a1865f2e7d10dde00d35a2122b38d2e469ae67ed Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 3 Apr 2023 21:46:43 +0000
Subject: [PATCH 159/173] netlink: annotate lockless accesses to
 nlk->max_recvmsg_len

syzbot reported a data-race in data-race in netlink_recvmsg() [1]

Indeed, netlink_recvmsg() can be run concurrently,
and netlink_dump() also needs protection.

[1]
BUG: KCSAN: data-race in netlink_recvmsg / netlink_recvmsg

read to 0xffff888141840b38 of 8 bytes by task 23057 on cpu 0:
netlink_recvmsg+0xea/0x730 net/netlink/af_netlink.c:1988
sock_recvmsg_nosec net/socket.c:1017 [inline]
sock_recvmsg net/socket.c:1038 [inline]
__sys_recvfrom+0x1ee/0x2e0 net/socket.c:2194
__do_sys_recvfrom net/socket.c:2212 [inline]
__se_sys_recvfrom net/socket.c:2208 [inline]
__x64_sys_recvfrom+0x78/0x90 net/socket.c:2208
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

write to 0xffff888141840b38 of 8 bytes by task 23037 on cpu 1:
netlink_recvmsg+0x114/0x730 net/netlink/af_netlink.c:1989
sock_recvmsg_nosec net/socket.c:1017 [inline]
sock_recvmsg net/socket.c:1038 [inline]
____sys_recvmsg+0x156/0x310 net/socket.c:2720
___sys_recvmsg net/socket.c:2762 [inline]
do_recvmmsg+0x2e5/0x710 net/socket.c:2856
__sys_recvmmsg net/socket.c:2935 [inline]
__do_sys_recvmmsg net/socket.c:2958 [inline]
__se_sys_recvmmsg net/socket.c:2951 [inline]
__x64_sys_recvmmsg+0xe2/0x160 net/socket.c:2951
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0x0000000000000000 -> 0x0000000000001000

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 23037 Comm: syz-executor.2 Not tainted 6.3.0-rc4-syzkaller-00195-g5a57b48fdfcb #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023

Fixes: 9063e21fb026 ("netlink: autosize skb lengthes")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230403214643.768555-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/netlink/af_netlink.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c64277659753..f365dfdd672d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1952,7 +1952,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	struct scm_cookie scm;
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
-	size_t copied;
+	size_t copied, max_recvmsg_len;
 	struct sk_buff *skb, *data_skb;
 	int err, ret;
 
@@ -1985,9 +1985,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 #endif
 
 	/* Record the max length of recvmsg() calls for future allocations */
-	nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
-	nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
-				     SKB_WITH_OVERHEAD(32768));
+	max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len);
+	max_recvmsg_len = min_t(size_t, max_recvmsg_len,
+				SKB_WITH_OVERHEAD(32768));
+	WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len);
 
 	copied = data_skb->len;
 	if (len < copied) {
@@ -2236,6 +2237,7 @@ static int netlink_dump(struct sock *sk)
 	struct netlink_ext_ack extack = {};
 	struct netlink_callback *cb;
 	struct sk_buff *skb = NULL;
+	size_t max_recvmsg_len;
 	struct module *module;
 	int err = -ENOBUFS;
 	int alloc_min_size;
@@ -2258,8 +2260,9 @@ static int netlink_dump(struct sock *sk)
 	cb = &nlk->cb;
 	alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
 
-	if (alloc_min_size < nlk->max_recvmsg_len) {
-		alloc_size = nlk->max_recvmsg_len;
+	max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len);
+	if (alloc_min_size < max_recvmsg_len) {
+		alloc_size = max_recvmsg_len;
 		skb = alloc_skb(alloc_size,
 				(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
 				__GFP_NOWARN | __GFP_NORETRY);

From 3ce9345580974863c060fa32971537996a7b2d57 Mon Sep 17 00:00:00 2001
From: Shailend Chand <shailend@google.com>
Date: Mon, 3 Apr 2023 10:28:09 -0700
Subject: [PATCH 160/173] gve: Secure enough bytes in the first TX desc for all
 TCP pkts

Non-GSO TCP packets whose SKBs' linear portion did not include the
entire TCP header were not populating the first Tx descriptor with
as many bytes as the vNIC expected. This change ensures that all
TCP packets populate the first descriptor with the correct number of
bytes.

Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC")
Signed-off-by: Shailend Chand <shailend@google.com>
Link: https://lore.kernel.org/r/20230403172809.2939306-1-shailend@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/google/gve/gve.h    |  2 ++
 drivers/net/ethernet/google/gve/gve_tx.c | 12 +++++-------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 64eb0442c82f..005cb9dfe078 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -47,6 +47,8 @@
 
 #define GVE_RX_BUFFER_SIZE_DQO 2048
 
+#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
 	struct gve_rx_desc *desc_ring; /* the descriptor ring */
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 4888bf05fbed..5e11b8236754 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -284,8 +284,8 @@ static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
 	int bytes;
 	int hlen;
 
-	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
-				 tcp_hdrlen(skb) : skb_headlen(skb);
+	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + tcp_hdrlen(skb) :
+				 min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);
 
 	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
 						   hlen);
@@ -454,13 +454,11 @@ static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, st
 	pkt_desc = &tx->desc[idx];
 
 	l4_hdr_offset = skb_checksum_start_offset(skb);
-	/* If the skb is gso, then we want the tcp header in the first segment
-	 * otherwise we want the linear portion of the skb (which will contain
-	 * the checksum because skb->csum_start and skb->csum_offset are given
-	 * relative to skb->head) in the first segment.
+	/* If the skb is gso, then we want the tcp header alone in the first segment
+	 * otherwise we want the minimum required by the gVNIC spec.
 	 */
 	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
-			skb_headlen(skb);
+			min_t(int, GVE_GQ_TX_MIN_PKT_DESC_BYTES, skb->len);
 
 	info->skb =  skb;
 	/* We don't want to split the header, so if necessary, pad to the end

From 9d52727f8043cfda241ae96896628d92fa9c50bb Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 4 Apr 2023 22:21:14 -0400
Subject: [PATCH 161/173] tracing: Have tracing_snapshot_instance_cond() write
 errors to the appropriate instance

If a trace instance has a failure with its snapshot code, the error
message is to be written to that instance's buffer. But currently, the
message is written to the top level buffer. Worse yet, it may also disable
the top level buffer and not the instance that had the issue.

Link: https://lkml.kernel.org/r/20230405022341.688730321@goodmis.org

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <zwisler@google.com>
Fixes: 2824f50332486 ("tracing: Make the snapshot trigger work with instances")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 937e9676dfd4..ed1d1093f5e9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1149,22 +1149,22 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr,
 	unsigned long flags;
 
 	if (in_nmi()) {
-		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
-		internal_trace_puts("*** snapshot is being ignored        ***\n");
+		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
+		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
 		return;
 	}
 
 	if (!tr->allocated_snapshot) {
-		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
-		internal_trace_puts("*** stopping trace here!   ***\n");
-		tracing_off();
+		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
+		trace_array_puts(tr, "*** stopping trace here!   ***\n");
+		tracer_tracing_off(tr);
 		return;
 	}
 
 	/* Note, snapshot can not be used when the tracer uses it */
 	if (tracer->use_max_tr) {
-		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
-		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
+		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
+		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
 		return;
 	}
 

From e94891641c21f607e4d6887bcd3beff882fcc483 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 4 Apr 2023 22:21:15 -0400
Subject: [PATCH 162/173] tracing: Fix ftrace_boot_snapshot command line logic

The kernel command line ftrace_boot_snapshot by itself is supposed to
trigger a snapshot at the end of boot up of the main top level trace
buffer. A ftrace_boot_snapshot=foo will do the same for an instance called
foo that was created by trace_instance=foo,...

The logic was broken where if ftrace_boot_snapshot was by itself, it would
trigger a snapshot for all instances that had tracing enabled, regardless
if it asked for a snapshot or not.

When a snapshot is requested for a buffer, the buffer's
tr->allocated_snapshot is set to true. Use that to know if a trace buffer
wants a snapshot at boot up or not.

Since the top level buffer is part of the ftrace_trace_arrays list,
there's no reason to treat it differently than the other buffers. Just
iterate the list if ftrace_boot_snapshot was specified.

Link: https://lkml.kernel.org/r/20230405022341.895334039@goodmis.org

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <zwisler@google.com>
Fixes: 9c1c251d670bc ("tracing: Allow boot instances to have snapshot buffers")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed1d1093f5e9..4686473b8497 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10393,19 +10393,20 @@ out:
 
 void __init ftrace_boot_snapshot(void)
 {
+#ifdef CONFIG_TRACER_MAX_TRACE
 	struct trace_array *tr;
 
-	if (snapshot_at_boot) {
-		tracing_snapshot();
-		internal_trace_puts("** Boot snapshot taken **\n");
-	}
+	if (!snapshot_at_boot)
+		return;
 
 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
-		if (tr == &global_trace)
+		if (!tr->allocated_snapshot)
 			continue;
-		trace_array_puts(tr, "** Boot snapshot taken **\n");
+
 		tracing_snapshot_instance(tr);
+		trace_array_puts(tr, "** Boot snapshot taken **\n");
 	}
+#endif
 }
 
 void __init early_trace_init(void)

From 774e7cb50359eff7e966f4e2e80c9486014d3a23 Mon Sep 17 00:00:00 2001
From: Karol Wachowski <karol.wachowski@linux.intel.com>
Date: Fri, 31 Mar 2023 13:36:02 +0200
Subject: [PATCH 163/173] accel/ivpu: Add dma fence to command buffers only

Currently job->done_fence is added to every BO handle within a job. If job
handle (command buffer) is shared between multiple submits, KMD will add
the fence in each of them. Then bo_wait_ioctl() executed on command buffer
will exit only when all jobs containing that handle are done.

This creates deadlock scenario for user mode driver in case when job handle
is added as dependency of another job, because bo_wait_ioctl() of first job
will wait until second job finishes, and second job can not finish before
first one.

Having fences added only to job buffer handle allows user space to execute
bo_wait_ioctl() on the job even if it's handle is submitted with other job.

Fixes: cd7272215c44 ("accel/ivpu: Add command buffer submission logic")
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230331113603.2802515-2-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_job.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 910301fae435..3c6f1e16cf2f 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -461,26 +461,22 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 
 	job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
 
-	ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
-					&acquire_ctx);
+	ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx);
 	if (ret) {
 		ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
 		return ret;
 	}
 
-	for (i = 0; i < buf_count; i++) {
-		ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
-		if (ret) {
-			ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
-			goto unlock_reservations;
-		}
+	ret = dma_resv_reserve_fences(bo->base.resv, 1);
+	if (ret) {
+		ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
+		goto unlock_reservations;
 	}
 
-	for (i = 0; i < buf_count; i++)
-		dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
+	dma_resv_add_fence(bo->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
 
 unlock_reservations:
-	drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);
+	drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx);
 
 	wmb(); /* Flush write combining buffers */
 

From 0ec8671837a61d841462179686c5819d951d3b10 Mon Sep 17 00:00:00 2001
From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Date: Fri, 31 Mar 2023 13:36:03 +0200
Subject: [PATCH 164/173] accel/ivpu: Fix S3 system suspend when not idle

Wait for VPU to be idle in ivpu_pm_suspend_cb() before powering off
the device, so jobs are not lost and TDRs are not triggered after
resume.

Fixes: 852be13f3bd3 ("accel/ivpu: Add PM support")
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230331113603.2802515-3-stanislaw.gruszka@linux.intel.com
---
 drivers/accel/ivpu/ivpu_pm.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 7df72fa8100f..bde42d6383da 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -140,32 +140,28 @@ int ivpu_pm_suspend_cb(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
 	struct ivpu_device *vdev = to_ivpu_device(drm);
-	int ret;
+	unsigned long timeout;
 
 	ivpu_dbg(vdev, PM, "Suspend..\n");
 
-	ret = ivpu_suspend(vdev);
-	if (ret && vdev->pm->suspend_reschedule_counter) {
-		ivpu_dbg(vdev, PM, "Failed to enter idle, rescheduling suspend, retries left %d\n",
-			 vdev->pm->suspend_reschedule_counter);
-		pm_schedule_suspend(dev, vdev->timeout.reschedule_suspend);
-		vdev->pm->suspend_reschedule_counter--;
-		return -EBUSY;
-	} else if (!vdev->pm->suspend_reschedule_counter) {
-		ivpu_warn(vdev, "Failed to enter idle, force suspend\n");
-		ivpu_pm_prepare_cold_boot(vdev);
-	} else {
-		ivpu_pm_prepare_warm_boot(vdev);
+	timeout = jiffies + msecs_to_jiffies(vdev->timeout.tdr);
+	while (!ivpu_hw_is_idle(vdev)) {
+		cond_resched();
+		if (time_after_eq(jiffies, timeout)) {
+			ivpu_err(vdev, "Failed to enter idle on system suspend\n");
+			return -EBUSY;
+		}
 	}
 
-	vdev->pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
+	ivpu_suspend(vdev);
+	ivpu_pm_prepare_warm_boot(vdev);
 
 	pci_save_state(to_pci_dev(dev));
 	pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
 
 	ivpu_dbg(vdev, PM, "Suspend done.\n");
 
-	return ret;
+	return 0;
 }
 
 int ivpu_pm_resume_cb(struct device *dev)

From b45193cb4df556fe6251b285a5ce44046dd36b4a Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 4 Apr 2023 09:31:28 +0200
Subject: [PATCH 165/173] can: j1939: j1939_tp_tx_dat_new(): fix out-of-bounds
 memory access

In the j1939_tp_tx_dat_new() function, an out-of-bounds memory access
could occur during the memcpy() operation if the size of skb->cb is
larger than the size of struct j1939_sk_buff_cb. This is because the
memcpy() operation uses the size of skb->cb, leading to a read beyond
the struct j1939_sk_buff_cb.

Updated the memcpy() operation to use the size of struct
j1939_sk_buff_cb instead of the size of skb->cb. This ensures that the
memcpy() operation only reads the memory within the bounds of struct
j1939_sk_buff_cb, preventing out-of-bounds memory access.

Additionally, add a BUILD_BUG_ON() to check that the size of skb->cb
is greater than or equal to the size of struct j1939_sk_buff_cb. This
ensures that the skb->cb buffer is large enough to hold the
j1939_sk_buff_cb structure.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Reported-by: Shuangpeng Bai <sjb7183@psu.edu>
Tested-by: Shuangpeng Bai <sjb7183@psu.edu>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: https://groups.google.com/g/syzkaller/c/G_LL-C3plRs/m/-8xCi6dCAgAJ
Link: https://lore.kernel.org/all/20230404073128.3173900-1-o.rempel@pengutronix.de
Cc: stable@vger.kernel.org
[mkl: rephrase commit message]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fb92c3609e17..fe3df23a2595 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -604,7 +604,10 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
 	/* reserve CAN header */
 	skb_reserve(skb, offsetof(struct can_frame, data));
 
-	memcpy(skb->cb, re_skcb, sizeof(skb->cb));
+	/* skb->cb must be large enough to hold a j1939_sk_buff_cb structure */
+	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*re_skcb));
+
+	memcpy(skb->cb, re_skcb, sizeof(*re_skcb));
 	skcb = j1939_skb_to_cb(skb);
 	if (swap_src_dst)
 		j1939_skbcb_swap(skcb);

From 0145462fc802cd447ef5d029758043c7f15b4b1e Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Thu, 30 Mar 2023 19:02:48 +0200
Subject: [PATCH 166/173] can: isotp: isotp_recvmsg(): use sock_recv_cmsgs() to
 get SOCK_RXQ_OVFL infos

isotp.c was still using sock_recv_timestamp() which does not provide
control messages to detect dropped PDUs in the receive path.

Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol")
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Link: https://lore.kernel.org/all/20230330170248.62342-1-socketcan@hartkopp.net
Cc: stable@vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 9bc344851704..47c2ebad10ed 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1120,7 +1120,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	if (ret < 0)
 		goto out_err;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_cmsgs(msg, sk, skb);
 
 	if (msg->msg_name) {
 		__sockaddr_check_size(ISOTP_MIN_NAMELEN);

From 79e19fa79cb5d5f1b3bf3e3ae24989ccb93c7b7b Mon Sep 17 00:00:00 2001
From: Michal Sojka <michal.sojka@cvut.cz>
Date: Fri, 31 Mar 2023 14:55:11 +0200
Subject: [PATCH 167/173] can: isotp: isotp_ops: fix poll() to not report false
 EPOLLOUT events

When using select()/poll()/epoll() with a non-blocking ISOTP socket to
wait for when non-blocking write is possible, a false EPOLLOUT event
is sometimes returned. This can happen at least after sending a
message which must be split to multiple CAN frames.

The reason is that isotp_sendmsg() returns -EAGAIN when tx.state is
not equal to ISOTP_IDLE and this behavior is not reflected in
datagram_poll(), which is used in isotp_ops.

This is fixed by introducing ISOTP-specific poll function, which
suppresses the EPOLLOUT events in that case.

v2: https://lore.kernel.org/all/20230302092812.320643-1-michal.sojka@cvut.cz
v1: https://lore.kernel.org/all/20230224010659.48420-1-michal.sojka@cvut.cz
    https://lore.kernel.org/all/b53a04a2-ba1f-3858-84c1-d3eb3301ae15@hartkopp.net

Signed-off-by: Michal Sojka <michal.sojka@cvut.cz>
Reported-by: Jakub Jira <jirajak2@fel.cvut.cz>
Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol")
Link: https://lore.kernel.org/all/20230331125511.372783-1-michal.sojka@cvut.cz
Cc: stable@vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 47c2ebad10ed..281b7766c54e 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1608,6 +1608,21 @@ static int isotp_init(struct sock *sk)
 	return 0;
 }
 
+static __poll_t isotp_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct isotp_sock *so = isotp_sk(sk);
+
+	__poll_t mask = datagram_poll(file, sock, wait);
+	poll_wait(file, &so->wait, wait);
+
+	/* Check for false positives due to TX state */
+	if ((mask & EPOLLWRNORM) && (so->tx.state != ISOTP_IDLE))
+		mask &= ~(EPOLLOUT | EPOLLWRNORM);
+
+	return mask;
+}
+
 static int isotp_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd,
 				  unsigned long arg)
 {
@@ -1623,7 +1638,7 @@ static const struct proto_ops isotp_ops = {
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.getname = isotp_getname,
-	.poll = datagram_poll,
+	.poll = isotp_poll,
 	.ioctl = isotp_sock_no_ioctlcmd,
 	.gettstamp = sock_gettstamp,
 	.listen = sock_no_listen,

From 051737439eaee5bdd03d3c2ef5510d54a478fd05 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Fri, 31 Mar 2023 15:19:35 +0200
Subject: [PATCH 168/173] can: isotp: fix race between isotp_sendsmg() and
 isotp_release()

As discussed with Dae R. Jeong and Hillf Danton here [1] the sendmsg()
function in isotp.c might get into a race condition when restoring the
former tx.state from the old_state.

Remove the old_state concept and implement proper locking for the
ISOTP_IDLE transitions in isotp_sendmsg(), inspired by a
simplification idea from Hillf Danton.

Introduce a new tx.state ISOTP_SHUTDOWN and use the same locking
mechanism from isotp_release() which resolves a potential race between
isotp_sendsmg() and isotp_release().

[1] https://lore.kernel.org/linux-can/ZB%2F93xJxq%2FBUqAgG@dragonet

v1: https://lore.kernel.org/all/20230331102114.15164-1-socketcan@hartkopp.net
v2: https://lore.kernel.org/all/20230331123600.3550-1-socketcan@hartkopp.net
    take care of signal interrupts for wait_event_interruptible() in
    isotp_release()
v3: https://lore.kernel.org/all/20230331130654.9886-1-socketcan@hartkopp.net
    take care of signal interrupts for wait_event_interruptible() in
    isotp_sendmsg() in the wait_tx_done case
v4: https://lore.kernel.org/all/20230331131935.21465-1-socketcan@hartkopp.net
    take care of signal interrupts for wait_event_interruptible() in
    isotp_sendmsg() in ALL cases

Cc: Dae R. Jeong <threeearcat@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Fixes: 4f027cba8216 ("can: isotp: split tx timer into transmission and timeout")
Link: https://lore.kernel.org/all/20230331131935.21465-1-socketcan@hartkopp.net
Cc: stable@vger.kernel.org
[mkl: rephrase commit message]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 55 ++++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 281b7766c54e..5761d4ab839d 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -119,7 +119,8 @@ enum {
 	ISOTP_WAIT_FIRST_FC,
 	ISOTP_WAIT_FC,
 	ISOTP_WAIT_DATA,
-	ISOTP_SENDING
+	ISOTP_SENDING,
+	ISOTP_SHUTDOWN,
 };
 
 struct tpcon {
@@ -880,8 +881,8 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
 					     txtimer);
 	struct sock *sk = &so->sk;
 
-	/* don't handle timeouts in IDLE state */
-	if (so->tx.state == ISOTP_IDLE)
+	/* don't handle timeouts in IDLE or SHUTDOWN state */
+	if (so->tx.state == ISOTP_IDLE || so->tx.state == ISOTP_SHUTDOWN)
 		return HRTIMER_NORESTART;
 
 	/* we did not get any flow control or echo frame in time */
@@ -918,7 +919,6 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 {
 	struct sock *sk = sock->sk;
 	struct isotp_sock *so = isotp_sk(sk);
-	u32 old_state = so->tx.state;
 	struct sk_buff *skb;
 	struct net_device *dev;
 	struct canfd_frame *cf;
@@ -928,23 +928,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	int off;
 	int err;
 
-	if (!so->bound)
+	if (!so->bound || so->tx.state == ISOTP_SHUTDOWN)
 		return -EADDRNOTAVAIL;
 
+wait_free_buffer:
 	/* we do not support multiple buffers - for now */
-	if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE ||
-	    wq_has_sleeper(&so->wait)) {
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			err = -EAGAIN;
-			goto err_out;
-		}
+	if (wq_has_sleeper(&so->wait) && (msg->msg_flags & MSG_DONTWAIT))
+		return -EAGAIN;
 
-		/* wait for complete transmission of current pdu */
-		err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
-		if (err)
-			goto err_out;
+	/* wait for complete transmission of current pdu */
+	err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+	if (err)
+		goto err_event_drop;
 
-		so->tx.state = ISOTP_SENDING;
+	if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE) {
+		if (so->tx.state == ISOTP_SHUTDOWN)
+			return -EADDRNOTAVAIL;
+
+		goto wait_free_buffer;
 	}
 
 	if (!size || size > MAX_MSG_LENGTH) {
@@ -1074,7 +1075,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 
 	if (wait_tx_done) {
 		/* wait for complete transmission of current pdu */
-		wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+		err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+		if (err)
+			goto err_event_drop;
 
 		if (sk->sk_err)
 			return -sk->sk_err;
@@ -1082,13 +1085,15 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 
 	return size;
 
+err_event_drop:
+	/* got signal: force tx state machine to be idle */
+	so->tx.state = ISOTP_IDLE;
+	hrtimer_cancel(&so->txfrtimer);
+	hrtimer_cancel(&so->txtimer);
 err_out_drop:
 	/* drop this PDU and unlock a potential wait queue */
-	old_state = ISOTP_IDLE;
-err_out:
-	so->tx.state = old_state;
-	if (so->tx.state == ISOTP_IDLE)
-		wake_up_interruptible(&so->wait);
+	so->tx.state = ISOTP_IDLE;
+	wake_up_interruptible(&so->wait);
 
 	return err;
 }
@@ -1150,10 +1155,12 @@ static int isotp_release(struct socket *sock)
 	net = sock_net(sk);
 
 	/* wait for complete transmission of current pdu */
-	wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+	while (wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE) == 0 &&
+	       cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SHUTDOWN) != ISOTP_IDLE)
+		;
 
 	/* force state machines to be idle also when a signal occurred */
-	so->tx.state = ISOTP_IDLE;
+	so->tx.state = ISOTP_SHUTDOWN;
 	so->rx.state = ISOTP_IDLE;
 
 	spin_lock(&isotp_notifier_lock);

From 3357c6e429643231e60447b52ffbb7ac895aca22 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 4 Apr 2023 19:45:04 -0400
Subject: [PATCH 169/173] tracing: Free error logs of tracing instances

When a tracing instance is removed, the error messages that hold errors
that occurred in the instance needs to be freed. The following reports a
memory leak:

 # cd /sys/kernel/tracing
 # mkdir instances/foo
 # echo 'hist:keys=x' > instances/foo/events/sched/sched_switch/trigger
 # cat instances/foo/error_log
 [  117.404795] hist:sched:sched_switch: error: Couldn't find field
   Command: hist:keys=x
                      ^
 # rmdir instances/foo

Then check for memory leaks:

 # echo scan > /sys/kernel/debug/kmemleak
 # cat /sys/kernel/debug/kmemleak
unreferenced object 0xffff88810d8ec700 (size 192):
  comm "bash", pid 869, jiffies 4294950577 (age 215.752s)
  hex dump (first 32 bytes):
    60 dd 68 61 81 88 ff ff 60 dd 68 61 81 88 ff ff  `.ha....`.ha....
    a0 30 8c 83 ff ff ff ff 26 00 0a 00 00 00 00 00  .0......&.......
  backtrace:
    [<00000000dae26536>] kmalloc_trace+0x2a/0xa0
    [<00000000b2938940>] tracing_log_err+0x277/0x2e0
    [<000000004a0e1b07>] parse_atom+0x966/0xb40
    [<0000000023b24337>] parse_expr+0x5f3/0xdb0
    [<00000000594ad074>] event_hist_trigger_parse+0x27f8/0x3560
    [<00000000293a9645>] trigger_process_regex+0x135/0x1a0
    [<000000005c22b4f2>] event_trigger_write+0x87/0xf0
    [<000000002cadc509>] vfs_write+0x162/0x670
    [<0000000059c3b9be>] ksys_write+0xca/0x170
    [<00000000f1cddc00>] do_syscall_64+0x3e/0xc0
    [<00000000868ac68c>] entry_SYSCALL_64_after_hwframe+0x72/0xdc
unreferenced object 0xffff888170c35a00 (size 32):
  comm "bash", pid 869, jiffies 4294950577 (age 215.752s)
  hex dump (first 32 bytes):
    0a 20 20 43 6f 6d 6d 61 6e 64 3a 20 68 69 73 74  .  Command: hist
    3a 6b 65 79 73 3d 78 0a 00 00 00 00 00 00 00 00  :keys=x.........
  backtrace:
    [<000000006a747de5>] __kmalloc+0x4d/0x160
    [<000000000039df5f>] tracing_log_err+0x29b/0x2e0
    [<000000004a0e1b07>] parse_atom+0x966/0xb40
    [<0000000023b24337>] parse_expr+0x5f3/0xdb0
    [<00000000594ad074>] event_hist_trigger_parse+0x27f8/0x3560
    [<00000000293a9645>] trigger_process_regex+0x135/0x1a0
    [<000000005c22b4f2>] event_trigger_write+0x87/0xf0
    [<000000002cadc509>] vfs_write+0x162/0x670
    [<0000000059c3b9be>] ksys_write+0xca/0x170
    [<00000000f1cddc00>] do_syscall_64+0x3e/0xc0
    [<00000000868ac68c>] entry_SYSCALL_64_after_hwframe+0x72/0xdc

The problem is that the error log needs to be freed when the instance is
removed.

Link: https://lore.kernel.org/lkml/76134d9f-a5ba-6a0d-37b3-28310b4a1e91@alu.unizg.hr/
Link: https://lore.kernel.org/linux-trace-kernel/20230404194504.5790b95f@gandalf.local.home

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Eric Biggers <ebiggers@kernel.org>
Fixes: 2f754e771b1a6 ("tracing: Have the error logs show up in the proper instances")
Reported-by: Mirsad Goran Todorovac <mirsad.todorovac@alu.unizg.hr>
Tested-by: Mirsad Todorovac <mirsad.todorovac@alu.unizg.hr>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4686473b8497..36a6037823cd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9516,6 +9516,7 @@ static int __remove_instance(struct trace_array *tr)
 	tracefs_remove(tr->dir);
 	free_percpu(tr->last_func_repeats);
 	free_trace_buffers(tr);
+	clear_tracing_err_log(tr);
 
 	for (i = 0; i < tr->nr_topts; i++) {
 		kfree(tr->topts[i].topts);

From abc33494ddd5c1e4bd0e17c4abe361038fb6693f Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@linux-m68k.org>
Date: Tue, 4 Apr 2023 15:22:07 +1000
Subject: [PATCH 170/173] net: fec: make use of MDIO C45 quirk

Not all fec MDIO bus drivers support C45 mode transactions. The older fec
hardware block in many ColdFire SoCs does not appear to support them, at
least according to most of the different ColdFire SoC reference manuals.
The bits used to generate C45 access on the iMX parts, in the OP field
of the MMFR register, are documented as generating non-compliant MII
frames (it is not documented as to exactly how they are non-compliant).

Commit 8d03ad1ab0b0 ("net: fec: Separate C22 and C45 transactions")
means the fec driver will always register c45 MDIO read and write
methods. During probe these will always be accessed now generating
non-compliant MII accesses on ColdFire based devices.

Add a quirk define, FEC_QUIRK_HAS_MDIO_C45, that can be used to
distinguish silicon that supports MDIO C45 framing or not. Add this to
all the existing iMX quirks, so they will be behave as they do now (*).

(*) it seems that some iMX parts may not support C45 transactions either.
    The iMX25 and iMX50 Reference Manuals contain similar wording to
    the ColdFire Reference Manuals on this.

Fixes: 8d03ad1ab0b0 ("net: fec: Separate C22 and C45 transactions")
Signed-off-by: Greg Ungerer <gerg@linux-m68k.org>
Reviewed-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20230404052207.3064861-1-gerg@linux-m68k.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec.h      |  5 ++++
 drivers/net/ethernet/freescale/fec_main.c | 32 ++++++++++++++---------
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 5ba1e0d71c68..9939ccafb556 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -507,6 +507,11 @@ struct bufdesc_ex {
 /* i.MX6Q adds pm_qos support */
 #define FEC_QUIRK_HAS_PMQOS			BIT(23)
 
+/* Not all FEC hardware block MDIOs support accesses in C45 mode.
+ * Older blocks in the ColdFire parts do not support it.
+ */
+#define FEC_QUIRK_HAS_MDIO_C45		BIT(24)
+
 struct bufdesc_prop {
 	int qid;
 	/* Address of Rx and Tx buffers */
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f3b16a6673e2..160c1b3525f5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -100,18 +100,19 @@ struct fec_devinfo {
 
 static const struct fec_devinfo fec_imx25_info = {
 	.quirks = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR |
-		  FEC_QUIRK_HAS_FRREG,
+		  FEC_QUIRK_HAS_FRREG | FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_imx27_info = {
-	.quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG,
+	.quirks = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG |
+		  FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_imx28_info = {
 	.quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
 		  FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC |
 		  FEC_QUIRK_HAS_FRREG | FEC_QUIRK_CLEAR_SETUP_MII |
-		  FEC_QUIRK_NO_HARD_RESET,
+		  FEC_QUIRK_NO_HARD_RESET | FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_imx6q_info = {
@@ -119,11 +120,12 @@ static const struct fec_devinfo fec_imx6q_info = {
 		  FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
 		  FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 |
 		  FEC_QUIRK_HAS_RACC | FEC_QUIRK_CLEAR_SETUP_MII |
-		  FEC_QUIRK_HAS_PMQOS,
+		  FEC_QUIRK_HAS_PMQOS | FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_mvf600_info = {
-	.quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC,
+	.quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_RACC |
+		  FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_imx6x_info = {
@@ -132,7 +134,8 @@ static const struct fec_devinfo fec_imx6x_info = {
 		  FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
 		  FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
 		  FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
-		  FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES,
+		  FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
+		  FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_imx6ul_info = {
@@ -140,7 +143,8 @@ static const struct fec_devinfo fec_imx6ul_info = {
 		  FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
 		  FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR007885 |
 		  FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC |
-		  FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII,
+		  FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII |
+		  FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_imx8mq_info = {
@@ -150,7 +154,8 @@ static const struct fec_devinfo fec_imx8mq_info = {
 		  FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
 		  FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
 		  FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
-		  FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2,
+		  FEC_QUIRK_HAS_EEE | FEC_QUIRK_WAKEUP_FROM_INT2 |
+		  FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_imx8qm_info = {
@@ -160,14 +165,15 @@ static const struct fec_devinfo fec_imx8qm_info = {
 		  FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
 		  FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE |
 		  FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES |
-		  FEC_QUIRK_DELAYED_CLKS_SUPPORT,
+		  FEC_QUIRK_DELAYED_CLKS_SUPPORT | FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static const struct fec_devinfo fec_s32v234_info = {
 	.quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
 		  FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
 		  FEC_QUIRK_HAS_VLAN | FEC_QUIRK_HAS_AVB |
-		  FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE,
+		  FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE |
+		  FEC_QUIRK_HAS_MDIO_C45,
 };
 
 static struct platform_device_id fec_devtype[] = {
@@ -2434,8 +2440,10 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	fep->mii_bus->name = "fec_enet_mii_bus";
 	fep->mii_bus->read = fec_enet_mdio_read_c22;
 	fep->mii_bus->write = fec_enet_mdio_write_c22;
-	fep->mii_bus->read_c45 = fec_enet_mdio_read_c45;
-	fep->mii_bus->write_c45 = fec_enet_mdio_write_c45;
+	if (fep->quirks & FEC_QUIRK_HAS_MDIO_C45) {
+		fep->mii_bus->read_c45 = fec_enet_mdio_read_c45;
+		fep->mii_bus->write_c45 = fec_enet_mdio_write_c45;
+	}
 	snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
 		pdev->name, fep->dev_id + 1);
 	fep->mii_bus->priv = fep;

From 38e058cc7d245dc8034426415bee8fec16ace1bd Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 4 Apr 2023 15:24:11 +0800
Subject: [PATCH 171/173] selftests: net: rps_default_mask.sh: delete veth link
 specifically

When deleting the netns and recreating a new one while re-adding the
veth interface, there is a small window of time during which the old
veth interface has not yet been removed. This can cause the new addition
to fail. To resolve this issue, we can either wait for a short while to
ensure that the old veth interface is deleted, or we can specifically
remove the veth interface.

Before this patch:
  # ./rps_default_mask.sh
  empty rps_default_mask                                      [ ok ]
  changing rps_default_mask dont affect existing devices      [ ok ]
  changing rps_default_mask dont affect existing netns        [ ok ]
  changing rps_default_mask affect newly created devices      [ ok ]
  changing rps_default_mask don't affect newly child netns[II][ ok ]
  rps_default_mask is 0 by default in child netns             [ ok ]
  RTNETLINK answers: File exists
  changing rps_default_mask in child ns don't affect the main one[ ok ]
  cat: /sys/class/net/vethC11an1/queues/rx-0/rps_cpus: No such file or directory
  changing rps_default_mask in child ns affects new childns devices./rps_default_mask.sh: line 36: [: -eq: unary operator expected
  [fail] expected 1 found
  changing rps_default_mask in child ns don't affect existing devices[ ok ]

After this patch:
  # ./rps_default_mask.sh
  empty rps_default_mask                                      [ ok ]
  changing rps_default_mask dont affect existing devices      [ ok ]
  changing rps_default_mask dont affect existing netns        [ ok ]
  changing rps_default_mask affect newly created devices      [ ok ]
  changing rps_default_mask don't affect newly child netns[II][ ok ]
  rps_default_mask is 0 by default in child netns             [ ok ]
  changing rps_default_mask in child ns don't affect the main one[ ok ]
  changing rps_default_mask in child ns affects new childns devices[ ok ]
  changing rps_default_mask in child ns don't affect existing devices[ ok ]

Fixes: 3a7d84eae03b ("self-tests: more rps self tests")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20230404072411.879476-1-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/rps_default_mask.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 0fd0d2db3abc..a26c5624429f 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -60,6 +60,7 @@ ip link set dev $VETH up
 ip -n $NETNS link set dev $VETH up
 chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
 chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+ip link del dev $VETH
 ip netns del $NETNS
 
 setup

From 24e3fce00c0b557491ff596c0682a29dee6fe848 Mon Sep 17 00:00:00 2001
From: Song Yoong Siang <yoong.siang.song@intel.com>
Date: Tue, 4 Apr 2023 12:48:23 +0800
Subject: [PATCH 172/173] net: stmmac: Add queue reset into stmmac_xdp_open()
 function

Queue reset was moved out from __init_dma_rx_desc_rings() and
__init_dma_tx_desc_rings() functions. Thus, the driver fails to transmit
and receive packet after XDP prog setup.

This commit adds the missing queue reset into stmmac_xdp_open() function.

Fixes: f9ec5723c3db ("net: ethernet: stmicro: stmmac: move queue reset to dedicated functions")
Cc: <stable@vger.kernel.org> # 6.0+
Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://lore.kernel.org/r/20230404044823.3226144-1-yoong.siang.song@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 59cbf3597eb4..38abc898f149 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6624,6 +6624,8 @@ int stmmac_xdp_open(struct net_device *dev)
 		goto init_error;
 	}
 
+	stmmac_reset_queues_param(priv);
+
 	/* DMA CSR Channel configuration */
 	for (chan = 0; chan < dma_csr_ch; chan++) {
 		stmmac_init_chan(priv, priv->ioaddr, priv->plat->dma_cfg, chan);

From 8fbc10b995a506e173f1080dfa2764f232a65e02 Mon Sep 17 00:00:00 2001
From: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Date: Thu, 6 Apr 2023 10:45:41 +0800
Subject: [PATCH 173/173] net: stmmac: check fwnode for phy device before
 scanning for phy

Some DT devices already have phy device configured in the DT/ACPI.
Current implementation scans for a phy unconditionally even though
there is a phy listed in the DT/ACPI and already attached.

We should check the fwnode if there is any phy device listed in
fwnode and decide whether to scan for a phy to attach to.

Fixes: fe2cfbc96803 ("net: stmmac: check if MAC needs to attach to a PHY")
Reported-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/lkml/20230403212434.296975-1-martin.blumenstingl@googlemail.com/
Tested-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Shahab Vahedi <shahab@synopsys.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Suggested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
Link: https://lore.kernel.org/r/20230406024541.3556305-1-michael.wei.hong.sit@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 38abc898f149..d7fcab057032 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1134,22 +1134,26 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
 static int stmmac_init_phy(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	struct fwnode_handle *phy_fwnode;
 	struct fwnode_handle *fwnode;
-	bool phy_needed;
 	int ret;
 
+	if (!phylink_expects_phy(priv->phylink))
+		return 0;
+
 	fwnode = of_fwnode_handle(priv->plat->phylink_node);
 	if (!fwnode)
 		fwnode = dev_fwnode(priv->device);
 
 	if (fwnode)
-		ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0);
+		phy_fwnode = fwnode_get_phy_node(fwnode);
+	else
+		phy_fwnode = NULL;
 
-	phy_needed = phylink_expects_phy(priv->phylink);
 	/* Some DT bindings do not set-up the PHY handle. Let's try to
 	 * manually parse it
 	 */
-	if (!fwnode || phy_needed || ret) {
+	if (!phy_fwnode || IS_ERR(phy_fwnode)) {
 		int addr = priv->plat->phy_addr;
 		struct phy_device *phydev;
 
@@ -1165,6 +1169,9 @@ static int stmmac_init_phy(struct net_device *dev)
 		}
 
 		ret = phylink_connect_phy(priv->phylink, phydev);
+	} else {
+		fwnode_handle_put(phy_fwnode);
+		ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0);
 	}
 
 	if (!priv->plat->pmt) {