From 85e985a4f46e462a37f1875cb74ed380e7c0c2e0 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio <konrad.dybcio@linaro.org> Date: Wed, 10 Jan 2024 15:16:26 +0200 Subject: [PATCH 001/304] interconnect: qcom: sc8180x: Mark CO0 BCM keepalive The CO0 BCM needs to be up at all times, otherwise some hardware (like the UFS controller) loses its connection to the rest of the SoC, resulting in a hang of the platform, accompanied by a spectacular logspam. Mark it as keepalive to prevent such cases. Fixes: 9c8c6bac1ae8 ("interconnect: qcom: Add SC8180x providers") Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org> Link: https://lore.kernel.org/r/20231214-topic-sc8180_fixes-v1-1-421904863006@linaro.org Signed-off-by: Georgi Djakov <djakov@kernel.org> --- drivers/interconnect/qcom/sc8180x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c index 20331e119beb..03d626776ba1 100644 --- a/drivers/interconnect/qcom/sc8180x.c +++ b/drivers/interconnect/qcom/sc8180x.c @@ -1372,6 +1372,7 @@ static struct qcom_icc_bcm bcm_mm0 = { static struct qcom_icc_bcm bcm_co0 = { .name = "CO0", + .keepalive = true, .num_nodes = 1, .nodes = { &slv_qns_cdsp_mem_noc } }; From 24406f6794aa631516241deb9e19de333d6a0600 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio <konrad.dybcio@linaro.org> Date: Wed, 10 Jan 2024 15:16:46 +0200 Subject: [PATCH 002/304] interconnect: qcom: sm8550: Enable sync_state To ensure the interconnect votes are actually meaningful and in order to prevent holding all buses at FMAX, introduce the sync state callback. Fixes: e6f0d6a30f73 ("interconnect: qcom: Add SM8550 interconnect provider driver") Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org> Reviewed-by: Bjorn Andersson <andersson@kernel.org> Link: https://lore.kernel.org/r/20231218-topic-8550_fixes-v1-2-ce1272d77540@linaro.org Signed-off-by: Georgi Djakov <djakov@kernel.org> --- drivers/interconnect/qcom/sm8550.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c index 629faa4c9aae..fc22cecf650f 100644 --- a/drivers/interconnect/qcom/sm8550.c +++ b/drivers/interconnect/qcom/sm8550.c @@ -2223,6 +2223,7 @@ static struct platform_driver qnoc_driver = { .driver = { .name = "qnoc-sm8550", .of_match_table = qnoc_of_match, + .sync_state = icc_sync_state, }, }; From 95a0d596bbd0552a78e13ced43f2be1038883c81 Mon Sep 17 00:00:00 2001 From: Dinghao Liu <dinghao.liu@zju.edu.cn> Date: Fri, 8 Dec 2023 15:31:19 +0800 Subject: [PATCH 003/304] iio: core: fix memleak in iio_device_register_sysfs When iio_device_register_sysfs_group() fails, we should free iio_dev_opaque->chan_attr_group.attrs to prevent potential memleak. Fixes: 32f171724e5c ("iio: core: rework iio device group creation") Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn> Link: https://lore.kernel.org/r/20231208073119.29283-1-dinghao.liu@zju.edu.cn Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/industrialio-core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 9a85752124dd..173dc00762a1 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1584,10 +1584,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev) ret = iio_device_register_sysfs_group(indio_dev, &iio_dev_opaque->chan_attr_group); if (ret) - goto error_clear_attrs; + goto error_free_chan_attrs; return 0; +error_free_chan_attrs: + kfree(iio_dev_opaque->chan_attr_group.attrs); + iio_dev_opaque->chan_attr_group.attrs = NULL; error_clear_attrs: iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list); From b67f3e653e305abf1471934d7b9fdb9ad2df3eef Mon Sep 17 00:00:00 2001 From: Sam Protsenko <semen.protsenko@linaro.org> Date: Wed, 20 Dec 2023 12:47:53 -0600 Subject: [PATCH 004/304] iio: pressure: bmp280: Add missing bmp085 to SPI id table "bmp085" is missing in bmp280_spi_id[] table, which leads to the next warning in dmesg: SPI driver bmp280 has no spi_device_id for bosch,bmp085 Add "bmp085" to bmp280_spi_id[] by mimicking its existing description in bmp280_of_spi_match[] table to fix the above warning. Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org> Fixes: b26b4e91700f ("iio: pressure: bmp280: add SPI interface driver") Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Reviewed-by: Linus Walleij <linus.walleij@linaro.org> Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/pressure/bmp280-spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c index 433d6fac83c4..e8a5fed07e88 100644 --- a/drivers/iio/pressure/bmp280-spi.c +++ b/drivers/iio/pressure/bmp280-spi.c @@ -87,6 +87,7 @@ static const struct of_device_id bmp280_of_spi_match[] = { MODULE_DEVICE_TABLE(of, bmp280_of_spi_match); static const struct spi_device_id bmp280_spi_id[] = { + { "bmp085", (kernel_ulong_t)&bmp180_chip_info }, { "bmp180", (kernel_ulong_t)&bmp180_chip_info }, { "bmp181", (kernel_ulong_t)&bmp180_chip_info }, { "bmp280", (kernel_ulong_t)&bmp280_chip_info }, From 792595bab4925aa06532a14dd256db523eb4fa5e Mon Sep 17 00:00:00 2001 From: "zhili.liu" <zhili.liu@ucas.com.cn> Date: Tue, 2 Jan 2024 09:07:11 +0800 Subject: [PATCH 005/304] iio: magnetometer: rm3100: add boundary check for the value read from RM3100_REG_TMRC Recently, we encounter kernel crash in function rm3100_common_probe caused by out of bound access of array rm3100_samp_rates (because of underlying hardware failures). Add boundary check to prevent out of bound access. Fixes: 121354b2eceb ("iio: magnetometer: Add driver support for PNI RM3100") Suggested-by: Zhouyi Zhou <zhouzhouyi@gmail.com> Signed-off-by: zhili.liu <zhili.liu@ucas.com.cn> Link: https://lore.kernel.org/r/1704157631-3814-1-git-send-email-zhouzhouyi@gmail.com Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/magnetometer/rm3100-core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c index 69938204456f..42b70cd42b39 100644 --- a/drivers/iio/magnetometer/rm3100-core.c +++ b/drivers/iio/magnetometer/rm3100-core.c @@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) struct rm3100_data *data; unsigned int tmp; int ret; + int samp_rate_index; indio_dev = devm_iio_device_alloc(dev, sizeof(*data)); if (!indio_dev) @@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq) ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp); if (ret < 0) return ret; + + samp_rate_index = tmp - RM3100_TMRC_OFFSET; + if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) { + dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n"); + return -EINVAL; + } /* Initializing max wait time, which is double conversion time. */ - data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2] - * 2; + data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2; /* Cycle count values may not be what we want. */ if ((tmp - RM3100_TMRC_OFFSET) == 0) From 35ec2d03b282a939949090bd8c39eb37a5856721 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Wed, 10 Jan 2024 10:56:11 -0800 Subject: [PATCH 006/304] iio: imu: bno055: serdev requires REGMAP There are a ton of build errors when REGMAP is not set, so select REGMAP to fix all of them. Examples (not all of them): ../drivers/iio/imu/bno055/bno055_ser_core.c:495:15: error: variable 'bno055_ser_regmap_bus' has initializer but incomplete type 495 | static struct regmap_bus bno055_ser_regmap_bus = { ../drivers/iio/imu/bno055/bno055_ser_core.c:496:10: error: 'struct regmap_bus' has no member named 'write' 496 | .write = bno055_ser_write_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c:497:10: error: 'struct regmap_bus' has no member named 'read' 497 | .read = bno055_ser_read_reg, ../drivers/iio/imu/bno055/bno055_ser_core.c: In function 'bno055_ser_probe': ../drivers/iio/imu/bno055/bno055_ser_core.c:532:18: error: implicit declaration of function 'devm_regmap_init'; did you mean 'vmem_map_init'? [-Werror=implicit-function-declaration] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c:532:16: warning: assignment to 'struct regmap *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus, ../drivers/iio/imu/bno055/bno055_ser_core.c: At top level: ../drivers/iio/imu/bno055/bno055_ser_core.c:495:26: error: storage size of 'bno055_ser_regmap_bus' isn't known 495 | static struct regmap_bus bno055_ser_regmap_bus = { Fixes: 2eef5a9cc643 ("iio: imu: add BNO055 serdev driver") Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Cc: Andrea Merello <andrea.merello@iit.it> Cc: Jonathan Cameron <jic23@kernel.org> Cc: Lars-Peter Clausen <lars@metafoo.de> Cc: linux-iio@vger.kernel.org Cc: <Stable@vger.kernel.org> Link: https://lore.kernel.org/r/20240110185611.19723-1-rdunlap@infradead.org Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/imu/bno055/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig index 83e53acfbe88..c7f5866a177d 100644 --- a/drivers/iio/imu/bno055/Kconfig +++ b/drivers/iio/imu/bno055/Kconfig @@ -8,6 +8,7 @@ config BOSCH_BNO055 config BOSCH_BNO055_SERIAL tristate "Bosch BNO055 attached via UART" depends on SERIAL_DEV_BUS + select REGMAP select BOSCH_BNO055 help Enable this to support Bosch BNO055 IMUs attached via UART. From f1dfcbaa7b9d131859b0167c428480ae6e7e817d Mon Sep 17 00:00:00 2001 From: Jonathan Cameron <Jonathan.Cameron@huawei.com> Date: Sun, 21 Jan 2024 13:46:16 +0000 Subject: [PATCH 007/304] iio: humidity: hdc3020: Add Makefile, Kconfig and MAINTAINERS entry Something when wrong when applying the original patch and only the c file made it in. Here the rest of the changes are applied. Fixes: c9180b8e39be ("iio: humidity: Add driver for ti HDC302x humidity sensors") Reported-by: Lars-Peter Clausen <lars@metafoo.de> Cc: Javier Carrasco <javier.carrasco.cruz@gmail.com> Cc: Li peiyu <579lpy@gmail.com> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Lars-Peter Clausen <lars@metafoo.de> --- MAINTAINERS | 8 ++++++++ drivers/iio/humidity/Kconfig | 12 ++++++++++++ drivers/iio/humidity/Makefile | 1 + 3 files changed, 21 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..1b590e6f6bdd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22005,6 +22005,14 @@ F: Documentation/devicetree/bindings/media/i2c/ti,ds90* F: drivers/media/i2c/ds90* F: include/media/i2c/ds90* +TI HDC302X HUMIDITY DRIVER +M: Javier Carrasco <javier.carrasco.cruz@gmail.com> +M: Li peiyu <579lpy@gmail.com> +L: linux-iio@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/iio/humidity/ti,hdc3020.yaml +F: drivers/iio/humidity/hdc3020.c + TI ICSSG ETHERNET DRIVER (ICSSG) R: MD Danish Anwar <danishanwar@ti.com> R: Roger Quadros <rogerq@kernel.org> diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig index 2de5494e7c22..b15b7a3b66d5 100644 --- a/drivers/iio/humidity/Kconfig +++ b/drivers/iio/humidity/Kconfig @@ -48,6 +48,18 @@ config HDC2010 To compile this driver as a module, choose M here: the module will be called hdc2010. +config HDC3020 + tristate "TI HDC3020 relative humidity and temperature sensor" + depends on I2C + select CRC8 + help + Say yes here to build support for the Texas Instruments + HDC3020, HDC3021 and HDC3022 relative humidity and temperature + sensors. + + To compile this driver as a module, choose M here: the module + will be called hdc3020. + config HID_SENSOR_HUMIDITY tristate "HID Environmental humidity sensor" depends on HID_SENSOR_HUB diff --git a/drivers/iio/humidity/Makefile b/drivers/iio/humidity/Makefile index f19ff3de97c5..5fbeef299f61 100644 --- a/drivers/iio/humidity/Makefile +++ b/drivers/iio/humidity/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_AM2315) += am2315.o obj-$(CONFIG_DHT11) += dht11.o obj-$(CONFIG_HDC100X) += hdc100x.o obj-$(CONFIG_HDC2010) += hdc2010.o +obj-$(CONFIG_HDC3020) += hdc3020.o obj-$(CONFIG_HID_SENSOR_HUMIDITY) += hid-sensor-humidity.o hts221-y := hts221_core.o \ From 8e98b87f515d8c4bae521048a037b2cc431c3fd5 Mon Sep 17 00:00:00 2001 From: Nuno Sa <nuno.sa@analog.com> Date: Wed, 17 Jan 2024 14:10:49 +0100 Subject: [PATCH 008/304] iio: imu: adis: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library") Signed-off-by: Nuno Sa <nuno.sa@analog.com> Link: https://lore.kernel.org/r/20240117-adis-improv-v1-1-7f90e9fad200@analog.com Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- include/linux/iio/imu/adis.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index dc9ea299e088..8898966bc0f0 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -11,6 +11,7 @@ #include <linux/spi/spi.h> #include <linux/interrupt.h> +#include <linux/iio/iio.h> #include <linux/iio/types.h> #define ADIS_WRITE_REG(reg) ((0x80 | (reg))) @@ -131,7 +132,7 @@ struct adis { unsigned long irq_flag; void *buffer; - u8 tx[10] ____cacheline_aligned; + u8 tx[10] __aligned(IIO_DMA_MINALIGN); u8 rx[4]; }; From 59598510be1d49e1cff7fd7593293bb8e1b2398b Mon Sep 17 00:00:00 2001 From: Nuno Sa <nuno.sa@analog.com> Date: Wed, 17 Jan 2024 13:41:03 +0100 Subject: [PATCH 009/304] iio: adc: ad_sigma_delta: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for the sigma_delta ADCs. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: 0fb6ee8d0b5e ("iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack") Signed-off-by: Nuno Sa <nuno.sa@analog.com> Link: https://lore.kernel.org/r/20240117-dev_sigma_delta_no_irq_flags-v1-1-db39261592cf@analog.com Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- include/linux/iio/adc/ad_sigma_delta.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 7852f6c9a714..719cf9cc6e1a 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -8,6 +8,8 @@ #ifndef __AD_SIGMA_DELTA_H__ #define __AD_SIGMA_DELTA_H__ +#include <linux/iio/iio.h> + enum ad_sigma_delta_mode { AD_SD_MODE_CONTINUOUS = 0, AD_SD_MODE_SINGLE = 1, @@ -99,7 +101,7 @@ struct ad_sigma_delta { * 'rx_buf' is up to 32 bits per sample + 64 bit timestamp, * rounded to 16 bytes to take into account padding. */ - uint8_t tx_buf[4] ____cacheline_aligned; + uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN); uint8_t rx_buf[16] __aligned(8); }; From 9c46e3a5232d855a65f440b298a2a66497f799d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@linaro.org> Date: Mon, 8 Jan 2024 12:07:02 +0300 Subject: [PATCH 010/304] iio: adc: ad7091r8: Fix error code in ad7091r8_gpio_setup() There is a copy and paste error so it accidentally returns ->convst_gpio instead of ->reset_gpio. Fix it. Fixes: 0b76ff46c463 ("iio: adc: Add support for AD7091R-8") Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org> Reviewed-by: Marcelo Schmitt <marcelo.schmitt1@gmail.com> Link: https://lore.kernel.org/r/fd905ad0-6413-489c-9a3b-90c0cdb35ec9@moroto.mountain Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/adc/ad7091r8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad7091r8.c b/drivers/iio/adc/ad7091r8.c index 57700f124803..700564305057 100644 --- a/drivers/iio/adc/ad7091r8.c +++ b/drivers/iio/adc/ad7091r8.c @@ -195,7 +195,7 @@ static int ad7091r8_gpio_setup(struct ad7091r_state *st) st->reset_gpio = devm_gpiod_get_optional(st->dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(st->reset_gpio)) - return dev_err_probe(st->dev, PTR_ERR(st->convst_gpio), + return dev_err_probe(st->dev, PTR_ERR(st->reset_gpio), "Error on requesting reset GPIO\n"); if (st->reset_gpio) { From 809aa64ebff51eb170ee31a95f83b2d21efa32e2 Mon Sep 17 00:00:00 2001 From: Zhipeng Lu <alexious@zju.edu.cn> Date: Fri, 12 Jan 2024 16:55:23 +0800 Subject: [PATCH 011/304] IB/hfi1: Fix a memleak in init_credit_return When dma_alloc_coherent fails to allocate dd->cr_base[i].va, init_credit_return should deallocate dd->cr_base and dd->cr_base[i] that allocated before. Or those resources would be never freed and a memleak is triggered. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn> Link: https://lore.kernel.org/r/20240112085523.3731720-1-alexious@zju.edu.cn Acked-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/hfi1/pio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 68c621ff59d0..5a91cbda4aee 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -2086,7 +2086,7 @@ int init_credit_return(struct hfi1_devdata *dd) "Unable to allocate credit return DMA range for NUMA %d\n", i); ret = -ENOMEM; - goto done; + goto free_cr_base; } } set_dev_node(&dd->pcidev->dev, dd->node); @@ -2094,6 +2094,10 @@ int init_credit_return(struct hfi1_devdata *dd) ret = 0; done: return ret; + +free_cr_base: + free_credit_return(dd); + goto done; } void free_credit_return(struct hfi1_devdata *dd) From 282fd66e2ef6e5d72b8fcd77efb2b282d2569464 Mon Sep 17 00:00:00 2001 From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Date: Mon, 22 Jan 2024 20:54:33 -0800 Subject: [PATCH 012/304] RDMA/bnxt_re: Avoid creating fence MR for newer adapters Limit the usage of fence MR to adapters older than Gen P5 products. Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Bhargava Chenna Marreddy <bhargava.marreddy@broadcom.com> Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Link: https://lore.kernel.org/r/1705985677-15551-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 824349659d69..e1ea49263145 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -401,6 +401,10 @@ static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd) struct bnxt_re_fence_data *fence = &pd->fence; struct ib_mr *ib_mr = &fence->mr->ib_mr; struct bnxt_qplib_swqe *wqe = &fence->bind_wqe; + struct bnxt_re_dev *rdev = pd->rdev; + + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; memset(wqe, 0, sizeof(*wqe)); wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW; @@ -455,6 +459,9 @@ static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd) struct device *dev = &rdev->en_dev->pdev->dev; struct bnxt_re_mr *mr = fence->mr; + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; + if (fence->mw) { bnxt_re_dealloc_mw(fence->mw); fence->mw = NULL; @@ -486,6 +493,9 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) struct ib_mw *mw; int rc; + if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return 0; + dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES, DMA_BIDIRECTIONAL); rc = dma_mapping_error(dev, dma_addr); From 8fcbf0a55f71be7e562f10222abc9a34148189d0 Mon Sep 17 00:00:00 2001 From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Date: Mon, 22 Jan 2024 20:54:34 -0800 Subject: [PATCH 013/304] RDMA/bnxt_re: Remove a redundant check inside bnxt_re_vf_res_config After the cited commit, there is no possibility that this check can return true. Remove it. Fixes: a43c26fa2e6c ("RDMA/bnxt_re: Remove the sriov config callback") Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Link: https://lore.kernel.org/r/1705985677-15551-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/bnxt_re/main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f022c922fae5..54b4d2f3a5d8 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -280,9 +280,6 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) { - - if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) - return; rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev); if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { bnxt_re_set_resource_limits(rdev); From 8eaca6b5997bd8fd7039f2693e4ecf112823c816 Mon Sep 17 00:00:00 2001 From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Date: Mon, 22 Jan 2024 20:54:35 -0800 Subject: [PATCH 014/304] RDMA/bnxt_re: Fix unconditional fence for newer adapters Older adapters required an unconditional fence for non-wire memory operations. Newer adapters doesn't require this and therefore, disabling the unconditional fence. Fixes: 1801d87b3598 ("RDMA/bnxt_re: Support new 5760X P7 devices") Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Link: https://lore.kernel.org/r/1705985677-15551-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 28 ++++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e1ea49263145..fdb4fde1acbe 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2566,11 +2566,6 @@ static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr, wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey; - /* Need unconditional fence for local invalidate - * opcode to work as expected. - */ - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; - if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (wr->send_flags & IB_SEND_SOLICITED) @@ -2593,12 +2588,6 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, wqe->frmr.levels = qplib_frpl->hwq.level; wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR; - /* Need unconditional fence for reg_mr - * opcode to function as expected. - */ - - wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; - if (wr->wr.send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; @@ -2729,6 +2718,18 @@ bad: return rc; } +static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe) +{ + /* Need unconditional fence for non-wire memory opcode + * to work as expected. + */ + if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV || + wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR || + wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR || + wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW) + wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; +} + int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { @@ -2808,8 +2809,11 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, rc = -EINVAL; goto bad; } - if (!rc) + if (!rc) { + if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx)) + bnxt_re_legacy_set_uc_fence(&wqe); rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); + } bad: if (rc) { ibdev_err(&qp->rdev->ibdev, From 3687b450c5f32e80f179ce4b09e0454da1449eac Mon Sep 17 00:00:00 2001 From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Date: Mon, 22 Jan 2024 20:54:36 -0800 Subject: [PATCH 015/304] RDMA/bnxt_re: Return error for SRQ resize SRQ resize is not supported in the driver. But driver is not returning error from bnxt_re_modify_srq() for SRQ resize. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Link: https://lore.kernel.org/r/1705985677-15551-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index fdb4fde1acbe..ce9c5bae83bf 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1827,7 +1827,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ - break; + return -EINVAL; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) @@ -1842,13 +1842,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; /* No need to Build and send response back to udata */ - break; + return 0; default: ibdev_err(&rdev->ibdev, "Unsupported srq_attr_mask 0x%x", srq_attr_mask); return -EINVAL; } - return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) From 80dde187f734cf9ccf988d5c2ef1a46b990660fd Mon Sep 17 00:00:00 2001 From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Date: Mon, 22 Jan 2024 20:54:37 -0800 Subject: [PATCH 016/304] RDMA/bnxt_re: Add a missing check in bnxt_qplib_query_srq Before populating the response, driver has to check the status of HWRM command. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Link: https://lore.kernel.org/r/1705985677-15551-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index c98e04fe2ddd..439d0c7c5d0c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -744,7 +744,8 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); - srq->threshold = le16_to_cpu(sb->srq_limit); + if (!rc) + srq->threshold = le16_to_cpu(sb->srq_limit); dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); From a69eeaad093dd2c8fd0b216c8143b380b73d672d Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau <dima.fedrau@gmail.com> Date: Fri, 26 Jan 2024 14:52:26 +0100 Subject: [PATCH 017/304] iio: humidity: hdc3020: fix temperature offset The temperature offset should be negative according to the datasheet. Adding a minus to the existing offset results in correct temperature calculations. Fixes: c9180b8e39be ("iio: humidity: Add driver for ti HDC302x humidity sensors") Reviewed-by: Nuno Sa <nuno.sa@analog.com> Signed-off-by: Dimitri Fedrau <dima.fedrau@gmail.com> Link: https://lore.kernel.org/r/20240126135226.3977904-1-dima.fedrau@gmail.com Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/humidity/hdc3020.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/humidity/hdc3020.c b/drivers/iio/humidity/hdc3020.c index 4e3311170725..ed70415512f6 100644 --- a/drivers/iio/humidity/hdc3020.c +++ b/drivers/iio/humidity/hdc3020.c @@ -322,7 +322,7 @@ static int hdc3020_read_raw(struct iio_dev *indio_dev, if (chan->type != IIO_TEMP) return -EINVAL; - *val = 16852; + *val = -16852; return IIO_VAL_INT; default: From 6db053cd949fcd6254cea9f2cd5d39f7bd64379c Mon Sep 17 00:00:00 2001 From: David Schiller <david.schiller@jku.at> Date: Mon, 22 Jan 2024 14:49:17 +0100 Subject: [PATCH 018/304] staging: iio: ad5933: fix type mismatch regression Commit 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") fixed a compiler warning, but introduced a bug that resulted in one of the two 16 bit IIO channels always being zero (when both are enabled). This is because int is 32 bits wide on most architectures and in the case of a little-endian machine the two most significant bytes would occupy the buffer for the second channel as 'val' is being passed as a void pointer to 'iio_push_to_buffers()'. Fix by defining 'val' as u16. Tested working on ARM64. Fixes: 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning") Signed-off-by: David Schiller <david.schiller@jku.at> Link: https://lore.kernel.org/r/20240122134916.2137957-1-david.schiller@jku.at Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/staging/iio/impedance-analyzer/ad5933.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index e748a5d04e97..9149d41fe65b 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work) struct ad5933_state, work.work); struct iio_dev *indio_dev = i2c_get_clientdata(st->client); __be16 buf[2]; - int val[2]; + u16 val[2]; unsigned char status; int ret; From 6f6c72acddf4357fcc83593c20ef9064fb42db92 Mon Sep 17 00:00:00 2001 From: Javier Carrasco <javier.carrasco.cruz@gmail.com> Date: Sat, 27 Jan 2024 21:02:08 +0100 Subject: [PATCH 019/304] iio: move LIGHT_UVA and LIGHT_UVB to the end of iio_modifier The new modifiers should have added to the end of the enum, so they do not affect the existing entries. No modifiers were added since then, so they can be moved safely to the end of the list. Move IIO_MOD_LIGHT_UVA and IIO_MOD_LIGHT_UVB to the end of iio_modifier. Fixes: b89710bd215e ("iio: add modifiers for A and B ultraviolet light") Suggested-by: Paul Cercueil <paul@crapouillou.net> Signed-off-by: Javier Carrasco <javier.carrasco.cruz@gmail.com> Link: https://lore.kernel.org/r/20240127200208.185815-1-javier.carrasco.cruz@gmail.com Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- include/uapi/linux/iio/types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 5060963707b1..f2e0b2d50e6b 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -91,8 +91,6 @@ enum iio_modifier { IIO_MOD_CO2, IIO_MOD_VOC, IIO_MOD_LIGHT_UV, - IIO_MOD_LIGHT_UVA, - IIO_MOD_LIGHT_UVB, IIO_MOD_LIGHT_DUV, IIO_MOD_PM1, IIO_MOD_PM2P5, @@ -107,6 +105,8 @@ enum iio_modifier { IIO_MOD_PITCH, IIO_MOD_YAW, IIO_MOD_ROLL, + IIO_MOD_LIGHT_UVA, + IIO_MOD_LIGHT_UVB, }; enum iio_event_type { From ec4d82f855ce332de26fe080892483de98cc1a19 Mon Sep 17 00:00:00 2001 From: Mohammad Rahimi <rahimi.mhmmd@gmail.com> Date: Sat, 27 Jan 2024 11:26:28 +0800 Subject: [PATCH 020/304] thunderbolt: Fix setting the CNS bit in ROUTER_CS_5 The bit 23, CM TBT3 Not Supported (CNS), in ROUTER_CS_5 indicates whether a USB4 Connection Manager is TBT3-Compatible and should be: 0b for TBT3-Compatible 1b for Not TBT3-Compatible Fixes: b04079837b20 ("thunderbolt: Add initial support for USB4") Cc: stable@vger.kernel.org Signed-off-by: Mohammad Rahimi <rahimi.mhmmd@gmail.com> Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> --- drivers/thunderbolt/tb_regs.h | 2 +- drivers/thunderbolt/usb4.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h index 87e4795275fe..6f798f6a2b84 100644 --- a/drivers/thunderbolt/tb_regs.h +++ b/drivers/thunderbolt/tb_regs.h @@ -203,7 +203,7 @@ struct tb_regs_switch_header { #define ROUTER_CS_5_WOP BIT(1) #define ROUTER_CS_5_WOU BIT(2) #define ROUTER_CS_5_WOD BIT(3) -#define ROUTER_CS_5_C3S BIT(23) +#define ROUTER_CS_5_CNS BIT(23) #define ROUTER_CS_5_PTO BIT(24) #define ROUTER_CS_5_UTO BIT(25) #define ROUTER_CS_5_HCO BIT(26) diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index f8f0d24ff6e4..1515eff8cc3e 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -290,7 +290,7 @@ int usb4_switch_setup(struct tb_switch *sw) } /* TBT3 supported by the CM */ - val |= ROUTER_CS_5_C3S; + val &= ~ROUTER_CS_5_CNS; return tb_sw_write(sw, &val, TB_CFG_SWITCH, ROUTER_CS_5, 1); } From 02add85a9eefb5c969b9bb6916f14607ca2faae0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Wed, 29 Nov 2023 14:40:41 -0800 Subject: [PATCH 021/304] KVM: selftests: Reword the NX hugepage test's skip message to be more helpful Rework the NX hugepage test's skip message regarding the magic token to provide all of the necessary magic, and to very explicitly recommended using the wrapper shell script. Opportunistically remove an overzealous newline; splitting the recommendation message across two lines of ~45 characters makes it much harder to read than running out a single line to 98 characters. Link: https://lore.kernel.org/r/20231129224042.530798-1-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 83e25bccc139..17bbb96fc4df 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -257,9 +257,9 @@ int main(int argc, char **argv) TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); __TEST_REQUIRE(token == MAGIC_TOKEN, - "This test must be run with the magic token %d.\n" - "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test.", MAGIC_TOKEN); + "This test must be run with the magic token via '-t %d'.\n" + "Running via nx_huge_pages_test.sh, which also handles " + "environment setup, is strongly recommended.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); run_test(reclaim_period_ms, true, reboot_permissions); From 250e138d876838774bca3140d544438898df0489 Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 6 Dec 2023 18:02:43 +0100 Subject: [PATCH 022/304] KVM: selftests: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Link: https://lore.kernel.org/r/20231206170241.82801-8-ajones@ventanamicro.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/demand_paging_test.c | 4 ++-- .../testing/selftests/kvm/dirty_log_perf_test.c | 4 ++-- tools/testing/selftests/kvm/dirty_log_test.c | 4 ++-- tools/testing/selftests/kvm/get-reg-list.c | 2 +- tools/testing/selftests/kvm/guest_print_test.c | 8 ++++---- .../selftests/kvm/hardware_disable_test.c | 6 +++--- .../testing/selftests/kvm/kvm_create_max_vcpus.c | 2 +- .../testing/selftests/kvm/kvm_page_table_test.c | 4 ++-- tools/testing/selftests/kvm/lib/elf.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 16 ++++++++-------- tools/testing/selftests/kvm/lib/memstress.c | 2 +- .../testing/selftests/kvm/lib/userfaultfd_util.c | 2 +- .../kvm/memslot_modification_stress_test.c | 2 +- tools/testing/selftests/kvm/memslot_perf_test.c | 6 +++--- tools/testing/selftests/kvm/rseq_test.c | 4 ++-- .../selftests/kvm/set_memory_region_test.c | 6 +++--- .../selftests/kvm/system_counter_offset_test.c | 2 +- 17 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 09c116a82a84..bf3609f71854 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -45,10 +45,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory */ ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) != UCALL_SYNC) { TEST_ASSERT(false, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index d374dbcf9a53..504f6fe980e8 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -88,9 +88,9 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu_idx); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 6cbecf499767..babea97b31a4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -262,7 +262,7 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) "vcpu run failed: errno=%d", err); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); vcpu_handle_sync_stop(); @@ -410,7 +410,7 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) pr_info("vcpu continues now.\n"); } else { TEST_ASSERT(false, "Invalid guest sync status: " - "exit_reason=%s\n", + "exit_reason=%s", exit_reason_str(run->exit_reason)); } } diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index 8274ef04301f..91f05f78e824 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -152,7 +152,7 @@ static void check_supported(struct vcpu_reg_list *c) continue; __TEST_REQUIRE(kvm_has_cap(s->capability), - "%s: %s not available, skipping tests\n", + "%s: %s not available, skipping tests", config_name(c), s->name); } } diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 41230b746190..3502caa3590c 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -98,7 +98,7 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) int offset = len_str - len_substr; TEST_ASSERT(len_substr <= len_str, - "Expected '%s' to be a substring of '%s'\n", + "Expected '%s' to be a substring of '%s'", assert_msg, expected_assert_msg); TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0, @@ -116,7 +116,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); switch (get_ucall(vcpu, &uc)) { @@ -161,11 +161,11 @@ static void test_limits(void) vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT, - "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)\n", + "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)", uc.cmd, UCALL_ABORT); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index f5d59b9934f1..decc521fc760 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -41,7 +41,7 @@ static void *run_vcpu(void *arg) vcpu_run(vcpu); - TEST_ASSERT(false, "%s: exited with reason %d: %s\n", + TEST_ASSERT(false, "%s: exited with reason %d: %s", __func__, run->exit_reason, exit_reason_str(run->exit_reason)); pthread_exit(NULL); @@ -55,7 +55,7 @@ static void *sleeping_thread(void *arg) fd = open("/dev/null", O_RDWR); close(fd); } - TEST_ASSERT(false, "%s: exited\n", __func__); + TEST_ASSERT(false, "%s: exited", __func__); pthread_exit(NULL); } @@ -118,7 +118,7 @@ static void run_test(uint32_t run) for (i = 0; i < VCPU_NUM; ++i) check_join(threads[i], &b); /* Should not be reached */ - TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run); + TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run); } void wait_for_child_setup(pid_t pid) diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 31b3cb24b9a7..b9e23265e4b3 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -65,7 +65,7 @@ int main(int argc, char *argv[]) int r = setrlimit(RLIMIT_NOFILE, &rl); __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", old_rlim_max, nr_fds_wanted); } else { TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index e37dc9c21888..e0ba97ac1c56 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -204,9 +204,9 @@ static void *vcpu_worker(void *data) ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(vcpu->run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu->id); diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 266f3876e10a..f34d926d9735 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -184,7 +184,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "Seek to program segment offset failed,\n" " program header idx: %u errno: %i\n" " offset_rv: 0x%jx\n" - " expected: 0x%jx\n", + " expected: 0x%jx", n1, errno, (intmax_t) offset_rv, (intmax_t) phdr.p_offset); test_read(fd, addr_gva2hva(vm, phdr.p_vaddr), diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e066d584c656..8754abaff5e9 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -320,7 +320,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, uint64_t nr_pages; TEST_ASSERT(nr_runnable_vcpus, - "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); + "Use vm_create_barebones() for VMs that _never_ have vCPUs"); TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), "nr_vcpus = %d too large for host, max-vcpus = %d", @@ -491,7 +491,7 @@ void kvm_pin_this_task_to_pcpu(uint32_t pcpu) CPU_ZERO(&mask); CPU_SET(pcpu, &mask); r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); + TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -499,7 +499,7 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), - "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + "Not allowed to run on pCPU '%d', check cgroups?", pcpu); return pcpu; } @@ -529,7 +529,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int i, r; cpu_list = strdup(pcpus_string); - TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); + TEST_ASSERT(cpu_list, "strdup() allocation failed."); r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_getaffinity() failed"); @@ -538,7 +538,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 1. Get all pcpus for vcpus. */ for (i = 0; i < nr_vcpus; i++) { - TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i); vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); cpu = strtok(NULL, delim); } @@ -1057,7 +1057,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n", + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", ret, errno, slot, flags, guest_paddr, (uint64_t) region->region.memory_size, region->region.guest_memfd); @@ -1222,7 +1222,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); - TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx\n", + TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", punch_hole ? "punch hole" : "allocate", gpa, len, region->region.guest_memfd, mode, fd_offset); } @@ -1265,7 +1265,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct kvm_vcpu *vcpu; /* Confirm a vcpu with the specified id doesn't already exist. */ - TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); + TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id); /* Allocate and initialize new vcpu structure. */ vcpu = calloc(1, sizeof(*vcpu)); diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index d05487e5a371..cf2c73971308 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -192,7 +192,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, TEST_ASSERT(guest_num_pages < region_end_gfn, "Requested more guest memory than address space allows.\n" " guest pages: %" PRIx64 " max gfn: %" PRIx64 - " nr_vcpus: %d wss: %" PRIx64 "]\n", + " nr_vcpus: %d wss: %" PRIx64 "]", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 271f63891581..f4eef6eb2dc2 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -69,7 +69,7 @@ static void *uffd_handler_thread_fn(void *arg) if (pollfd[1].revents & POLLIN) { r = read(pollfd[1].fd, &tmp_chr, 1); TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); + "Error reading pipefd in UFFD thread"); break; } diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 9855c41ca811..156361966612 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -45,7 +45,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory until a stop signal is received */ while (!READ_ONCE(memstress_args.stop_vcpus)) { ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) == UCALL_SYNC) continue; diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 8698d1ab60d0..579a64f97333 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -175,11 +175,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); } static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) @@ -336,7 +336,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); TEST_ASSERT(gpa == guest_addr, - "vm_phy_pages_alloc() failed\n"); + "vm_phy_pages_alloc() failed"); data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index f74e76d03b7e..28f97fb52044 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) } while (snapshot != atomic_read(&seq_cnt)); TEST_ASSERT(rseq_cpu == cpu, - "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu); + "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } /* @@ -256,7 +256,7 @@ int main(int argc, char *argv[]) * migrations given the 1us+ delay in the migration task. */ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), - "Only performed %d KVM_RUNs, task stalled too much?\n", i); + "Only performed %d KVM_RUNs, task stalled too much?", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 075b80dbe237..40337f566eeb 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -98,11 +98,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); /* Wait for the vCPU thread to reenter the guest. */ usleep(100000); @@ -302,7 +302,7 @@ static void test_delete_memory_region(void) if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) TEST_ASSERT(regs.rip >= final_rip_start && regs.rip < final_rip_end, - "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n", + "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx", final_rip_start, final_rip_end, regs.rip); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 7f5b330b6a1b..513d421a9bff 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -108,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall %ld\n", + TEST_ASSERT(0, "unhandled ucall %ld", get_ucall(vcpu, &uc)); } } From 95be17e4008b592342ec6cfb9264f4b54c21b790 Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 6 Dec 2023 18:02:44 +0100 Subject: [PATCH 023/304] KVM: selftests: aarch64: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Acked-by: Zenghui Yu <yuzenghui@huawei.com> Link: https://lore.kernel.org/r/20231206170241.82801-9-ajones@ventanamicro.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/aarch64/arch_timer.c | 12 ++++++------ tools/testing/selftests/kvm/aarch64/hypercalls.c | 16 ++++++++-------- .../selftests/kvm/aarch64/page_fault_test.c | 6 +++--- .../testing/selftests/kvm/aarch64/smccc_filter.c | 2 +- .../selftests/kvm/aarch64/vpmu_counter_access.c | 12 ++++++------ .../selftests/kvm/lib/aarch64/processor.c | 2 +- tools/testing/selftests/kvm/lib/aarch64/vgic.c | 4 ++-- 7 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 274b8465b42a..2cb8dd1f8275 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -248,7 +248,7 @@ static void *test_vcpu_run(void *arg) REPORT_GUEST_ASSERT(uc); break; default: - TEST_FAIL("Unexpected guest exit\n"); + TEST_FAIL("Unexpected guest exit"); } return NULL; @@ -287,7 +287,7 @@ static int test_migrate_vcpu(unsigned int vcpu_idx) /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, - "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n", + "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d", vcpu_idx, new_pcpu, ret); return ret; @@ -326,12 +326,12 @@ static void test_run(struct kvm_vm *vm) pthread_mutex_init(&vcpu_done_map_lock, NULL); vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus); - TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n"); + TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap"); for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) { ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run, (void *)(unsigned long)i); - TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i); + TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i); } /* Spawn a thread to control the vCPU migrations */ @@ -340,7 +340,7 @@ static void test_run(struct kvm_vm *vm) ret = pthread_create(&pt_vcpu_migration, NULL, test_vcpu_migration, NULL); - TEST_ASSERT(!ret, "Failed to create the migration pthread\n"); + TEST_ASSERT(!ret, "Failed to create the migration pthread"); } @@ -384,7 +384,7 @@ static struct kvm_vm *test_vm_create(void) if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset); else - TEST_FAIL("no support for global offset\n"); + TEST_FAIL("no support for global offset"); } for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index 31f66ba97228..27c10e7a7e01 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -175,18 +175,18 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) /* First 'read' should be an upper limit of the features supported */ vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n", + "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); /* Test a 'write' by disabling all the features of the register map */ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); TEST_ASSERT(ret == 0, - "Failed to clear all the features of reg: 0x%lx; ret: %d\n", + "Failed to clear all the features of reg: 0x%lx; ret: %d", reg_info->reg, errno); vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg); + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); /* * Test enabling a feature that's not supported. @@ -195,7 +195,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) if (reg_info->max_feat_bit < 63) { ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); TEST_ASSERT(ret != 0 && errno == EINVAL, - "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx", errno, reg_info->reg); } } @@ -216,7 +216,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) */ vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); /* @@ -226,7 +226,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) */ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); TEST_ASSERT(ret != 0 && errno == EBUSY, - "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx", errno, reg_info->reg); } } @@ -265,7 +265,7 @@ static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu) case TEST_STAGE_HVC_IFACE_FALSE_INFO: break; default: - TEST_FAIL("Unknown test stage: %d\n", prev_stage); + TEST_FAIL("Unknown test stage: %d", prev_stage); } } @@ -294,7 +294,7 @@ static void test_run(void) REPORT_GUEST_ASSERT(uc); break; default: - TEST_FAIL("Unexpected guest exit\n"); + TEST_FAIL("Unexpected guest exit"); } } diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 08a5ca5bed56..53fddad57cbb 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -414,10 +414,10 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, if (fd != -1) { ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, paging_size); - TEST_ASSERT(ret == 0, "fallocate failed\n"); + TEST_ASSERT(ret == 0, "fallocate failed"); } else { ret = madvise(hva, paging_size, MADV_DONTNEED); - TEST_ASSERT(ret == 0, "madvise failed\n"); + TEST_ASSERT(ret == 0, "madvise failed"); } return true; @@ -501,7 +501,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) void fail_vcpu_run_no_handler(int ret) { - TEST_FAIL("Unexpected vcpu run failure\n"); + TEST_FAIL("Unexpected vcpu run failure"); } void fail_vcpu_run_mmio_no_syndrome_handler(int ret) diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c index f4ceae9c8925..2d189f3da228 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c @@ -178,7 +178,7 @@ static void expect_call_denied(struct kvm_vcpu *vcpu) struct ucall uc; if (get_ucall(vcpu, &uc) != UCALL_SYNC) - TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED, "Unexpected SMCCC return code: %lu", uc.args[1]); diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index 9d51b5691349..5f9713364693 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -517,11 +517,11 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) if (expect_fail) TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n", + "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", pmcr, pmcr_n); else TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)\n", + "Failed to update PMCR.N to %lu (received: %lu)", pmcr_n, get_pmcr_n(pmcr)); } @@ -594,12 +594,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); /* @@ -611,12 +611,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); } diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 41c776b642c0..43b9a7283360 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -398,7 +398,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index b5f28d21a947..184378d593e9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -38,7 +38,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, struct list_head *iter; unsigned int nr_gic_pages, nr_vcpus_created = 0; - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n"); + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); /* * Make sure that the caller is infact calling this @@ -47,7 +47,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, list_for_each(iter, &vm->vcpus) nr_vcpus_created++; TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n", + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", nr_vcpus, nr_vcpus_created); /* Distributor setup */ From 93e43e50b80b4f342251fb48f8ebced4d3c34983 Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 6 Dec 2023 18:02:45 +0100 Subject: [PATCH 024/304] KVM: selftests: riscv: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Acked-by: Anup Patel <anup@brainfault.org> Link: https://lore.kernel.org/r/20231206170241.82801-10-ajones@ventanamicro.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/lib/riscv/processor.c | 2 +- tools/testing/selftests/kvm/riscv/get-reg-list.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 7ca736fb4194..2bb33a8ac03c 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -327,7 +327,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 6652108816db..50bb9bb73a7b 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -150,7 +150,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) /* Double check whether the desired extension was enabled */ __TEST_REQUIRE(vcpu_has_ext(vcpu, feature), - "%s not available, skipping tests\n", s->name); + "%s not available, skipping tests", s->name); } } From a38125f188c141cd1297d14af84c28b5276ab287 Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 6 Dec 2023 18:02:46 +0100 Subject: [PATCH 025/304] KVM: selftests: s390x: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Acked-by: Janosch Frank <frankja@linux.ibm.com> Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com> Link: https://lore.kernel.org/r/20231206170241.82801-11-ajones@ventanamicro.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- .../selftests/kvm/lib/s390x/processor.c | 2 +- tools/testing/selftests/kvm/s390x/resets.c | 4 ++-- .../selftests/kvm/s390x/sync_regs_test.c | 20 +++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 15945121daf1..f6d227892cbc 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -198,7 +198,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index e41e2cb8ffa9..357943f2bea8 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -78,7 +78,7 @@ static void assert_noirq(struct kvm_vcpu *vcpu) * (notably, the emergency call interrupt we have injected) should * be cleared by the resets, so this should be 0. */ - TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno); TEST_ASSERT(!irqs, "IRQ pending"); } @@ -199,7 +199,7 @@ static void inject_irq(struct kvm_vcpu *vcpu) irq->type = KVM_S390_INT_EMERGENCY; irq->u.emerg.code = vcpu->id; irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state); - TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno); } static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu) diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 636a70ddac1e..43fb25ddc3ec 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -39,13 +39,13 @@ static void guest_code(void) #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) #define REG_COMPARE32(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%x, 0x%x\n", \ + " values did not match: 0x%x, 0x%x", \ left->reg, right->reg) @@ -82,14 +82,14 @@ void test_read_invalid(struct kvm_vcpu *vcpu) run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; } @@ -103,14 +103,14 @@ void test_set_invalid(struct kvm_vcpu *vcpu) run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; } @@ -125,12 +125,12 @@ void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu) /* Request and verify all valid register sets. */ run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && (run->s390_sieic.ipb >> 16) == 0x501, - "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n", + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x", run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); @@ -161,7 +161,7 @@ void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu) } rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, "r11 sync regs value incorrect 0x%llx.", @@ -193,7 +193,7 @@ void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu) run->s.regs.gprs[11] = 0xDEADBEEF; run->s.regs.diag318 = 0x4B1D; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, "r11 sync regs value incorrect 0x%llx.", From 65612e993493014cb95be81ca4f98e08732c46d0 Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 6 Dec 2023 18:02:47 +0100 Subject: [PATCH 026/304] KVM: selftests: x86_64: Remove redundant newlines TEST_* functions append their own newline. Remove newlines from TEST_* callsites to avoid extra newlines in output. Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Link: https://lore.kernel.org/r/20231206170241.82801-12-ajones@ventanamicro.com [sean: keep the newline in the "tsc\n" strncmp()] Signed-off-by: Sean Christopherson <seanjc@google.com> --- .../selftests/kvm/lib/x86_64/processor.c | 10 +++---- tools/testing/selftests/kvm/lib/x86_64/vmx.c | 6 ++-- tools/testing/selftests/kvm/x86_64/amx_test.c | 2 +- .../testing/selftests/kvm/x86_64/cpuid_test.c | 4 +-- .../selftests/kvm/x86_64/flds_emulation.h | 2 +- .../selftests/kvm/x86_64/hyperv_clock.c | 4 +-- .../testing/selftests/kvm/x86_64/hyperv_ipi.c | 2 +- .../selftests/kvm/x86_64/hyperv_tlb_flush.c | 2 +- .../selftests/kvm/x86_64/kvm_clock_test.c | 6 ++-- .../selftests/kvm/x86_64/platform_info_test.c | 2 +- .../kvm/x86_64/pmu_event_filter_test.c | 2 +- .../selftests/kvm/x86_64/sev_migrate_tests.c | 28 +++++++++---------- .../smaller_maxphyaddr_emulation_test.c | 4 +-- .../selftests/kvm/x86_64/sync_regs_test.c | 10 +++---- .../kvm/x86_64/ucna_injection_test.c | 8 +++--- .../selftests/kvm/x86_64/userspace_io_test.c | 2 +- .../kvm/x86_64/vmx_apic_access_test.c | 2 +- .../selftests/kvm/x86_64/vmx_dirty_log_test.c | 16 +++++------ .../vmx_exception_with_invalid_guest_state.c | 2 +- .../selftests/kvm/x86_64/xapic_ipi_test.c | 8 +++--- .../selftests/kvm/x86_64/xcr0_cpuid_test.c | 2 +- .../selftests/kvm/x86_64/xss_msr_test.c | 2 +- 22 files changed, 63 insertions(+), 63 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d8288374078e..4bc52948447d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -170,10 +170,10 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", + "Cannot create hugepage at level: %u, vaddr: 0x%lx", current_level, vaddr); TEST_ASSERT(!(*pte & PTE_LARGE_MASK), - "Cannot create page table at level: %u, vaddr: 0x%lx\n", + "Cannot create page table at level: %u, vaddr: 0x%lx", current_level, vaddr); } return pte; @@ -220,7 +220,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) /* Fill in page table entry. */ pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), - "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); + "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); } @@ -253,7 +253,7 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) if (*pte & PTE_LARGE_MASK) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, - "Unexpected hugepage at level %d\n", current_level); + "Unexpected hugepage at level %d", current_level); *level = current_level; } @@ -825,7 +825,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) struct kvm_regs regs; TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 59d97531c9b1..089b8925b6b2 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -54,7 +54,7 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) /* KVM should return supported EVMCS version range */ TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && (evmcs_ver & 0xff) > 0, - "Incorrect EVMCS version range: %x:%x\n", + "Incorrect EVMCS version range: %x:%x", evmcs_ver & 0xff, evmcs_ver >> 8); return evmcs_ver; @@ -387,10 +387,10 @@ static void nested_create_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n", + "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx\n", + "Cannot create page table at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); } } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 11329e5ff945..f7da543570ad 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -296,7 +296,7 @@ int main(int argc, char *argv[]) void *tiles_data = (void *)addr_gva2hva(vm, tiledata); /* Only check TMM0 register, 1 tile */ ret = memcmp(amx_start, tiles_data, TILE_SIZE); - TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret); + TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret); kvm_x86_state_cleanup(state); break; case 9: diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 3b34d8156d1c..8c579ce714e9 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -84,7 +84,7 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, TEST_ASSERT(e1->function == e2->function && e1->index == e2->index && e1->flags == e2->flags, - "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n", + "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x", i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); @@ -170,7 +170,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent, - "KVM didn't update nent on success, wanted %u, got %u\n", + "KVM didn't update nent on success, wanted %u, got %u", vcpu->cpuid->nent, cpuid->nent); for (i = 0; i < vcpu->cpuid->nent; i++) { diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h index 0a1573d52882..37b1a9f52864 100644 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h @@ -41,7 +41,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) insn_bytes = run->emulation_failure.insn_bytes; TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, - "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x", insn_bytes[0], insn_bytes[1]); vcpu_regs_get(vcpu, ®s); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index f5e1e98f04f9..65690d916db7 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -220,7 +220,7 @@ int main(void) tsc_page_gva = vm_vaddr_alloc_page(vm); memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, - "TSC page has to be page aligned\n"); + "TSC page has to be page aligned"); vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); host_check_tsc_msr_rdtsc(vcpu); @@ -237,7 +237,7 @@ int main(void) break; case UCALL_DONE: /* Keep in sync with guest_main() */ - TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", + TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d", stage); goto out; default: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c index 65e5f4c05068..f1617762c22f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -289,7 +289,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c index c4443f71f8dd..05b56095cf76 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -658,7 +658,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_ABORT: diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 1778704360a6..3e0b7d51abda 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -92,7 +92,7 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case) break; } while (errno == EINTR); - TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r); + TEST_ASSERT(!r, "clock_gettime() failed: %d", r); data.realtime = ts.tv_sec * NSEC_PER_SEC; data.realtime += ts.tv_nsec; @@ -127,7 +127,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd); + TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd); } } } @@ -154,7 +154,7 @@ static void check_clocksource(void) } clk_name = malloc(st.st_size); - TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n"); + TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); if (!fgets(clk_name, st.st_size, fp)) { pr_info("failed to read clocksource file: %d; assuming TSC.\n", diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index c9a07963d68a..87011965dc41 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -44,7 +44,7 @@ static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu) get_ucall(vcpu, &uc); TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd); + "Received ucall other than UCALL_SYNC: %lu", uc.cmd); TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == MSR_PLATFORM_INFO_MAX_TURBO_RATIO, "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 283cc55597a4..a3bd54b925ab 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -866,7 +866,7 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, * userspace doesn't set any pmu filter. */ count = run_vcpu_to_sync(vcpu); - TEST_ASSERT(count, "Unexpected count value: %ld\n", count); + TEST_ASSERT(count, "Unexpected count value: %ld", count); for (i = 0; i < BIT(nr_fixed_counters); i++) { bitmap = BIT(i); diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index c7ef97561038..a49828adf294 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -91,7 +91,7 @@ static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_migrate_from(dst, src); - TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno); } static void test_sev_migrate_from(bool es) @@ -113,7 +113,7 @@ static void test_sev_migrate_from(bool es) /* Migrate the guest back to the original VM. */ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]); TEST_ASSERT(ret == -1 && errno == EIO, - "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, + "VM that was migrated from should be dead. ret %d, errno: %d", ret, errno); kvm_vm_free(src_vm); @@ -172,7 +172,7 @@ static void test_sev_migrate_parameters(void) vm_no_sev = aux_vm_create(true); ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Migrations require SEV enabled. ret %d, errno: %d\n", ret, + "Migrations require SEV enabled. ret %d, errno: %d", ret, errno); if (!have_sev_es) @@ -187,25 +187,25 @@ static void test_sev_migrate_parameters(void) ret = __sev_migrate_from(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n", + "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n", + "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d", ret, errno); kvm_vm_free(sev_vm); @@ -227,7 +227,7 @@ static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_mirror_create(dst, src); - TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno); } static void verify_mirror_allowed_cmds(int vm_fd) @@ -259,7 +259,7 @@ static void verify_mirror_allowed_cmds(int vm_fd) ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able call command: %d. ret: %d, errno: %d\n", + "Should not be able call command: %d. ret: %d, errno: %d", cmd_id, ret, errno); } @@ -301,18 +301,18 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to self. ret: %d, errno: %d\n", + "Should not be able copy context to self. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + "Copy context requires SEV enabled. ret %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_with_vcpu, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d", ret, errno); if (!have_sev_es) @@ -322,13 +322,13 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n", + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); kvm_vm_free(sev_es_vm); diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index 06edf00a97d6..1a46dd7bb391 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) MEM_REGION_SIZE / PAGE_SIZE, 0); gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc"); virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: break; default: - TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + TEST_FAIL("Unrecognized ucall: %lu", uc.cmd); } kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 00965ba33f73..a91b5b145fa3 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -46,7 +46,7 @@ static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) REG_COMPARE(rax); REG_COMPARE(rbx); @@ -230,14 +230,14 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; @@ -245,14 +245,14 @@ int main(int argc, char *argv[]) run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c index 0ed32ec903d0..dcbb3c29fb8e 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c @@ -143,7 +143,7 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected."); printf("vCPU received GP in guest.\n"); } @@ -188,7 +188,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA."); printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR); @@ -198,7 +198,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA."); printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR); @@ -208,7 +208,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) { - TEST_ASSERT(false, "vCPU assertion failure: %s.\n", + TEST_ASSERT(false, "vCPU assertion failure: %s.", (const char *)uc.args[0]); } diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 255c50b0dc32..9481cbcf284f 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) break; TEST_ASSERT(run->io.port == 0x80, - "Expected I/O at port 0x80, got port 0x%x\n", run->io.port); + "Expected I/O at port 0x80, got port 0x%x", run->io.port); /* * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c index 2bed5fb3a0d6..a81a24761aac 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u", run->internal.suberror); break; } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index e4ad5fef52ff..7f6f5f23fb9b 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -128,17 +128,17 @@ int main(int argc, char *argv[]) */ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); if (uc.args[1]) { - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); } else { - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); } - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); break; case UCALL_DONE: done = true; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index a9b827c69f32..fad3634fd9eb 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -28,7 +28,7 @@ static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Expected emulation failure, got %d\n", + "Expected emulation failure, got %d", run->emulation_failure.suberror); } diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 67ac2a3292ef..725c206ba0b9 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -216,7 +216,7 @@ static void *vcpu_thread(void *arg) "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" "Halter TPR=%#x PPR=%#x LVR=%#x\n" "Migrations attempted: %lu\n" - "Migrations completed: %lu\n", + "Migrations completed: %lu", vcpu->id, (const char *)uc.args[0], params->data->ipis_sent, params->data->hlt_count, params->data->wake_count, @@ -288,7 +288,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, } TEST_ASSERT(nodes > 1, - "Did not find at least 2 numa nodes. Can't do migration\n"); + "Did not find at least 2 numa nodes. Can't do migration"); fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); @@ -347,7 +347,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, wake_count != data->wake_count, "IPI, HLT and wake count have not increased " "in the last %lu seconds. " - "HLTer is likely hung.\n", interval_secs); + "HLTer is likely hung.", interval_secs); ipis_sent = data->ipis_sent; hlt_count = data->hlt_count; @@ -381,7 +381,7 @@ void get_cmdline_args(int argc, char *argv[], int *run_secs, "-m adds calls to migrate_pages while vCPUs are running." " Default is no migrations.\n" "-d <delay microseconds> - delay between migrate_pages() calls." - " Default is %d microseconds.\n", + " Default is %d microseconds.", DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); } } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index dc6217440db3..25a0b0db5c3c 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -116,7 +116,7 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index e0ddf47362e7..167c97abff1b 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS); TEST_ASSERT(xss_val == 0, - "MSR_IA32_XSS should be initialized to zero\n"); + "MSR_IA32_XSS should be initialized to zero"); vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val); From 46fee9e38995af9ae16a8cc7d05031486d44cf35 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Tue, 9 Jan 2024 14:03:02 -0800 Subject: [PATCH 027/304] KVM: selftests: Delete superfluous, unused "stage" variable in AMX test Delete the AMX's tests "stage" counter, as the counter is no longer used, which makes clang unhappy: x86_64/amx_test.c:224:6: error: variable 'stage' set but not used int stage, ret; ^ 1 error generated. Note, "stage" was never really used, it just happened to be dumped out by a (failed) assertion on run->exit_reason, i.e. the AMX test has no concept of stages, the code was likely copy+pasted from a different test. Fixes: c96f57b08012 ("KVM: selftests: Make vCPU exit reason test assertion common") Reviewed-by: Jim Mattson <jmattson@google.com> Link: https://lore.kernel.org/r/20240109220302.399296-1-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/x86_64/amx_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index f7da543570ad..eae521f050e0 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) vm_vaddr_t amx_cfg, tiledata, xstate; struct ucall uc; u32 amx_offset; - int stage, ret; + int ret; /* * Note, all off-by-default features must be enabled before anything @@ -263,7 +263,7 @@ int main(int argc, char *argv[]) memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); - for (stage = 1; ; stage++) { + for (;;) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); From 8ad4855273488c9bd5320b3fee80f66f0023f326 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 29 Jan 2024 09:58:46 +0100 Subject: [PATCH 028/304] KVM: selftests: Avoid infinite loop in hyperv_features when invtsc is missing When X86_FEATURE_INVTSC is missing, guest_test_msrs_access() was supposed to skip testing dependent Hyper-V invariant TSC feature. Unfortunately, 'continue' does not lead to that as stage is not incremented. Moreover, 'vm' allocated with vm_create_with_one_vcpu() is not freed and the test runs out of available file descriptors very quickly. Fixes: bd827bd77537 ("KVM: selftests: Test Hyper-V invariant TSC control") Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Link: https://lore.kernel.org/r/20240129085847.2674082-1-vkuznets@redhat.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/x86_64/hyperv_features.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 4f4193fc74ff..b923a285e96f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -454,7 +454,7 @@ static void guest_test_msrs_access(void) case 44: /* MSR is not available when CPUID feature bit is unset */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; msr->fault_expected = true; @@ -462,7 +462,7 @@ static void guest_test_msrs_access(void) case 45: /* MSR is vailable when CPUID feature bit is set */ if (!has_invtsc) - continue; + goto next_stage; vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT); msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; @@ -471,7 +471,7 @@ static void guest_test_msrs_access(void) case 46: /* Writing bits other than 0 is forbidden */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 0xdeadbeef; @@ -480,7 +480,7 @@ static void guest_test_msrs_access(void) case 47: /* Setting bit 0 enables the feature */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 1; @@ -513,6 +513,7 @@ static void guest_test_msrs_access(void) return; } +next_stage: stage++; kvm_vm_free(vm); } From c2a449a30fa2b5ddc1dd058e92e047157c9eeb9e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 29 Jan 2024 09:58:47 +0100 Subject: [PATCH 029/304] KVM: selftests: Fail tests when open() fails with !ENOENT open_path_or_exit() is used for '/dev/kvm', '/dev/sev', and '/sys/module/%s/parameters/%s' and skipping test when the entry is missing is completely reasonable. Other errors, however, may indicate a real issue which is easy to miss. E.g. when 'hyperv_features' test was entering an infinite loop the output was: ./hyperv_features Testing access to Hyper-V specific MSRs 1..0 # SKIP - /dev/kvm not available (errno: 24) and this can easily get overlooked. Keep ENOENT case 'special' for skipping tests and fail when open() results in any other errno. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Link: https://lore.kernel.org/r/20240129085847.2674082-2-vkuznets@redhat.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/lib/kvm_util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8754abaff5e9..1b197426f29f 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -27,7 +27,8 @@ int open_path_or_exit(const char *path, int flags) int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); + __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); + TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); return fd; } From 10c02aad111df02088d1a81792a709f6a7eca6cc Mon Sep 17 00:00:00 2001 From: Sebastian Ene <sebastianene@google.com> Date: Wed, 24 Jan 2024 09:10:28 +0000 Subject: [PATCH 030/304] KVM: arm64: Fix circular locking dependency The rule inside kvm enforces that the vcpu->mutex is taken *inside* kvm->lock. The rule is violated by the pkvm_create_hyp_vm() which acquires the kvm->lock while already holding the vcpu->mutex lock from kvm_vcpu_ioctl(). Avoid the circular locking dependency altogether by protecting the hyp vm handle with the config_lock, much like we already do for other forms of VM-scoped data. Signed-off-by: Sebastian Ene <sebastianene@google.com> Cc: stable@vger.kernel.org Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20240124091027.1477174-2-sebastianene@google.com --- arch/arm64/kvm/pkvm.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index 8350fb8fee0b..b7be96a53597 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -101,6 +101,17 @@ void __init kvm_hyp_reserve(void) hyp_mem_base); } +static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) +{ + if (host_kvm->arch.pkvm.handle) { + WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, + host_kvm->arch.pkvm.handle)); + } + + host_kvm->arch.pkvm.handle = 0; + free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); +} + /* * Allocates and donates memory for hypervisor VM structs at EL2. * @@ -181,7 +192,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm) return 0; destroy_vm: - pkvm_destroy_hyp_vm(host_kvm); + __pkvm_destroy_hyp_vm(host_kvm); return ret; free_vm: free_pages_exact(hyp_vm, hyp_vm_sz); @@ -194,23 +205,19 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm) { int ret = 0; - mutex_lock(&host_kvm->lock); + mutex_lock(&host_kvm->arch.config_lock); if (!host_kvm->arch.pkvm.handle) ret = __pkvm_create_hyp_vm(host_kvm); - mutex_unlock(&host_kvm->lock); + mutex_unlock(&host_kvm->arch.config_lock); return ret; } void pkvm_destroy_hyp_vm(struct kvm *host_kvm) { - if (host_kvm->arch.pkvm.handle) { - WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, - host_kvm->arch.pkvm.handle)); - } - - host_kvm->arch.pkvm.handle = 0; - free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); + mutex_lock(&host_kvm->arch.config_lock); + __pkvm_destroy_hyp_vm(host_kvm); + mutex_unlock(&host_kvm->arch.config_lock); } int pkvm_init_host_vm(struct kvm *host_kvm) From 98323e9d70172f1b46d1cadb20d6c54abf62870d Mon Sep 17 00:00:00 2001 From: Vincent Guittot <vincent.guittot@linaro.org> Date: Wed, 17 Jan 2024 20:05:45 +0100 Subject: [PATCH 031/304] topology: Set capacity_freq_ref in all cases If "capacity-dmips-mhz" is not set, raw_capacity is null and we skip the normalization step which includes setting per_cpu capacity_freq_ref. Always register the notifier but skip the capacity normalization if raw_capacity is null. Fixes: 9942cb22ea45 ("sched/topology: Add a new arch_scale_freq_ref() method") Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Acked-by: Sudeep Holla <sudeep.holla@arm.com> Tested-by: Pierre Gondois <pierre.gondois@arm.com> Tested-by: Mark Brown <broonie@kernel.org> Tested-by: Paul Barker <paul.barker.ct@bp.renesas.com> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Link: https://lore.kernel.org/r/20240117190545.596057-1-vincent.guittot@linaro.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/base/arch_topology.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 018ac202de34..024b78a0cfc1 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -431,9 +431,6 @@ init_cpu_capacity_callback(struct notifier_block *nb, struct cpufreq_policy *policy = data; int cpu; - if (!raw_capacity) - return 0; - if (val != CPUFREQ_CREATE_POLICY) return 0; @@ -450,9 +447,11 @@ init_cpu_capacity_callback(struct notifier_block *nb, } if (cpumask_empty(cpus_to_visit)) { - topology_normalize_cpu_scale(); - schedule_work(&update_topology_flags_work); - free_raw_capacity(); + if (raw_capacity) { + topology_normalize_cpu_scale(); + schedule_work(&update_topology_flags_work); + free_raw_capacity(); + } pr_debug("cpu_capacity: parsing done\n"); schedule_work(&parsing_done_work); } @@ -472,7 +471,7 @@ static int __init register_cpufreq_notifier(void) * On ACPI-based systems skip registering cpufreq notifier as cpufreq * information is not needed for cpu capacity initialization. */ - if (!acpi_disabled || !raw_capacity) + if (!acpi_disabled) return -EINVAL; if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) From 4d5e86a56615cc387d21c629f9af8fb0e958d350 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky <leonro@nvidia.com> Date: Sun, 28 Jan 2024 11:29:11 +0200 Subject: [PATCH 032/304] RDMA/mlx5: Fix fortify source warning while accessing Eth segment ------------[ cut here ]------------ memcpy: detected field-spanning write (size 56) of single field "eseg->inline_hdr.start" at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 (size 2) WARNING: CPU: 0 PID: 293779 at /var/lib/dkms/mlnx-ofed-kernel/5.8/build/drivers/infiniband/hw/mlx5/wr.c:131 mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Modules linked in: 8021q garp mrp stp llc rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) ib_core(OE) mlx5_core(OE) pci_hyperv_intf mlxdevm(OE) mlx_compat(OE) tls mlxfw(OE) psample nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables libcrc32c nfnetlink mst_pciconf(OE) knem(OE) vfio_pci vfio_pci_core vfio_iommu_type1 vfio iommufd irqbypass cuse nfsv3 nfs fscache netfs xfrm_user xfrm_algo ipmi_devintf ipmi_msghandler binfmt_misc crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic ghash_clmulni_intel sha512_ssse3 snd_pcsp aesni_intel crypto_simd cryptd snd_pcm snd_timer joydev snd soundcore input_leds serio_raw evbug nfsd auth_rpcgss nfs_acl lockd grace sch_fq_codel sunrpc drm efi_pstore ip_tables x_tables autofs4 psmouse virtio_net net_failover failover floppy [last unloaded: mlx_compat(OE)] CPU: 0 PID: 293779 Comm: ssh Tainted: G OE 6.2.0-32-generic #32~22.04.1-Ubuntu Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] Code: 0c 01 00 a8 01 75 25 48 8b 75 a0 b9 02 00 00 00 48 c7 c2 10 5b fd c0 48 c7 c7 80 5b fd c0 c6 05 57 0c 03 00 01 e8 95 4d 93 da <0f> 0b 44 8b 4d b0 4c 8b 45 c8 48 8b 4d c0 e9 49 fb ff ff 41 0f b7 RSP: 0018:ffffb5b48478b570 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb5b48478b628 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffb5b48478b5e8 R13: ffff963a3c609b5e R14: ffff9639c3fbd800 R15: ffffb5b480475a80 FS: 00007fc03b444c80(0000) GS:ffff963a3dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000556f46bdf000 CR3: 0000000006ac6003 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> ? show_regs+0x72/0x90 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? __warn+0x8d/0x160 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] ? report_bug+0x1bb/0x1d0 ? handle_bug+0x46/0x90 ? exc_invalid_op+0x19/0x80 ? asm_exc_invalid_op+0x1b/0x20 ? mlx5_ib_post_send+0x191b/0x1a60 [mlx5_ib] mlx5_ib_post_send_nodrain+0xb/0x20 [mlx5_ib] ipoib_send+0x2ec/0x770 [ib_ipoib] ipoib_start_xmit+0x5a0/0x770 [ib_ipoib] dev_hard_start_xmit+0x8e/0x1e0 ? validate_xmit_skb_list+0x4d/0x80 sch_direct_xmit+0x116/0x3a0 __dev_xmit_skb+0x1fd/0x580 __dev_queue_xmit+0x284/0x6b0 ? _raw_spin_unlock_irq+0xe/0x50 ? __flush_work.isra.0+0x20d/0x370 ? push_pseudo_header+0x17/0x40 [ib_ipoib] neigh_connected_output+0xcd/0x110 ip_finish_output2+0x179/0x480 ? __smp_call_single_queue+0x61/0xa0 __ip_finish_output+0xc3/0x190 ip_finish_output+0x2e/0xf0 ip_output+0x78/0x110 ? __pfx_ip_finish_output+0x10/0x10 ip_local_out+0x64/0x70 __ip_queue_xmit+0x18a/0x460 ip_queue_xmit+0x15/0x30 __tcp_transmit_skb+0x914/0x9c0 tcp_write_xmit+0x334/0x8d0 tcp_push_one+0x3c/0x60 tcp_sendmsg_locked+0x2e1/0xac0 tcp_sendmsg+0x2d/0x50 inet_sendmsg+0x43/0x90 sock_sendmsg+0x68/0x80 sock_write_iter+0x93/0x100 vfs_write+0x326/0x3c0 ksys_write+0xbd/0xf0 ? do_syscall_64+0x69/0x90 __x64_sys_write+0x19/0x30 do_syscall_64+0x59/0x90 ? do_user_addr_fault+0x1d0/0x640 ? exit_to_user_mode_prepare+0x3b/0xd0 ? irqentry_exit_to_user_mode+0x9/0x20 ? irqentry_exit+0x43/0x50 ? exc_page_fault+0x92/0x1b0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fc03ad14a37 Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffdf8697fe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000008024 RCX: 00007fc03ad14a37 RDX: 0000000000008024 RSI: 0000556f46bd8270 RDI: 0000000000000003 RBP: 0000556f46bb1800 R08: 0000000000007fe3 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 0000556f46bc66b0 R14: 000000000000000a R15: 0000556f46bb2f50 </TASK> ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/r/8228ad34bd1a25047586270f7b1fb4ddcd046282.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leonro@nvidia.com> --- drivers/infiniband/hw/mlx5/wr.c | 2 +- include/linux/mlx5/qp.h | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index df1d1b0a3ef7..9947feb7fb8a 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, */ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, left); - memcpy(eseg->inline_hdr.start, pdata, copysz); + memcpy(eseg->inline_hdr.data, pdata, copysz); stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - sizeof(eseg->inline_hdr.start) + copysz, 16); *size += stride / 16; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index bd53cf4be7bd..f0e55bf3ec8b 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -269,7 +269,10 @@ struct mlx5_wqe_eth_seg { union { struct { __be16 sz; - u8 start[2]; + union { + u8 start[2]; + DECLARE_FLEX_ARRAY(u8, data); + }; } inline_hdr; struct { __be16 type; From 43fdbd140238d44e7e847232719fef7d20f9d326 Mon Sep 17 00:00:00 2001 From: Mark Zhang <markzhang@nvidia.com> Date: Sun, 28 Jan 2024 11:29:12 +0200 Subject: [PATCH 033/304] IB/mlx5: Don't expose debugfs entries for RRoCE general parameters if not supported debugfs entries for RRoCE general CC parameters must be exposed only when they are supported, otherwise when accessing them there may be a syndrome error in kernel log, for example: $ cat /sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp cat: '/sys/kernel/debug/mlx5/0000:08:00.1/cc_params/rtt_resp_dscp': Invalid argument $ dmesg mlx5_core 0000:08:00.1: mlx5_cmd_out_err:805:(pid 1253): QUERY_CONG_PARAMS(0x824) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x325a82), err(-22) Fixes: 66fb1d5df6ac ("IB/mlx5: Extend debug control for CC parameters") Reviewed-by: Edward Srouji <edwards@nvidia.com> Signed-off-by: Mark Zhang <markzhang@nvidia.com> Link: https://lore.kernel.org/r/e7ade70bad52b7468bdb1de4d41d5fad70c8b71c.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/mlx5/cong.c | 6 ++++++ include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c index f87531318feb..a78a067e3ce7 100644 --- a/drivers/infiniband/hw/mlx5/cong.c +++ b/drivers/infiniband/hw/mlx5/cong.c @@ -458,6 +458,12 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num) dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev)); for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { + if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID || + i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP)) + if (!MLX5_CAP_GEN(mdev, roce) || + !MLX5_CAP_ROCE(mdev, roce_cc_general)) + continue; + dbg_cc_params->params[i].offset = i; dbg_cc_params->params[i].dev = dev; dbg_cc_params->params[i].port_num = port_num; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bf5320b28b8b..2c10350bd422 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1103,7 +1103,7 @@ struct mlx5_ifc_roce_cap_bits { u8 sw_r_roce_src_udp_port[0x1]; u8 fl_rc_qp_when_roce_disabled[0x1]; u8 fl_rc_qp_when_roce_enabled[0x1]; - u8 reserved_at_7[0x1]; + u8 roce_cc_general[0x1]; u8 qp_ooo_transmit_default[0x1]; u8 reserved_at_9[0x15]; u8 qp_ts_format[0x2]; From be551ee1574280ef8afbf7c271212ac3e38933ef Mon Sep 17 00:00:00 2001 From: Yishai Hadas <yishaih@nvidia.com> Date: Sun, 28 Jan 2024 11:29:13 +0200 Subject: [PATCH 034/304] RDMA/mlx5: Relax DEVX access upon modify commands Relax DEVX access upon modify commands to be UVERBS_ACCESS_READ. The kernel doesn't need to protect what firmware protects, or what causes no damage to anyone but the user. As firmware needs to protect itself from parallel access to the same object, don't block parallel modify/query commands on the same object in the kernel side. This change will allow user space application to run parallel updates to different entries in the same bulk object. Tested-by: Tamar Mashiah <tmashiah@nvidia.com> Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Reviewed-by: Michael Guralnik <michaelgur@nvidia.com> Link: https://lore.kernel.org/r/7407d5ed35dc427c1097699e12b49c01e1073406.1706433934.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/mlx5/devx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 869369cb5b5f..253fea374a72 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2949,7 +2949,7 @@ DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, UVERBS_IDR_ANY_OBJECT, - UVERBS_ACCESS_WRITE, + UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN, From 6231c9e1a9f35b535c66709aa8a6eda40dbc4132 Mon Sep 17 00:00:00 2001 From: Prasad Pandit <pjp@fedoraproject.org> Date: Wed, 3 Jan 2024 13:23:43 +0530 Subject: [PATCH 035/304] KVM: x86: make KVM_REQ_NMI request iff NMI pending for vcpu kvm_vcpu_ioctl_x86_set_vcpu_events() routine makes 'KVM_REQ_NMI' request for a vcpu even when its 'events->nmi.pending' is zero. Ex: qemu_thread_start kvm_vcpu_thread_fn qemu_wait_io_event qemu_wait_io_event_common process_queued_cpu_work do_kvm_cpu_synchronize_post_init/_reset kvm_arch_put_registers kvm_put_vcpu_events (cpu, level=[2|3]) This leads vCPU threads in QEMU to constantly acquire & release the global mutex lock, delaying the guest boot due to lock contention. Add check to make KVM_REQ_NMI request only if vcpu has NMI pending. Fixes: bdedff263132 ("KVM: x86: Route pending NMIs from userspace through process_nmi()") Cc: stable@vger.kernel.org Signed-off-by: Prasad Pandit <pjp@fedoraproject.org> Link: https://lore.kernel.org/r/20240103075343.549293-1-ppandit@redhat.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 363b1c080205..25bc52cdf8f4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5454,7 +5454,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) { vcpu->arch.nmi_pending = 0; atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending); - kvm_make_request(KVM_REQ_NMI, vcpu); + if (events->nmi.pending) + kvm_make_request(KVM_REQ_NMI, vcpu); } static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked); From a107d643b2a3382e0a2d2c4ef08bf8c6bff4561d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> Date: Mon, 18 Dec 2023 08:54:00 +0100 Subject: [PATCH 036/304] media: Revert "media: rkisp1: Drop IRQF_SHARED" This reverts commit 85d2a31fe4d9be1555f621ead7a520d8791e0f74. The rkisp1 does share interrupt lines on some platforms, after all. Thus we need to revert this, and implement a fix for the rkisp1 shared irq handling in a follow-up patch. Closes: https://lore.kernel.org/all/87o7eo8vym.fsf@gmail.com/ Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-1-173007628248@ideasonboard.com Reported-by: Mikhail Rudenko <mike.rudenko@gmail.com> Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> --- drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index f96f821a7b50..acc559652d6e 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -559,7 +559,7 @@ static int rkisp1_probe(struct platform_device *pdev) rkisp1->irqs[il] = irq; } - ret = devm_request_irq(dev, irq, info->isrs[i].isr, 0, + ret = devm_request_irq(dev, irq, info->isrs[i].isr, IRQF_SHARED, dev_driver_string(dev), dev); if (ret) { dev_err(dev, "request irq failed: %d\n", ret); From ffb635bb398fc07cb38f8a7b4a82cbe5f412f08e Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> Date: Mon, 18 Dec 2023 08:54:01 +0100 Subject: [PATCH 037/304] media: rkisp1: Fix IRQ handling due to shared interrupts The driver requests the interrupts as IRQF_SHARED, so the interrupt handlers can be called at any time. If such a call happens while the ISP is powered down, the SoC will hang as the driver tries to access the ISP registers. This can be reproduced even without the platform sharing the IRQ line: Enable CONFIG_DEBUG_SHIRQ and unload the driver, and the board will hang. Fix this by adding a new field, 'irqs_enabled', which is used to bail out from the interrupt handler when the ISP is not operational. Link: https://lore.kernel.org/r/20231218-rkisp-shirq-fix-v1-2-173007628248@ideasonboard.com Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> --- .../platform/rockchip/rkisp1/rkisp1-capture.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-common.h | 2 ++ .../platform/rockchip/rkisp1/rkisp1-csi.c | 3 +++ .../platform/rockchip/rkisp1/rkisp1-dev.c | 22 +++++++++++++++++++ .../platform/rockchip/rkisp1/rkisp1-isp.c | 3 +++ 5 files changed, 33 insertions(+) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c index aebd3c12020b..c381c22135a2 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c @@ -725,6 +725,9 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx) unsigned int i; u32 status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h index 4b6b28c05b89..b757f75edecf 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-common.h @@ -450,6 +450,7 @@ struct rkisp1_debug { * @debug: debug params to be exposed on debugfs * @info: version-specific ISP information * @irqs: IRQ line numbers + * @irqs_enabled: the hardware is enabled and can cause interrupts */ struct rkisp1_device { void __iomem *base_addr; @@ -471,6 +472,7 @@ struct rkisp1_device { struct rkisp1_debug debug; const struct rkisp1_info *info; int irqs[RKISP1_NUM_IRQS]; + bool irqs_enabled; }; /* diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c index b6e47e2f1b94..4202642e0523 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-csi.c @@ -196,6 +196,9 @@ irqreturn_t rkisp1_csi_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 val, status; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_MIPI_MIS); if (!status) return IRQ_NONE; diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index acc559652d6e..73cf08a74011 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -305,6 +305,24 @@ static int __maybe_unused rkisp1_runtime_suspend(struct device *dev) { struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); + rkisp1->irqs_enabled = false; + /* Make sure the IRQ handler will see the above */ + mb(); + + /* + * Wait until any running IRQ handler has returned. The IRQ handler + * may get called even after this (as it's a shared interrupt line) + * but the 'irqs_enabled' flag will make the handler return immediately. + */ + for (unsigned int il = 0; il < ARRAY_SIZE(rkisp1->irqs); ++il) { + if (rkisp1->irqs[il] == -1) + continue; + + /* Skip if the irq line is the same as previous */ + if (il == 0 || rkisp1->irqs[il - 1] != rkisp1->irqs[il]) + synchronize_irq(rkisp1->irqs[il]); + } + clk_bulk_disable_unprepare(rkisp1->clk_size, rkisp1->clks); return pinctrl_pm_select_sleep_state(dev); } @@ -321,6 +339,10 @@ static int __maybe_unused rkisp1_runtime_resume(struct device *dev) if (ret) return ret; + rkisp1->irqs_enabled = true; + /* Make sure the IRQ handler will see the above */ + mb(); + return 0; } diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c index f00873d31c42..78a1f7a1499b 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c @@ -976,6 +976,9 @@ irqreturn_t rkisp1_isp_isr(int irq, void *ctx) struct rkisp1_device *rkisp1 = dev_get_drvdata(dev); u32 status, isp_err; + if (!rkisp1->irqs_enabled) + return IRQ_NONE; + status = rkisp1_read(rkisp1, RKISP1_CIF_ISP_MIS); if (!status) return IRQ_NONE; From f66556c1333b3bd4806fc98ee07c419ab545e6ee Mon Sep 17 00:00:00 2001 From: Hans de Goede <hdegoede@redhat.com> Date: Thu, 28 Dec 2023 22:43:25 +0100 Subject: [PATCH 038/304] media: atomisp: Adjust for v4l2_subdev_state handling changes in 6.8 The atomisp driver emulates a standard v4l2 device, which also works for non media-controller aware applications. Part of this requires making try_fmt calls on the sensor when a normal v4l2 app is making try_fmt calls on the /dev/video# mode. With the recent v4l2_subdev_state handling changes in 6.8 this no longer works, fixing this requires 2 changes: 1. The atomisp code was using its own internal v4l2_subdev_pad_config for this. Replace the internal v4l2_subdev_pad_config with allocating a full v4l2_subdev_state for storing the full try_fmt state. 2. The paths actually setting the fmt or crop selection now need to be passed the v4l2_subdev's active state, so that sensor drivers which are using the v4l2_subdev's active state to store their state keep working. Signed-off-by: Hans de Goede <hdegoede@redhat.com> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> --- .../staging/media/atomisp/pci/atomisp_cmd.c | 58 ++++++++++-------- .../media/atomisp/pci/atomisp_internal.h | 4 +- .../staging/media/atomisp/pci/atomisp_ioctl.c | 52 +++++++++------- .../staging/media/atomisp/pci/atomisp_v4l2.c | 59 ++++++++++++++----- 4 files changed, 111 insertions(+), 62 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c index f44e6412f4e3..d0db2efe0045 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c +++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c @@ -3723,12 +3723,10 @@ apply_min_padding: static int atomisp_set_crop(struct atomisp_device *isp, const struct v4l2_mbus_framefmt *format, + struct v4l2_subdev_state *sd_state, int which) { struct atomisp_input_subdev *input = &isp->inputs[isp->asd.input_curr]; - struct v4l2_subdev_state pad_state = { - .pads = &input->pad_cfg, - }; struct v4l2_subdev_selection sel = { .which = which, .target = V4L2_SEL_TGT_CROP, @@ -3754,7 +3752,7 @@ static int atomisp_set_crop(struct atomisp_device *isp, sel.r.left = ((input->native_rect.width - sel.r.width) / 2) & ~1; sel.r.top = ((input->native_rect.height - sel.r.height) / 2) & ~1; - ret = v4l2_subdev_call(input->camera, pad, set_selection, &pad_state, &sel); + ret = v4l2_subdev_call(input->camera, pad, set_selection, sd_state, &sel); if (ret) dev_err(isp->dev, "Error setting crop to %ux%u @%ux%u: %d\n", sel.r.width, sel.r.height, sel.r.left, sel.r.top, ret); @@ -3770,9 +3768,6 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f, const struct atomisp_format_bridge *fmt, *snr_fmt; struct atomisp_sub_device *asd = &isp->asd; struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; - struct v4l2_subdev_state pad_state = { - .pads = &input->pad_cfg, - }; struct v4l2_subdev_format format = { .which = V4L2_SUBDEV_FORMAT_TRY, }; @@ -3809,11 +3804,16 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f, dev_dbg(isp->dev, "try_mbus_fmt: asking for %ux%u\n", format.format.width, format.format.height); - ret = atomisp_set_crop(isp, &format.format, V4L2_SUBDEV_FORMAT_TRY); - if (ret) - return ret; + v4l2_subdev_lock_state(input->try_sd_state); + + ret = atomisp_set_crop(isp, &format.format, input->try_sd_state, + V4L2_SUBDEV_FORMAT_TRY); + if (ret == 0) + ret = v4l2_subdev_call(input->camera, pad, set_fmt, + input->try_sd_state, &format); + + v4l2_subdev_unlock_state(input->try_sd_state); - ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &format); if (ret) return ret; @@ -4238,9 +4238,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p struct atomisp_device *isp = asd->isp; struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; const struct atomisp_format_bridge *format; - struct v4l2_subdev_state pad_state = { - .pads = &input->pad_cfg, - }; + struct v4l2_subdev_state *act_sd_state; struct v4l2_subdev_format vformat = { .which = V4L2_SUBDEV_FORMAT_TRY, }; @@ -4268,12 +4266,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p /* Disable dvs if resolution can't be supported by sensor */ if (asd->params.video_dis_en && asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) { - ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_TRY); - if (ret) - return ret; + v4l2_subdev_lock_state(input->try_sd_state); + + ret = atomisp_set_crop(isp, &vformat.format, input->try_sd_state, + V4L2_SUBDEV_FORMAT_TRY); + if (ret == 0) { + vformat.which = V4L2_SUBDEV_FORMAT_TRY; + ret = v4l2_subdev_call(input->camera, pad, set_fmt, + input->try_sd_state, &vformat); + } + + v4l2_subdev_unlock_state(input->try_sd_state); - vformat.which = V4L2_SUBDEV_FORMAT_TRY; - ret = v4l2_subdev_call(input->camera, pad, set_fmt, &pad_state, &vformat); if (ret) return ret; @@ -4291,12 +4295,18 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p } } - ret = atomisp_set_crop(isp, &vformat.format, V4L2_SUBDEV_FORMAT_ACTIVE); - if (ret) - return ret; + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + + ret = atomisp_set_crop(isp, &vformat.format, act_sd_state, + V4L2_SUBDEV_FORMAT_ACTIVE); + if (ret == 0) { + vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE; + ret = v4l2_subdev_call(input->camera, pad, set_fmt, act_sd_state, &vformat); + } + + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); - vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE; - ret = v4l2_subdev_call(input->camera, pad, set_fmt, NULL, &vformat); if (ret) return ret; diff --git a/drivers/staging/media/atomisp/pci/atomisp_internal.h b/drivers/staging/media/atomisp/pci/atomisp_internal.h index f7b4bee9574b..d5b077e602ca 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_internal.h +++ b/drivers/staging/media/atomisp/pci/atomisp_internal.h @@ -132,8 +132,8 @@ struct atomisp_input_subdev { /* Sensor rects for sensors which support crop */ struct v4l2_rect native_rect; struct v4l2_rect active_rect; - /* Sensor pad_cfg for which == V4L2_SUBDEV_FORMAT_TRY calls */ - struct v4l2_subdev_pad_config pad_cfg; + /* Sensor state for which == V4L2_SUBDEV_FORMAT_TRY calls */ + struct v4l2_subdev_state *try_sd_state; struct v4l2_subdev *motor; diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c index 01b7fa9b56a2..5b2d88c02d36 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c +++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c @@ -781,12 +781,20 @@ static int atomisp_enum_framesizes(struct file *file, void *priv, .which = V4L2_SUBDEV_FORMAT_ACTIVE, .code = input->code, }; + struct v4l2_subdev_state *act_sd_state; int ret; + if (!input->camera) + return -EINVAL; + if (input->crop_support) return atomisp_enum_framesizes_crop(isp, fsize); - ret = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse); + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + ret = v4l2_subdev_call(input->camera, pad, enum_frame_size, + act_sd_state, &fse); + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); if (ret) return ret; @@ -803,18 +811,25 @@ static int atomisp_enum_frameintervals(struct file *file, void *priv, struct video_device *vdev = video_devdata(file); struct atomisp_device *isp = video_get_drvdata(vdev); struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; + struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; struct v4l2_subdev_frame_interval_enum fie = { - .code = atomisp_in_fmt_conv[0].code, + .code = atomisp_in_fmt_conv[0].code, .index = fival->index, .width = fival->width, .height = fival->height, .which = V4L2_SUBDEV_FORMAT_ACTIVE, }; + struct v4l2_subdev_state *act_sd_state; int ret; - ret = v4l2_subdev_call(isp->inputs[asd->input_curr].camera, - pad, enum_frame_interval, NULL, - &fie); + if (!input->camera) + return -EINVAL; + + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + ret = v4l2_subdev_call(input->camera, pad, enum_frame_interval, + act_sd_state, &fie); + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); if (ret) return ret; @@ -830,30 +845,25 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh, struct video_device *vdev = video_devdata(file); struct atomisp_device *isp = video_get_drvdata(vdev); struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; + struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr]; struct v4l2_subdev_mbus_code_enum code = { .which = V4L2_SUBDEV_FORMAT_ACTIVE, }; const struct atomisp_format_bridge *format; - struct v4l2_subdev *camera; + struct v4l2_subdev_state *act_sd_state; unsigned int i, fi = 0; - int rval; + int ret; - camera = isp->inputs[asd->input_curr].camera; - if(!camera) { - dev_err(isp->dev, "%s(): camera is NULL, device is %s\n", - __func__, vdev->name); + if (!input->camera) return -EINVAL; - } - rval = v4l2_subdev_call(camera, pad, enum_mbus_code, NULL, &code); - if (rval == -ENOIOCTLCMD) { - dev_warn(isp->dev, - "enum_mbus_code pad op not supported by %s. Please fix your sensor driver!\n", - camera->name); - } - - if (rval) - return rval; + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + ret = v4l2_subdev_call(input->camera, pad, enum_mbus_code, + act_sd_state, &code); + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); + if (ret) + return ret; for (i = 0; i < ARRAY_SIZE(atomisp_output_fmts); i++) { format = &atomisp_output_fmts[i]; diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c index c1c8501ec61f..547e1444ad97 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c @@ -862,6 +862,9 @@ static void atomisp_unregister_entities(struct atomisp_device *isp) v4l2_device_unregister(&isp->v4l2_dev); media_device_unregister(&isp->media_dev); media_device_cleanup(&isp->media_dev); + + for (i = 0; i < isp->input_cnt; i++) + __v4l2_subdev_state_free(isp->inputs[i].try_sd_state); } static int atomisp_register_entities(struct atomisp_device *isp) @@ -933,32 +936,49 @@ v4l2_device_failed: static void atomisp_init_sensor(struct atomisp_input_subdev *input) { + static struct lock_class_key try_sd_state_key; struct v4l2_subdev_mbus_code_enum mbus_code_enum = { }; struct v4l2_subdev_frame_size_enum fse = { }; - struct v4l2_subdev_state sd_state = { - .pads = &input->pad_cfg, - }; struct v4l2_subdev_selection sel = { }; + struct v4l2_subdev_state *try_sd_state, *act_sd_state; int i, err; + /* + * FIXME: Drivers are not supposed to use __v4l2_subdev_state_alloc() + * but atomisp needs this for try_fmt on its /dev/video# node since + * it emulates a normal v4l2 device there, passing through try_fmt / + * set_fmt to the sensor. + */ + try_sd_state = __v4l2_subdev_state_alloc(input->camera, + "atomisp:try_sd_state->lock", &try_sd_state_key); + if (IS_ERR(try_sd_state)) + return; + + input->try_sd_state = try_sd_state; + + act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera); + mbus_code_enum.which = V4L2_SUBDEV_FORMAT_ACTIVE; - err = v4l2_subdev_call(input->camera, pad, enum_mbus_code, NULL, &mbus_code_enum); + err = v4l2_subdev_call(input->camera, pad, enum_mbus_code, + act_sd_state, &mbus_code_enum); if (!err) input->code = mbus_code_enum.code; sel.which = V4L2_SUBDEV_FORMAT_ACTIVE; sel.target = V4L2_SEL_TGT_NATIVE_SIZE; - err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel); + err = v4l2_subdev_call(input->camera, pad, get_selection, + act_sd_state, &sel); if (err) - return; + goto unlock_act_sd_state; input->native_rect = sel.r; sel.which = V4L2_SUBDEV_FORMAT_ACTIVE; sel.target = V4L2_SEL_TGT_CROP_DEFAULT; - err = v4l2_subdev_call(input->camera, pad, get_selection, NULL, &sel); + err = v4l2_subdev_call(input->camera, pad, get_selection, + act_sd_state, &sel); if (err) - return; + goto unlock_act_sd_state; input->active_rect = sel.r; @@ -973,7 +993,8 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input) fse.code = input->code; fse.which = V4L2_SUBDEV_FORMAT_ACTIVE; - err = v4l2_subdev_call(input->camera, pad, enum_frame_size, NULL, &fse); + err = v4l2_subdev_call(input->camera, pad, enum_frame_size, + act_sd_state, &fse); if (err) break; @@ -989,22 +1010,26 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input) * for padding, set the crop rect to cover the entire sensor instead * of only the default active area. * - * Do this for both try and active formats since the try_crop rect in - * pad_cfg may influence (clamp) future try_fmt calls with which == try. + * Do this for both try and active formats since the crop rect in + * try_sd_state may influence (clamp size) in calls with which == try. */ sel.which = V4L2_SUBDEV_FORMAT_TRY; sel.target = V4L2_SEL_TGT_CROP; sel.r = input->native_rect; - err = v4l2_subdev_call(input->camera, pad, set_selection, &sd_state, &sel); + v4l2_subdev_lock_state(input->try_sd_state); + err = v4l2_subdev_call(input->camera, pad, set_selection, + input->try_sd_state, &sel); + v4l2_subdev_unlock_state(input->try_sd_state); if (err) - return; + goto unlock_act_sd_state; sel.which = V4L2_SUBDEV_FORMAT_ACTIVE; sel.target = V4L2_SEL_TGT_CROP; sel.r = input->native_rect; - err = v4l2_subdev_call(input->camera, pad, set_selection, NULL, &sel); + err = v4l2_subdev_call(input->camera, pad, set_selection, + act_sd_state, &sel); if (err) - return; + goto unlock_act_sd_state; dev_info(input->camera->dev, "Supports crop native %dx%d active %dx%d binning %d\n", input->native_rect.width, input->native_rect.height, @@ -1012,6 +1037,10 @@ static void atomisp_init_sensor(struct atomisp_input_subdev *input) input->binning_support); input->crop_support = true; + +unlock_act_sd_state: + if (act_sd_state) + v4l2_subdev_unlock_state(act_sd_state); } int atomisp_register_device_nodes(struct atomisp_device *isp) From 6a9d552483d50953320b9d3b57abdee8d436f23f Mon Sep 17 00:00:00 2001 From: Sean Young <sean@mess.org> Date: Thu, 13 Apr 2023 10:50:32 +0200 Subject: [PATCH 039/304] media: rc: bpf attach/detach requires write permission Note that bpf attach/detach also requires CAP_NET_ADMIN. Cc: stable@vger.kernel.org Signed-off-by: Sean Young <sean@mess.org> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> --- drivers/media/rc/bpf-lirc.c | 6 +++--- drivers/media/rc/lirc_dev.c | 5 ++++- drivers/media/rc/rc-core-priv.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index fe17c7f98e81..52d82cbe7685 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -253,7 +253,7 @@ int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->attach_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); @@ -278,7 +278,7 @@ int lirc_prog_detach(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); + rcdev = rc_dev_get_from_fd(attr->target_fd, true); if (IS_ERR(rcdev)) { bpf_prog_put(prog); return PTR_ERR(rcdev); @@ -303,7 +303,7 @@ int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->query.query_flags) return -EINVAL; - rcdev = rc_dev_get_from_fd(attr->query.target_fd); + rcdev = rc_dev_get_from_fd(attr->query.target_fd, false); if (IS_ERR(rcdev)) return PTR_ERR(rcdev); diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a537734832c5..caad59f76793 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -814,7 +814,7 @@ void __exit lirc_dev_exit(void) unregister_chrdev_region(lirc_base_dev, RC_DEV_MAX); } -struct rc_dev *rc_dev_get_from_fd(int fd) +struct rc_dev *rc_dev_get_from_fd(int fd, bool write) { struct fd f = fdget(fd); struct lirc_fh *fh; @@ -828,6 +828,9 @@ struct rc_dev *rc_dev_get_from_fd(int fd) return ERR_PTR(-EINVAL); } + if (write && !(f.file->f_mode & FMODE_WRITE)) + return ERR_PTR(-EPERM); + fh = f.file->private_data; dev = fh->rc; diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h index ef1e95e1af7f..7df949fc65e2 100644 --- a/drivers/media/rc/rc-core-priv.h +++ b/drivers/media/rc/rc-core-priv.h @@ -325,7 +325,7 @@ void lirc_raw_event(struct rc_dev *dev, struct ir_raw_event ev); void lirc_scancode_event(struct rc_dev *dev, struct lirc_scancode *lsc); int lirc_register(struct rc_dev *dev); void lirc_unregister(struct rc_dev *dev); -struct rc_dev *rc_dev_get_from_fd(int fd); +struct rc_dev *rc_dev_get_from_fd(int fd, bool write); #else static inline int lirc_dev_init(void) { return 0; } static inline void lirc_dev_exit(void) {} From dc9ceb90c4b42c6e5c6757df1d6257110433788e Mon Sep 17 00:00:00 2001 From: Zhipeng Lu <alexious@zju.edu.cn> Date: Wed, 17 Jan 2024 09:14:19 +0100 Subject: [PATCH 040/304] media: ir_toy: fix a memleak in irtoy_tx When irtoy_command fails, buf should be freed since it is allocated by irtoy_tx, or there is a memleak. Fixes: 4114978dcd24 ("media: ir_toy: prevent device from hanging during transmit") Signed-off-by: Zhipeng Lu <alexious@zju.edu.cn> Signed-off-by: Sean Young <sean@mess.org> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> --- drivers/media/rc/ir_toy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 196806709259..69e630d85262 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -332,6 +332,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_EXIT), STATE_COMMAND_NO_RESP); if (err) { dev_err(irtoy->dev, "exit sample mode: %d\n", err); + kfree(buf); return err; } @@ -339,6 +340,7 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); if (err) { dev_err(irtoy->dev, "enter sample mode: %d\n", err); + kfree(buf); return err; } From 346c84e281a963437b9fe9dfcd92c531630289de Mon Sep 17 00:00:00 2001 From: Sean Young <sean@mess.org> Date: Tue, 30 Jan 2024 09:55:25 +0100 Subject: [PATCH 041/304] media: pwm-ir-tx: Depend on CONFIG_HIGH_RES_TIMERS Since commit 363d0e56285e ("media: pwm-ir-tx: Trigger edges from hrtimer interrupt context"), pwm-ir-tx uses high resolution timers for IR signal generation when the pwm can be used from atomic context. Ensure they are available. Fixes: 363d0e56285e ("media: pwm-ir-tx: Trigger edges from hrtimer interrupt context") Signed-off-by: Sean Young <sean@mess.org> Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> --- drivers/media/rc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig index 2afe67ffa285..74d69ce22a33 100644 --- a/drivers/media/rc/Kconfig +++ b/drivers/media/rc/Kconfig @@ -319,6 +319,7 @@ config IR_PWM_TX tristate "PWM IR transmitter" depends on LIRC depends on PWM + depends on HIGH_RES_TIMERS depends on OF help Say Y if you want to use a PWM based IR transmitter. This is From e440c5f2e3e6893aeb39bbba6dd181207840a795 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Tue, 9 Jan 2024 15:11:17 +0100 Subject: [PATCH 042/304] KVM: selftests: Generalize check_clocksource() from kvm_clock_test Several existing x86 selftests need to check that the underlying system clocksource is TSC or based on TSC but every test implements its own check. As a first step towards unification, extract check_clocksource() from kvm_clock_test and split it into two functions: arch-neutral 'sys_get_cur_clocksource()' and x86-specific 'sys_clocksource_is_tsc()'. Fix a couple of pre-existing issues in kvm_clock_test: memory leakage in check_clocksource() and using TEST_ASSERT() instead of TEST_REQUIRE(). The change also makes the test fail when system clocksource can't be read from sysfs. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Link: https://lore.kernel.org/r/20240109141121.1619463-2-vkuznets@redhat.com [sean: eliminate if-elif pattern just to set a bool true] Signed-off-by: Sean Christopherson <seanjc@google.com> --- .../testing/selftests/kvm/include/test_util.h | 2 + .../selftests/kvm/include/x86_64/processor.h | 2 + tools/testing/selftests/kvm/lib/test_util.c | 25 ++++++++++++ .../selftests/kvm/lib/x86_64/processor.c | 10 +++++ .../selftests/kvm/x86_64/kvm_clock_test.c | 38 +------------------ 5 files changed, 40 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 71a41fa924b7..50a5e31ba8da 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -195,4 +195,6 @@ __printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...); char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1))); +char *sys_get_cur_clocksource(void); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index a84863503fcb..01eec72e0d3e 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1271,4 +1271,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) #define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) +bool sys_clocksource_is_tsc(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 5d7f28b02d73..5a8f8becb129 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -392,3 +392,28 @@ char *strdup_printf(const char *fmt, ...) return str; } + +#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" + +char *sys_get_cur_clocksource(void) +{ + char *clk_name; + struct stat st; + FILE *fp; + + fp = fopen(CLOCKSOURCE_PATH, "r"); + TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno); + + TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d", + errno); + + clk_name = malloc(st.st_size); + TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); + + TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d", + ferror(fp)); + + fclose(fp); + + return clk_name; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 4bc52948447d..e6964ff2a37d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1299,3 +1299,13 @@ void kvm_selftest_arch_init(void) host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); } + +bool sys_clocksource_is_tsc(void) +{ + char *clk_name = sys_get_cur_clocksource(); + bool ret = !strcmp(clk_name, "tsc\n"); + + free(clk_name); + + return ret; +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 3e0b7d51abda..6fcc1a433587 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -132,42 +132,6 @@ static void enter_guest(struct kvm_vcpu *vcpu) } } -#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" - -static void check_clocksource(void) -{ - char *clk_name; - struct stat st; - FILE *fp; - - fp = fopen(CLOCKSOURCE_PATH, "r"); - if (!fp) { - pr_info("failed to open clocksource file: %d; assuming TSC.\n", - errno); - return; - } - - if (fstat(fileno(fp), &st)) { - pr_info("failed to stat clocksource file: %d; assuming TSC.\n", - errno); - goto out; - } - - clk_name = malloc(st.st_size); - TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); - - if (!fgets(clk_name, st.st_size, fp)) { - pr_info("failed to read clocksource file: %d; assuming TSC.\n", - ferror(fp)); - goto out; - } - - TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size), - "clocksource not supported: %s", clk_name); -out: - fclose(fp); -} - int main(void) { struct kvm_vcpu *vcpu; @@ -179,7 +143,7 @@ int main(void) flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); - check_clocksource(); + TEST_REQUIRE(sys_clocksource_is_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); From 410cb01ead5bcec500c0654f361d620553f930aa Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Tue, 9 Jan 2024 15:11:18 +0100 Subject: [PATCH 043/304] KVM: selftests: Use generic sys_clocksource_is_tsc() in vmx_nested_tsc_scaling_test Despite its name, system_has_stable_tsc() just checks that system clocksource is 'tsc'; this can now be done with generic sys_clocksource_is_tsc(). No functional change intended. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Link: https://lore.kernel.org/r/20240109141121.1619463-3-vkuznets@redhat.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- .../kvm/x86_64/vmx_nested_tsc_scaling_test.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c index e710b6e7fb38..93b0a850a240 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -116,23 +116,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -static bool system_has_stable_tsc(void) -{ - bool tsc_is_stable; - FILE *fp; - char buf[4]; - - fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); - if (fp == NULL) - return false; - - tsc_is_stable = fgets(buf, sizeof(buf), fp) && - !strncmp(buf, "tsc", sizeof(buf)); - - fclose(fp); - return tsc_is_stable; -} - int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -148,7 +131,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - TEST_REQUIRE(system_has_stable_tsc()); + TEST_REQUIRE(sys_clocksource_is_tsc()); /* * We set L1's scale factor to be a random number from 2 to 10. From 09951bf2cbb3a7893f76d1364b0ae6e3007ff1de Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Tue, 9 Jan 2024 15:11:19 +0100 Subject: [PATCH 044/304] KVM: selftests: Run clocksource dependent tests with hyperv_clocksource_tsc_page too KVM's 'gtod_is_based_on_tsc()' recognizes two clocksources: 'tsc' and 'hyperv_clocksource_tsc_page' and enables kvmclock in 'masterclock' mode when either is in use. Transform 'sys_clocksource_is_tsc()' into 'sys_clocksource_is_based_on_tsc()' to support the later. This affects two tests: kvm_clock_test and vmx_nested_tsc_scaling_test, both seem to work well when system clocksource is 'hyperv_clocksource_tsc_page'. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Link: https://lore.kernel.org/r/20240109141121.1619463-4-vkuznets@redhat.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +- tools/testing/selftests/kvm/lib/x86_64/processor.c | 5 +++-- tools/testing/selftests/kvm/x86_64/kvm_clock_test.c | 2 +- .../selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 01eec72e0d3e..5bca8c947c82 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1271,6 +1271,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) #define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) -bool sys_clocksource_is_tsc(void); +bool sys_clocksource_is_based_on_tsc(void); #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index e6964ff2a37d..f639b3e062e3 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1300,10 +1300,11 @@ void kvm_selftest_arch_init(void) host_cpu_is_amd = this_cpu_is_amd(); } -bool sys_clocksource_is_tsc(void) +bool sys_clocksource_is_based_on_tsc(void) { char *clk_name = sys_get_cur_clocksource(); - bool ret = !strcmp(clk_name, "tsc\n"); + bool ret = !strcmp(clk_name, "tsc\n") || + !strcmp(clk_name, "hyperv_clocksource_tsc_page\n"); free(clk_name); diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 6fcc1a433587..5bc12222d87a 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -143,7 +143,7 @@ int main(void) flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); - TEST_REQUIRE(sys_clocksource_is_tsc()); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c index 93b0a850a240..1759fa5cb3f2 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -131,7 +131,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - TEST_REQUIRE(sys_clocksource_is_tsc()); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); /* * We set L1's scale factor to be a random number from 2 to 10. From b6831a108be1206cd9f0e7905b48677b4147d5f9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Tue, 9 Jan 2024 15:11:20 +0100 Subject: [PATCH 045/304] KVM: selftests: Make hyperv_clock require TSC based system clocksource KVM sets up Hyper-V TSC page clocksource for its guests when system clocksource is 'based on TSC' (see gtod_is_based_on_tsc()), running hyperv_clock with any other clocksource leads to imminent failure. Add the missing requirement to make the test skip gracefully. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Link: https://lore.kernel.org/r/20240109141121.1619463-5-vkuznets@redhat.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index 65690d916db7..e058bc676cd6 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -212,6 +212,7 @@ int main(void) int stage; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); From 9e62797fd7e8eef9c8a3f7b54b57fbc9caf6a20a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Tue, 9 Jan 2024 15:11:21 +0100 Subject: [PATCH 046/304] KVM: x86: Make gtod_is_based_on_tsc() return 'bool' gtod_is_based_on_tsc() is boolean in nature, i.e. it returns '1' for good clocksources and '0' otherwise. Moreover, its result is used raw by kvm_get_time_and_clockread()/kvm_get_walltime_and_clockread() which are 'bool'. No functional change intended. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Link: https://lore.kernel.org/r/20240109141121.1619463-6-vkuznets@redhat.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 363b1c080205..aa27aec10860 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2507,7 +2507,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) } #ifdef CONFIG_X86_64 -static inline int gtod_is_based_on_tsc(int mode) +static inline bool gtod_is_based_on_tsc(int mode) { return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK; } From 03facb39d6c6433a78d0f79c7a146b1e6a61943e Mon Sep 17 00:00:00 2001 From: Rob Clark <robdclark@chromium.org> Date: Wed, 31 Jan 2024 07:08:54 -0800 Subject: [PATCH 047/304] drm/msm/gem: Fix double resv lock aquire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 79e2cf2e7a19 ("drm/gem: Take reservation lock for vmap/vunmap operations"), the resv lock is already held in the prime vmap path, so don't try to grab it again. v2: This applies to vunmap path as well v3: Fix fixes commit Fixes: 79e2cf2e7a19 ("drm/gem: Take reservation lock for vmap/vunmap operations") Signed-off-by: Rob Clark <robdclark@chromium.org> Acked-by: Christian König <christian.koenig@amd.com> Patchwork: https://patchwork.freedesktop.org/patch/576642/ --- drivers/gpu/drm/msm/msm_gem_prime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 5f68e31a3e4e..0915f3b68752 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -26,7 +26,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) { void *vaddr; - vaddr = msm_gem_get_vaddr(obj); + vaddr = msm_gem_get_vaddr_locked(obj); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); iosys_map_set_vaddr(map, vaddr); @@ -36,7 +36,7 @@ int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map) { - msm_gem_put_vaddr(obj); + msm_gem_put_vaddr_locked(obj); } struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, From 6a0dbcd20ef252ebf98af94186a2e53da7167bed Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> Date: Tue, 9 Jan 2024 22:41:08 +0200 Subject: [PATCH 048/304] drm/msm/a6xx: set highest_bank_bit to 13 for a610 During the testing of Gnome on Qualcomm Robotics platform screen corruption has been observed. Lowering GPU's highest_bank_bit from 14 to 13 seems to fix the screen corruption. Note, the MDSS and DPU drivers use HBB=1 (which maps to the highest_bank_bit = 14). So this change merely works around the UBWC swizzling issue on this platform until the real cause is found. Fixes: e7fc9398e608 ("drm/msm/a6xx: Add A610 support") Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> Patchwork: https://patchwork.freedesktop.org/patch/573838/ Signed-off-by: Rob Clark <robdclark@chromium.org> --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index c0bc924cd302..c9c55e2ea584 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1287,7 +1287,7 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu) gpu->ubwc_config.highest_bank_bit = 15; if (adreno_is_a610(gpu)) { - gpu->ubwc_config.highest_bank_bit = 14; + gpu->ubwc_config.highest_bank_bit = 13; gpu->ubwc_config.min_acc_len = 1; gpu->ubwc_config.ubwc_mode = 1; } From 917e9b7c2350e3e53162fcf5035e5f2d68e2cbed Mon Sep 17 00:00:00 2001 From: Rob Clark <robdclark@chromium.org> Date: Tue, 9 Jan 2024 10:22:17 -0800 Subject: [PATCH 049/304] Revert "drm/msm/gpu: Push gpu lock down past runpm" This reverts commit abe2023b4cea192ab266b351fd38dc9dbd846df0. Changing the locking order means that scheduler/msm_job_run() can race with the recovery kthread worker, with the result that the GPU gets an extra runpm get when we are trying to power it off. Leaving the GPU in an unrecovered state. I'll need to come up with a different scheme for appeasing lockdep. Signed-off-by: Rob Clark <robdclark@chromium.org> Patchwork: https://patchwork.freedesktop.org/patch/573835/ --- drivers/gpu/drm/msm/msm_gpu.c | 11 +++++------ drivers/gpu/drm/msm/msm_ringbuffer.c | 7 +++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 095390774f22..655002b21b0d 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -751,12 +751,14 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned long flags; + WARN_ON(!mutex_is_locked(&gpu->lock)); + pm_runtime_get_sync(&gpu->pdev->dev); - mutex_lock(&gpu->lock); - msm_gpu_hw_init(gpu); + submit->seqno = submit->hw_fence->seqno; + update_sw_cntrs(gpu); /* @@ -781,11 +783,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) gpu->funcs->submit(gpu, submit); gpu->cur_ctx_seqno = submit->queue->ctx->seqno; - hangcheck_timer_reset(gpu); - - mutex_unlock(&gpu->lock); - pm_runtime_put(&gpu->pdev->dev); + hangcheck_timer_reset(gpu); } /* diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 4bc13f7d005a..9d6655f96f0c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -21,8 +21,6 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) msm_fence_init(submit->hw_fence, fctx); - submit->seqno = submit->hw_fence->seqno; - mutex_lock(&priv->lru.lock); for (i = 0; i < submit->nr_bos; i++) { @@ -35,8 +33,13 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) mutex_unlock(&priv->lru.lock); + /* TODO move submit path over to using a per-ring lock.. */ + mutex_lock(&gpu->lock); + msm_gpu_submit(gpu, submit); + mutex_unlock(&gpu->lock); + return dma_fence_get(submit->hw_fence); } From 7fddac12c38237252431d5b8af7b6d5771b6d125 Mon Sep 17 00:00:00 2001 From: Saravana Kannan <saravanak@google.com> Date: Fri, 2 Feb 2024 01:56:33 -0800 Subject: [PATCH 050/304] driver core: Fix device_link_flag_is_sync_state_only() device_link_flag_is_sync_state_only() correctly returns true on the flags of an existing device link that only implements sync_state() functionality. However, it incorrectly and confusingly returns false if it's called with DL_FLAG_SYNC_STATE_ONLY. This bug doesn't manifest in any of the existing calls to this function, but fix this confusing behavior to avoid future bugs. Fixes: 67cad5c67019 ("driver core: fw_devlink: Add DL_FLAG_CYCLE support to device links") Signed-off-by: Saravana Kannan <saravanak@google.com> Tested-by: Xu Yang <xu.yang_2@nxp.com> Link: https://lore.kernel.org/r/20240202095636.868578-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 14d46af40f9a..52215c4c7209 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -284,10 +284,12 @@ static bool device_is_ancestor(struct device *dev, struct device *target) return false; } +#define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ + DL_FLAG_CYCLE | \ + DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { - return (flags & ~(DL_FLAG_INFERRED | DL_FLAG_CYCLE)) == - (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED); + return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** From 6442d79d880cf7a2fff18779265d657fef0cce4c Mon Sep 17 00:00:00 2001 From: Saravana Kannan <saravanak@google.com> Date: Fri, 2 Feb 2024 01:56:34 -0800 Subject: [PATCH 051/304] driver core: fw_devlink: Improve detection of overlapping cycles fw_devlink can detect most overlapping/intersecting cycles. However it was missing a few corner cases because of an incorrect optimization logic that tries to avoid repeating cycle detection for devices that are already marked as part of a cycle. Here's an example provided by Xu Yang (edited for clarity): usb +-----+ tcpc | | +-----+ | +--| | |----------->|EP| |--+ | | +--| |EP|<-----------| | |--+ | | B | | | +-----+ | A | | +-----+ | ^ +-----+ | | | | | +-----| C |<--+ | | +-----+ usb-phy Node A (tcpc) will be populated as device 1-0050. Node B (usb) will be populated as device 38100000.usb. Node C (usb-phy) will be populated as device 381f0040.usb-phy. The description below uses the notation: consumer --> supplier child ==> parent 1. Node C is populated as device C. No cycles detected because cycle detection is only run when a fwnode link is converted to a device link. 2. Node B is populated as device B. As we convert B --> C into a device link we run cycle detection and find and mark the device link/fwnode link cycle: C--> A --> B.EP ==> B --> C 3. Node A is populated as device A. As we convert C --> A into a device link, we see it's already part of a cycle (from step 2) and don't run cycle detection. Thus we miss detecting the cycle: A --> B.EP ==> B --> A.EP ==> A Looking at it another way, A depends on B in one way: A --> B.EP ==> B But B depends on A in two ways and we only detect the first: B --> C --> A B --> A.EP ==> A To detect both of these, we remove the incorrect optimization attempt in step 3 and run cycle detection even if the fwnode link from which the device link is being created has already been marked as part of a cycle. Reported-by: Xu Yang <xu.yang_2@nxp.com> Closes: https://lore.kernel.org/lkml/DU2PR04MB8822693748725F85DC0CB86C8C792@DU2PR04MB8822.eurprd04.prod.outlook.com/ Fixes: 3fb16866b51d ("driver core: fw_devlink: Make cycle detection more robust") Signed-off-by: Saravana Kannan <saravanak@google.com> Tested-by: Xu Yang <xu.yang_2@nxp.com> Link: https://lore.kernel.org/r/20240202095636.868578-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/base/core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 52215c4c7209..e3d666461835 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2060,9 +2060,14 @@ static int fw_devlink_create_devlink(struct device *con, /* * SYNC_STATE_ONLY device links don't block probing and supports cycles. - * So cycle detection isn't necessary and shouldn't be done. + * So, one might expect that cycle detection isn't necessary for them. + * However, if the device link was marked as SYNC_STATE_ONLY because + * it's part of a cycle, then we still need to do cycle detection. This + * is because the consumer and supplier might be part of multiple cycles + * and we need to detect all those cycles. */ - if (!(flags & DL_FLAG_SYNC_STATE_ONLY)) { + if (!device_link_flag_is_sync_state_only(flags) || + flags & DL_FLAG_CYCLE) { device_links_write_lock(); if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); From 6e7ad1aebb4fc9fed0217dd50ef6e58a53f17d81 Mon Sep 17 00:00:00 2001 From: Saravana Kannan <saravanak@google.com> Date: Fri, 2 Feb 2024 01:56:35 -0800 Subject: [PATCH 052/304] driver core: fw_devlink: Improve logs for cycle detection The links in a cycle are not all logged in a consistent manner or not logged at all. Make them consistent by adding a "cycle:" string and log all the link in the cycles (even the child ==> parent dependency) so that it's easier to debug cycle detection code. Also, mark the start and end of a cycle so it's easy to tell when multiple cycles are logged back to back. Signed-off-by: Saravana Kannan <saravanak@google.com> Tested-by: Xu Yang <xu.yang_2@nxp.com> Link: https://lore.kernel.org/r/20240202095636.868578-4-saravanak@google.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/base/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index e3d666461835..9828da9b933c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -125,7 +125,7 @@ static void __fwnode_link_del(struct fwnode_link *link) */ static void __fwnode_link_cycle(struct fwnode_link *link) { - pr_debug("%pfwf: Relaxing link with %pfwf\n", + pr_debug("%pfwf: cycle: depends on %pfwf\n", link->consumer, link->supplier); link->flags |= FWLINK_FLAG_CYCLE; } @@ -1945,6 +1945,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, /* Termination condition. */ if (sup_dev == con) { + pr_debug("----- cycle: start -----\n"); ret = true; goto out; } @@ -1976,8 +1977,11 @@ static bool __fw_devlink_relax_cycles(struct device *con, else par_dev = fwnode_get_next_parent_dev(sup_handle); - if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) + if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) { + pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle, + par_dev->fwnode); ret = true; + } if (!sup_dev) goto out; @@ -1993,6 +1997,8 @@ static bool __fw_devlink_relax_cycles(struct device *con, if (__fw_devlink_relax_cycles(con, dev_link->supplier->fwnode)) { + pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle, + dev_link->supplier->fwnode); fw_devlink_relax_link(dev_link); dev_link->flags |= DL_FLAG_CYCLE; ret = true; @@ -2072,6 +2078,7 @@ static int fw_devlink_create_devlink(struct device *con, if (__fw_devlink_relax_cycles(con, sup_handle)) { __fwnode_link_cycle(link); flags = fw_devlink_get_flags(link->flags); + pr_debug("----- cycle: end -----\n"); dev_info(con, "Fixed dependency cycle(s) with %pfwf\n", sup_handle); } From 05519c86d6997cfb9bb6c82ce1595d1015b718dc Mon Sep 17 00:00:00 2001 From: Mingwei Zhang <mizhang@google.com> Date: Tue, 23 Jan 2024 22:12:20 +0000 Subject: [PATCH 053/304] KVM: x86/pmu: Fix type length error when reading pmu->fixed_ctr_ctrl Use a u64 instead of a u8 when taking a snapshot of pmu->fixed_ctr_ctrl when reprogramming fixed counters, as truncating the value results in KVM thinking fixed counter 2 is already disabled (the bug also affects fixed counters 3+, but KVM doesn't yet support those). As a result, if the guest disables fixed counter 2, KVM will get a false negative and fail to reprogram/disable emulation of the counter, which can leads to incorrect counts and spurious PMIs in the guest. Fixes: 76d287b2342e ("KVM: x86/pmu: Drop "u8 ctrl, int idx" for reprogram_fixed_counter()") Cc: stable@vger.kernel.org Signed-off-by: Mingwei Zhang <mizhang@google.com> Link: https://lore.kernel.org/r/20240123221220.3911317-1-mizhang@google.com [sean: rewrite changelog to call out the effects of the bug] Signed-off-by: Sean Christopherson <seanjc@google.com> --- arch/x86/kvm/vmx/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index a6216c874729..315c7c2ba89b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -71,7 +71,7 @@ static int fixed_pmc_events[] = { static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) { struct kvm_pmc *pmc; - u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; + u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl; int i; pmu->fixed_ctr_ctrl = data; From bd97cea7b18a0a553773af806dfbfac27a7c4acb Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn <mike.marciniszyn@intel.com> Date: Wed, 31 Jan 2024 17:38:46 -0600 Subject: [PATCH 054/304] RDMA/irdma: Fix KASAN issue with tasklet KASAN testing revealed the following issue assocated with freeing an IRQ. [50006.466686] Call Trace: [50006.466691] <IRQ> [50006.489538] dump_stack+0x5c/0x80 [50006.493475] print_address_description.constprop.6+0x1a/0x150 [50006.499872] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.505742] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.511644] kasan_report.cold.11+0x7f/0x118 [50006.516572] ? irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.522473] irdma_sc_process_ceq+0x483/0x790 [irdma] [50006.528232] irdma_process_ceq+0xb2/0x400 [irdma] [50006.533601] ? irdma_hw_flush_wqes_callback+0x370/0x370 [irdma] [50006.540298] irdma_ceq_dpc+0x44/0x100 [irdma] [50006.545306] tasklet_action_common.isra.14+0x148/0x2c0 [50006.551096] __do_softirq+0x1d0/0xaf8 [50006.555396] irq_exit_rcu+0x219/0x260 [50006.559670] irq_exit+0xa/0x20 [50006.563320] smp_apic_timer_interrupt+0x1bf/0x690 [50006.568645] apic_timer_interrupt+0xf/0x20 [50006.573341] </IRQ> The issue is that a tasklet could be pending on another core racing the delete of the irq. Fix by insuring any scheduled tasklet is killed after deleting the irq. Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Sindhu Devale <sindhu.devale@intel.com> Link: https://lore.kernel.org/r/20240131233849.400285-2-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/irdma/hw.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index bd4b2b896444..2f8d18d8be3b 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -570,6 +570,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** From ee107186bcfd25d7873258f3f75440e20f5e6416 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem <shiraz.saleem@intel.com> Date: Wed, 31 Jan 2024 17:38:47 -0600 Subject: [PATCH 055/304] RDMA/irdma: Validate max_send_wr and max_recv_wr Validate that max_send_wr and max_recv_wr is within the supported range. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Change-Id: I2fc8b10292b641fddd20b36986a9dae90a93f4be Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Sindhu Devale <sindhu.devale@intel.com> Link: https://lore.kernel.org/r/20240131233849.400285-3-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/irdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index b5eb8d421988..cb828e3da478 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -839,7 +839,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr, if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline || init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || - init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags) + init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || + init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta || + init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta) return -EINVAL; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { From 666047f3ece9f991774c1fe9b223139a9ef8908d Mon Sep 17 00:00:00 2001 From: Mustafa Ismail <mustafa.ismail@intel.com> Date: Wed, 31 Jan 2024 17:38:48 -0600 Subject: [PATCH 056/304] RDMA/irdma: Set the CQ read threshold for GEN 1 The CQ shadow read threshold is currently not set for GEN 2. This could cause an invalid CQ overflow condition, so remove the GEN check that exclused GEN 1. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Sindhu Devale <sindhu.devale@intel.com> Link: https://lore.kernel.org/r/20240131233849.400285-4-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/irdma/verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index cb828e3da478..0b046c061742 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2186,9 +2186,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.cq_base_pa = iwcq->kmem.pa; } - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) - info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, - (u32)IRDMA_MAX_CQ_READ_THRESH); + info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2, + (u32)IRDMA_MAX_CQ_READ_THRESH); if (irdma_sc_cq_init(cq, &info)) { ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n"); From 630bdb6f28ca9e5ff79e244030170ac788478332 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail <mustafa.ismail@intel.com> Date: Wed, 31 Jan 2024 17:38:49 -0600 Subject: [PATCH 057/304] RDMA/irdma: Add AE for too many RNRS Add IRDMA_AE_LLP_TOO_MANY_RNRS to the list of AE's processed as an abnormal asyncronous event. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Sindhu Devale <sindhu.devale@gmail.com> Link: https://lore.kernel.org/r/20240131233849.400285-5-sindhu.devale@intel.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/irdma/defs.h | 1 + drivers/infiniband/hw/irdma/hw.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 8fb752f2eda2..2cb4b96db721 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -346,6 +346,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b #define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c #define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e +#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f #define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520 #define IRDMA_AE_RESET_SENT 0x0601 #define IRDMA_AE_TERMINATE_SENT 0x0602 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 2f8d18d8be3b..ad50b77282f8 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -387,6 +387,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LLP_TOO_MANY_RETRIES: case IRDMA_AE_LCE_QP_CATASTROPHIC: case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: + case IRDMA_AE_LLP_TOO_MANY_RNRS: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: default: From e6f57c6881916df39db7d95981a8ad2b9c3458d6 Mon Sep 17 00:00:00 2001 From: Daniel Vacek <neelx@redhat.com> Date: Thu, 1 Feb 2024 09:10:08 +0100 Subject: [PATCH 058/304] IB/hfi1: Fix sdma.h tx->num_descs off-by-one error Unfortunately the commit `fd8958efe877` introduced another error causing the `descs` array to overflow. This reults in further crashes easily reproducible by `sendmsg` system call. [ 1080.836473] general protection fault, probably for non-canonical address 0x400300015528b00a: 0000 [#1] PREEMPT SMP PTI [ 1080.869326] RIP: 0010:hfi1_ipoib_build_ib_tx_headers.constprop.0+0xe1/0x2b0 [hfi1] -- [ 1080.974535] Call Trace: [ 1080.976990] <TASK> [ 1081.021929] hfi1_ipoib_send_dma_common+0x7a/0x2e0 [hfi1] [ 1081.027364] hfi1_ipoib_send_dma_list+0x62/0x270 [hfi1] [ 1081.032633] hfi1_ipoib_send+0x112/0x300 [hfi1] [ 1081.042001] ipoib_start_xmit+0x2a9/0x2d0 [ib_ipoib] [ 1081.046978] dev_hard_start_xmit+0xc4/0x210 -- [ 1081.148347] __sys_sendmsg+0x59/0xa0 crash> ipoib_txreq 0xffff9cfeba229f00 struct ipoib_txreq { txreq = { list = { next = 0xffff9cfeba229f00, prev = 0xffff9cfeba229f00 }, descp = 0xffff9cfeba229f40, coalesce_buf = 0x0, wait = 0xffff9cfea4e69a48, complete = 0xffffffffc0fe0760 <hfi1_ipoib_sdma_complete>, packet_len = 0x46d, tlen = 0x0, num_desc = 0x0, desc_limit = 0x6, next_descq_idx = 0x45c, coalesce_idx = 0x0, flags = 0x0, descs = {{ qw = {0x8024000120dffb00, 0x4} # SDMA_DESC0_FIRST_DESC_FLAG (bit 63) }, { qw = { 0x3800014231b108, 0x4} }, { qw = { 0x310000e4ee0fcf0, 0x8} }, { qw = { 0x3000012e9f8000, 0x8} }, { qw = { 0x59000dfb9d0000, 0x8} }, { qw = { 0x78000e02e40000, 0x8} }} }, sdma_hdr = 0x400300015528b000, <<< invalid pointer in the tx request structure sdma_status = 0x0, SDMA_DESC0_LAST_DESC_FLAG (bit 62) complete = 0x0, priv = 0x0, txq = 0xffff9cfea4e69880, skb = 0xffff9d099809f400 } If an SDMA send consists of exactly 6 descriptors and requires dword padding (in the 7th descriptor), the sdma_txreq descriptor array is not properly expanded and the packet will overflow into the container structure. This results in a panic when the send completion runs. The exact panic varies depending on what elements of the container structure get corrupted. The fix is to use the correct expression in _pad_sdma_tx_descs() to test the need to expand the descriptor array. With this patch the crashes are no longer reproducible and the machine is stable. Fixes: fd8958efe877 ("IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors") Cc: stable@vger.kernel.org Reported-by: Mats Kronberg <kronberg@nsc.liu.se> Tested-by: Mats Kronberg <kronberg@nsc.liu.se> Signed-off-by: Daniel Vacek <neelx@redhat.com> Link: https://lore.kernel.org/r/20240201081009.1109442-1-neelx@redhat.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/hfi1/sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6e5ac2023328..b67d23b1f286 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { + if ((unlikely(tx->num_desc == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); From 42dfa94d802a48c871e2017cbf86153270c86632 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <masahiroy@kernel.org> Date: Sun, 4 Feb 2024 16:43:05 +0900 Subject: [PATCH 059/304] KVM: arm64: Do not source virt/lib/Kconfig twice For ARCH=arm64, virt/lib/Kconfig is sourced twice, from arch/arm64/kvm/Kconfig and from drivers/vfio/Kconfig. There is no good reason to parse virt/lib/Kconfig twice. Commit 2412405b3141 ("KVM: arm/arm64: register irq bypass consumer on ARM/ARM64") should not have added this 'source' directive. Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20240204074305.31492-1-masahiroy@kernel.org --- arch/arm64/kvm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 6c3c8ca73e7f..27ca89b628a0 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -3,7 +3,6 @@ # KVM configuration # -source "virt/lib/Kconfig" source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION From 621c6257128149e45b36ffb973a01c3f3461b893 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> Date: Sun, 4 Feb 2024 04:56:17 -0800 Subject: [PATCH 060/304] iio: hid-sensor-als: Return 0 for HID_USAGE_SENSOR_TIME_TIMESTAMP When als_capture_sample() is called with usage ID HID_USAGE_SENSOR_TIME_TIMESTAMP, return 0. The HID sensor core ignores the return value for capture_sample() callback, so return value doesn't make difference. But correct the return value to return success instead of -EINVAL. Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> Link: https://lore.kernel.org/r/20240204125617.2635574-1-srinivas.pandruvada@linux.intel.com Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/light/hid-sensor-als.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c index 5cd27f04b45e..b6c4bef2a7bb 100644 --- a/drivers/iio/light/hid-sensor-als.c +++ b/drivers/iio/light/hid-sensor-als.c @@ -226,6 +226,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev, case HID_USAGE_SENSOR_TIME_TIMESTAMP: als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes, *(s64 *)raw_data); + ret = 0; break; default: break; From 862cf85fef85becc55a173387527adb4f076fab0 Mon Sep 17 00:00:00 2001 From: Nuno Sa <nuno.sa@analog.com> Date: Wed, 31 Jan 2024 10:16:47 +0100 Subject: [PATCH 061/304] iio: commom: st_sensors: ensure proper DMA alignment Aligning the buffer to the L1 cache is not sufficient in some platforms as they might have larger cacheline sizes for caches after L1 and thus, we can't guarantee DMA safety. That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same for st_sensors common buffer. While at it, moved the odr_lock before buffer_data as we definitely don't want any other data to share a cacheline with the buffer. [1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/ Fixes: e031d5f558f1 ("iio:st_sensors: remove buffer allocation at each buffer enable") Signed-off-by: Nuno Sa <nuno.sa@analog.com> Cc: <Stable@vger.kernel.org> Link: https://lore.kernel.org/r/20240131-dev_dma_safety_stm-v2-1-580c07fae51b@analog.com Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- include/linux/iio/common/st_sensors.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 607c3a89a647..f9ae5cdd884f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -258,9 +258,9 @@ struct st_sensor_data { bool hw_irq_trigger; s64 hw_timestamp; - char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned; - struct mutex odr_lock; + + char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN); }; #ifdef CONFIG_IIO_BUFFER From 4cb81840d8f29b66d9d05c6d7f360c9560f7e2f4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello <mario.limonciello@amd.com> Date: Wed, 31 Jan 2024 16:52:46 -0600 Subject: [PATCH 062/304] iio: accel: bma400: Fix a compilation problem The kernel fails when compiling without `CONFIG_REGMAP_I2C` but with `CONFIG_BMA400`. ``` ld: drivers/iio/accel/bma400_i2c.o: in function `bma400_i2c_probe': bma400_i2c.c:(.text+0x23): undefined reference to `__devm_regmap_init_i2c' ``` Link: https://download.01.org/0day-ci/archive/20240131/202401311634.FE5CBVwe-lkp@intel.com/config Fixes: 465c811f1f20 ("iio: accel: Add driver for the BMA400") Fixes: 9bea10642396 ("iio: accel: bma400: add support for bma400 spi") Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> Link: https://lore.kernel.org/r/20240131225246.14169-1-mario.limonciello@amd.com Cc: <Stable@vger.kernel.org> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/accel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 91adcac875a4..c9d7afe489e8 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -219,10 +219,12 @@ config BMA400 config BMA400_I2C tristate + select REGMAP_I2C depends on BMA400 config BMA400_SPI tristate + select REGMAP_SPI depends on BMA400 config BMC150_ACCEL From a40f93e9286968863c661f2b9e314d97adc2f84e Mon Sep 17 00:00:00 2001 From: Mike Tipton <quic_mdtipton@quicinc.com> Date: Thu, 1 Feb 2024 17:48:05 -0800 Subject: [PATCH 063/304] interconnect: qcom: sm8650: Use correct ACV enable_mask The ACV enable_mask is historically BIT(3), but it's BIT(0) on this target. Fix it. Fixes: c062bcab5924 ("interconnect: qcom: introduce RPMh Network-On-Chip Interconnect on SM8650 SoC") Signed-off-by: Mike Tipton <quic_mdtipton@quicinc.com> Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org> Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org> Link: https://lore.kernel.org/r/20240202014806.7876-2-quic_mdtipton@quicinc.com Signed-off-by: Georgi Djakov <djakov@kernel.org> --- drivers/interconnect/qcom/sm8650.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/sm8650.c b/drivers/interconnect/qcom/sm8650.c index b83de54577b6..b962e6c233ef 100644 --- a/drivers/interconnect/qcom/sm8650.c +++ b/drivers/interconnect/qcom/sm8650.c @@ -1160,7 +1160,7 @@ static struct qcom_icc_node qns_gemnoc_sf = { static struct qcom_icc_bcm bcm_acv = { .name = "ACV", - .enable_mask = BIT(3), + .enable_mask = BIT(0), .num_nodes = 1, .nodes = { &ebi }, }; From 5464e7acea4a6c56b3c5c2d7aeef2eda92227b33 Mon Sep 17 00:00:00 2001 From: Mike Tipton <quic_mdtipton@quicinc.com> Date: Thu, 1 Feb 2024 17:48:06 -0800 Subject: [PATCH 064/304] interconnect: qcom: x1e80100: Add missing ACV enable_mask The ACV BCM is voted using bitmasks. Add the proper mask for this target. Fixes: 9f196772841e ("interconnect: qcom: Add X1E80100 interconnect provider driver") Signed-off-by: Mike Tipton <quic_mdtipton@quicinc.com> Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org> Tested-by: Abel Vesa <abel.vesa@linaro.org> Link: https://lore.kernel.org/r/20240202014806.7876-3-quic_mdtipton@quicinc.com Signed-off-by: Georgi Djakov <djakov@kernel.org> --- drivers/interconnect/qcom/x1e80100.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/x1e80100.c b/drivers/interconnect/qcom/x1e80100.c index d19501d913b3..cbaf4f9c41be 100644 --- a/drivers/interconnect/qcom/x1e80100.c +++ b/drivers/interconnect/qcom/x1e80100.c @@ -1586,6 +1586,7 @@ static struct qcom_icc_node qns_pcie_south_gem_noc_pcie = { static struct qcom_icc_bcm bcm_acv = { .name = "ACV", + .enable_mask = BIT(3), .num_nodes = 1, .nodes = { &ebi }, }; From 8ded03ae48b3657e0fbca99d8a9d8fa1bfb8c9be Mon Sep 17 00:00:00 2001 From: Nathan Lynch <nathanl@linux.ibm.com> Date: Fri, 2 Feb 2024 18:26:46 -0600 Subject: [PATCH 065/304] powerpc/pseries/papr-sysparm: use u8 arrays for payloads Some PAPR system parameter values are formatted by firmware as nul-terminated strings (e.g. LPAR name, shared processor attributes). But the values returned for other parameters, such as processor module info and TLB block invalidate characteristics, are binary data with parameter-specific layouts. So char[] isn't the appropriate type for the general case. Use u8/__u8. Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com> Fixes: 905b9e48786e ("powerpc/pseries/papr-sysparm: Expose character device to user space") Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240202-papr-sysparm-ioblock-data-use-u8-v1-1-f5c6c89f65ec@linux.ibm.com --- arch/powerpc/include/asm/papr-sysparm.h | 2 +- arch/powerpc/include/uapi/asm/papr-sysparm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h index 0dbbff59101d..c3cd5b131033 100644 --- a/arch/powerpc/include/asm/papr-sysparm.h +++ b/arch/powerpc/include/asm/papr-sysparm.h @@ -32,7 +32,7 @@ typedef struct { */ struct papr_sysparm_buf { __be16 len; - char val[PAPR_SYSPARM_MAX_OUTPUT]; + u8 val[PAPR_SYSPARM_MAX_OUTPUT]; }; struct papr_sysparm_buf *papr_sysparm_buf_alloc(void); diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h index 9f9a0f267ea5..f733467b1534 100644 --- a/arch/powerpc/include/uapi/asm/papr-sysparm.h +++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h @@ -14,7 +14,7 @@ enum { struct papr_sysparm_io_block { __u32 parameter; __u16 length; - char data[PAPR_SYSPARM_MAX_OUTPUT]; + __u8 data[PAPR_SYSPARM_MAX_OUTPUT]; }; /** From 4639c5021029d49fd2f97fa8d74731f167f98919 Mon Sep 17 00:00:00 2001 From: bo liu <bo.liu@senarytech.com> Date: Mon, 5 Feb 2024 09:38:02 +0800 Subject: [PATCH 066/304] ALSA: hda/conexant: Add quirk for SWS JS201D The SWS JS201D need a different pinconfig from windows driver. Add a quirk to use a specific pinconfig to SWS JS201D. Signed-off-by: bo liu <bo.liu@senarytech.com> Cc: <stable@vger.kernel.org> Link: https://lore.kernel.org/r/20240205013802.51907-1-bo.liu@senarytech.com Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_conexant.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index e8819e8a9876..e8209178d87b 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -344,6 +344,7 @@ enum { CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, + CXT_PINCFG_SWS_JS201D, }; /* for hda_fixup_thinkpad_acpi() */ @@ -841,6 +842,17 @@ static const struct hda_pintbl cxt_pincfg_lemote[] = { {} }; +/* SuoWoSi/South-holding JS201D with sn6140 */ +static const struct hda_pintbl cxt_pincfg_sws_js201d[] = { + { 0x16, 0x03211040 }, /* hp out */ + { 0x17, 0x91170110 }, /* SPK/Class_D */ + { 0x18, 0x95a70130 }, /* Internal mic */ + { 0x19, 0x03a11020 }, /* Headset Mic */ + { 0x1a, 0x40f001f0 }, /* Not used */ + { 0x21, 0x40f001f0 }, /* Not used */ + {} +}; + static const struct hda_fixup cxt_fixups[] = { [CXT_PINCFG_LENOVO_X200] = { .type = HDA_FIXUP_PINS, @@ -996,6 +1008,10 @@ static const struct hda_fixup cxt_fixups[] = { .chained = true, .chain_id = CXT_FIXUP_HEADSET_MIC, }, + [CXT_PINCFG_SWS_JS201D] = { + .type = HDA_FIXUP_PINS, + .v.pins = cxt_pincfg_sws_js201d, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -1069,6 +1085,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8458, "HP Z2 G4 mini premium", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), + SND_PCI_QUIRK(0x14f1, 0x0265, "SWS JS201D", CXT_PINCFG_SWS_JS201D), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x215e, "Lenovo T410", CXT_PINCFG_LENOVO_TP410), @@ -1109,6 +1126,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, + { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, {} }; From fddab35fd064414c677e9488c4fb3a1f67725d37 Mon Sep 17 00:00:00 2001 From: Shuming Fan <shumingf@realtek.com> Date: Mon, 5 Feb 2024 15:22:52 +0800 Subject: [PATCH 067/304] ALSA: hda/realtek: add IDs for Dell dual spk platform This patch adds another two IDs for the Dell dual speaker platform. Signed-off-by: Shuming Fan <shumingf@realtek.com> Cc: <stable@vger.kernel.org> Link: https://lore.kernel.org/r/20240205072252.3791500-1-shumingf@realtek.com Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6994c4c5073c..e045d3e76a45 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9737,7 +9737,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0beb, "Dell XPS 15 9530 (2023)", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0c0b, "Dell Oasis 14 RPL-P", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0c0d, "Dell Oasis", ALC289_FIXUP_RTK_AMP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0c0e, "Dell Oasis 16", ALC289_FIXUP_RTK_AMP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), From fdfa083549de5d50ebf7f6811f33757781e838c0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche <bvanassche@acm.org> Date: Sun, 4 Feb 2024 16:42:07 -0800 Subject: [PATCH 068/304] RDMA/srpt: Support specifying the srpt_service_guid parameter Make loading ib_srpt with this parameter set work. The current behavior is that setting that parameter while loading the ib_srpt kernel module triggers the following kernel crash: BUG: kernel NULL pointer dereference, address: 0000000000000000 Call Trace: <TASK> parse_one+0x18c/0x1d0 parse_args+0xe1/0x230 load_module+0x8de/0xa60 init_module_from_file+0x8b/0xd0 idempotent_init_module+0x181/0x240 __x64_sys_finit_module+0x5a/0xb0 do_syscall_64+0x5f/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Cc: LiHonggang <honggangli@163.com> Reported-by: LiHonggang <honggangli@163.com> Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Bart Van Assche <bvanassche@acm.org> Link: https://lore.kernel.org/r/20240205004207.17031-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/ulp/srpt/ib_srpt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 58f70cfec45a..d2dce6ce30a9 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); From ed8b94f6e0acd652ce69bd69d678a0c769172df8 Mon Sep 17 00:00:00 2001 From: Gaurav Batra <gbatra@linux.ibm.com> Date: Mon, 22 Jan 2024 16:24:07 -0600 Subject: [PATCH 069/304] powerpc/pseries/iommu: Fix iommu initialisation during DLPAR add When a PCI device is dynamically added, the kernel oopses with a NULL pointer dereference: BUG: Kernel NULL pointer dereference on read at 0x00000030 Faulting instruction address: 0xc0000000006bbe5c Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs xsk_diag bonding nft_compat nf_tables nfnetlink rfkill binfmt_misc dm_multipath rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_umad ib_iser libiscsi scsi_transport_iscsi ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core pseries_rng drm drm_panel_orientation_quirks xfs libcrc32c mlx5_core mlxfw sd_mod t10_pi sg tls ibmvscsi ibmveth scsi_transport_srp vmx_crypto pseries_wdt psample dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 17 PID: 2685 Comm: drmgr Not tainted 6.7.0-203405+ #66 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries NIP: c0000000006bbe5c LR: c000000000a13e68 CTR: c0000000000579f8 REGS: c00000009924f240 TRAP: 0300 Not tainted (6.7.0-203405+) MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 24002220 XER: 20040006 CFAR: c000000000a13e64 DAR: 0000000000000030 DSISR: 40000000 IRQMASK: 0 ... NIP sysfs_add_link_to_group+0x34/0x94 LR iommu_device_link+0x5c/0x118 Call Trace: iommu_init_device+0x26c/0x318 (unreliable) iommu_device_link+0x5c/0x118 iommu_init_device+0xa8/0x318 iommu_probe_device+0xc0/0x134 iommu_bus_notifier+0x44/0x104 notifier_call_chain+0xb8/0x19c blocking_notifier_call_chain+0x64/0x98 bus_notify+0x50/0x7c device_add+0x640/0x918 pci_device_add+0x23c/0x298 of_create_pci_dev+0x400/0x884 of_scan_pci_dev+0x124/0x1b0 __of_scan_bus+0x78/0x18c pcibios_scan_phb+0x2a4/0x3b0 init_phb_dynamic+0xb8/0x110 dlpar_add_slot+0x170/0x3b8 [rpadlpar_io] add_slot_store.part.0+0xb4/0x130 [rpadlpar_io] kobj_attr_store+0x2c/0x48 sysfs_kf_write+0x64/0x78 kernfs_fop_write_iter+0x1b0/0x290 vfs_write+0x350/0x4a0 ksys_write+0x84/0x140 system_call_exception+0x124/0x330 system_call_vectored_common+0x15c/0x2ec Commit a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") broke DLPAR add of PCI devices. The above added iommu_device structure to pci_controller. During system boot, PCI devices are discovered and this newly added iommu_device structure is initialized by a call to iommu_device_register(). During DLPAR add of a PCI device, a new pci_controller structure is allocated but there are no calls made to iommu_device_register() interface. Fix is to register the iommu device during DLPAR add as well. Fixes: a940904443e4 ("powerpc/iommu: Add iommu_ops to report capabilities and allow blocking domains") Signed-off-by: Gaurav Batra <gbatra@linux.ibm.com> [mpe: Trim oops and tweak some change log wording] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240122222407.39603-1-gbatra@linux.ibm.com --- arch/powerpc/include/asm/ppc-pci.h | 3 +++ arch/powerpc/kernel/iommu.c | 21 ++++++++++++++++----- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index ce2b1b5eebdd..c3a3f3df36d1 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -29,6 +29,9 @@ void *pci_traverse_device_nodes(struct device_node *start, void *(*fn)(struct device_node *, void *), void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); +extern void ppc_iommu_register_device(struct pci_controller *phb); +extern void ppc_iommu_unregister_device(struct pci_controller *phb); + /* From rtas_pci.h */ extern void init_pci_config_tokens (void); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ebe259bdd462..c6f62e130d55 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1388,6 +1388,21 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = { NULL, }; +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + /* * This registers IOMMU devices of PHBs. This needs to happen * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and @@ -1398,11 +1413,7 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void) struct pci_controller *hose; list_for_each_entry(hose, &hose_list, list_node) { - iommu_device_sysfs_add(&hose->iommu, hose->parent, - spapr_tce_iommu_groups, "iommu-phb%04x", - hose->global_number); - iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, - hose->parent); + ppc_iommu_register_device(hose); } return 0; } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4ba824568119..4448386268d9 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -35,6 +35,8 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pseries_msi_allocate_domains(phb); + ppc_iommu_register_device(phb); + /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -76,6 +78,8 @@ int remove_phb_dynamic(struct pci_controller *phb) } } + ppc_iommu_unregister_device(phb); + pseries_msi_free_domains(phb); /* Keep a reference so phb isn't freed yet */ From aad98efd0b121f63a2e1c221dcb4d4850128c697 Mon Sep 17 00:00:00 2001 From: Naveen N Rao <naveen@kernel.org> Date: Fri, 2 Feb 2024 21:13:16 +0530 Subject: [PATCH 070/304] powerpc/64: Set task pt_regs->link to the LR value on scv entry Nysal reported that userspace backtraces are missing in offcputime bcc tool. As an example: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write - python (9107) 8 write - sudo (9105) 9 mmap - python (9107) 16 clock_nanosleep - multipathd (697) 3001604 The offcputime bcc tool attaches a bpf program to a kprobe on finish_task_switch(), which is usually hit on a syscall from userspace. With the switch to system call vectored, we started setting pt_regs->link to zero. This is because system call vectored behaves like a function call with LR pointing to the system call return address, and with no modification to SRR0/SRR1. The LR value does indicate our next instruction, so it is being saved as pt_regs->nip, and pt_regs->link is being set to zero. This is not a problem by itself, but BPF uses perf callchain infrastructure for capturing stack traces, and that stores LR as the second entry in the stack trace. perf has code to cope with the second entry being zero, and skips over it. However, generic userspace unwinders assume that a zero entry indicates end of the stack trace, resulting in a truncated userspace stack trace. Rather than fixing all userspace unwinders to ignore/skip past the second entry, store the real LR value in pt_regs->link so that there continues to be a valid, though duplicate entry in the stack trace. With this change: $ sudo ./bcc/tools/offcputime.py -uU Tracing off-CPU time (us) of user threads by user stack... Hit Ctrl-C to end. ^C write write [unknown] [unknown] [unknown] [unknown] [unknown] PyObject_VectorcallMethod [unknown] [unknown] PyObject_CallOneArg PyFile_WriteObject PyFile_WriteString [unknown] [unknown] PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 7 write write [unknown] sudo_ev_loop_v1 sudo_ev_dispatch_v1 [unknown] [unknown] [unknown] [unknown] __libc_start_main - sudo (1291) 7 syscall syscall bpf_open_perf_buffer_opts [unknown] [unknown] [unknown] [unknown] _PyObject_MakeTpCall PyObject_Vectorcall _PyEval_EvalFrameDefault PyEval_EvalCode [unknown] [unknown] [unknown] _PyRun_SimpleFileObject _PyRun_AnyFileObject Py_RunMain [unknown] Py_BytesMain [unknown] __libc_start_main - python (1293) 11 clock_nanosleep clock_nanosleep nanosleep sleep [unknown] [unknown] __clone - multipathd (698) 3001661 Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org Reported-by: "Nysal Jan K.A" <nysal@linux.ibm.com> Signed-off-by: Naveen N Rao <naveen@kernel.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240202154316.395276-1-naveen@kernel.org --- arch/powerpc/kernel/interrupt_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index bd863702d812..1ad059a9e2fe 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -52,7 +52,8 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) @@ -70,7 +71,6 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr From f09696279b5dd1770a3de2e062f1c5d1449213ff Mon Sep 17 00:00:00 2001 From: R Nageswara Sastry <rnsastry@linux.ibm.com> Date: Wed, 31 Jan 2024 18:38:59 +0530 Subject: [PATCH 071/304] selftests/powerpc/papr_vpd: Check devfd before get_system_loc_code() Calling get_system_loc_code before checking devfd and errno fails the test when the device is not available, the expected behaviour is a SKIP. Change the order of 'SKIP_IF_MSG' to correctly SKIP when the /dev/ papr-vpd device is not available. Test output before: Test FAILED on line 271 Test output after: [SKIP] Test skipped on line 266: /dev/papr-vpd not present Signed-off-by: R Nageswara Sastry <rnsastry@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240131130859.14968-1-rnsastry@linux.ibm.com --- tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c index 98cbb9109ee6..505294da1b9f 100644 --- a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c +++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c @@ -263,10 +263,10 @@ static int papr_vpd_system_loc_code(void) off_t size; int fd; - SKIP_IF_MSG(get_system_loc_code(&lc), - "Cannot determine system location code"); SKIP_IF_MSG(devfd < 0 && errno == ENOENT, DEVPATH " not present"); + SKIP_IF_MSG(get_system_loc_code(&lc), + "Cannot determine system location code"); FAIL_IF(devfd < 0); From a038a3ff8c6582404834852c043dadc73a5b68b4 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer <matthias.schiffer@ew.tq-group.com> Date: Wed, 24 Jan 2024 11:38:38 +0100 Subject: [PATCH 072/304] powerpc/6xx: set High BAT Enable flag on G2_LE cores MMU_FTR_USE_HIGH_BATS is set for G2_LE cores and derivatives like e300cX, but the high BATs need to be enabled in HID2 to work. Add register definitions and add the needed setup to __setup_cpu_603. This fixes boot on CPUs like the MPC5200B with STRICT_KERNEL_RWX enabled on systems where the flag has not been set by the bootloader already. Fixes: e4d6654ebe6e ("powerpc/mm/32s: rework mmu_mapin_ram()") Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com> Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240124103838.43675-1-matthias.schiffer@ew.tq-group.com --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/kernel/cpu_setup_6xx.S | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7fd09f25452d..bb47af9054a9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -617,6 +617,8 @@ #endif #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ #define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ +#define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */ +#define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */ #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f29ce3dd6140..bfd3f442e5eb 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -26,6 +26,15 @@ BEGIN_FTR_SECTION bl __init_fpu_registers END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) bl setup_common_caches + + /* + * This assumes that all cores using __setup_cpu_603 with + * MMU_FTR_USE_HIGH_BATS are G2_LE compatible + */ +BEGIN_MMU_FTR_SECTION + bl setup_g2_le_hid2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + mtlr r5 blr _GLOBAL(__setup_cpu_604) @@ -115,6 +124,16 @@ SYM_FUNC_START_LOCAL(setup_604_hid0) blr SYM_FUNC_END(setup_604_hid0) +/* Enable high BATs for G2_LE and derivatives like e300cX */ +SYM_FUNC_START_LOCAL(setup_g2_le_hid2) + mfspr r11,SPRN_HID2_G2_LE + oris r11,r11,HID2_G2_LE_HBE@h + mtspr SPRN_HID2_G2_LE,r11 + sync + isync + blr +SYM_FUNC_END(setup_g2_le_hid2) + /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some * erratas we work around here. * Moto MPC710CE.pdf describes them, those are errata @@ -495,4 +514,3 @@ _GLOBAL(__restore_cpu_setup) mtcr r7 blr _ASM_NOKPROBE_SYMBOL(__restore_cpu_setup) - From 4a7aee96200ad281a5cc4cf5c7a2e2a49d2b97b0 Mon Sep 17 00:00:00 2001 From: Jiangfeng Xiao <xiaojiangfeng@huawei.com> Date: Tue, 23 Jan 2024 09:45:59 +0800 Subject: [PATCH 073/304] powerpc/kasan: Fix addr error caused by page alignment In kasan_init_region, when k_start is not page aligned, at the begin of for loop, k_cur = k_start & PAGE_MASK is less than k_start, and then `va = block + k_cur - k_start` is less than block, the addr va is invalid, because the memory address space from va to block is not alloced by memblock_alloc, which will not be reserved by memblock_reserve later, it will be used by other places. As a result, memory overwriting occurs. for example: int __init __weak kasan_init_region(void *start, size_t size) { [...] /* if say block(dcd97000) k_start(feef7400) k_end(feeff3fe) */ block = memblock_alloc(k_end - k_start, PAGE_SIZE); [...] for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { /* at the begin of for loop * block(dcd97000) va(dcd96c00) k_cur(feef7000) k_start(feef7400) * va(dcd96c00) is less than block(dcd97000), va is invalid */ void *va = block + k_cur - k_start; [...] } [...] } Therefore, page alignment is performed on k_start before memblock_alloc() to ensure the validity of the VA address. Fixes: 663c0c9496a6 ("powerpc/kasan: Fix shadow area set up for modules.") Signed-off-by: Jiangfeng Xiao <xiaojiangfeng@huawei.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/1705974359-43790-1-git-send-email-xiaojiangfeng@huawei.com --- arch/powerpc/mm/kasan/init_32.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index a70828a6d935..aa9aa11927b2 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -64,6 +64,7 @@ int __init __weak kasan_init_region(void *start, size_t size) if (ret) return ret; + k_start = k_start & PAGE_MASK; block = memblock_alloc(k_end - k_start, PAGE_SIZE); if (!block) return -ENOMEM; From 4703b014f28bf7a2e56d1da238ee95ef6c5ce76b Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@linaro.org> Date: Mon, 5 Feb 2024 15:44:30 +0300 Subject: [PATCH 074/304] ASoC: cs35l56: fix reversed if statement in cs35l56_dspwait_asp1tx_put() It looks like the "!" character was added accidentally. The regmap_update_bits_check() function is normally going to succeed. This means the rest of the function is unreachable and we don't handle the situation where "changed" is true correctly. Fixes: 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org> Reviewed-by: Richard Fitzgerald <rf@opensource.cirrus.com> Link: https://lore.kernel.org/r/0c254c07-d1c0-4a5c-a22b-7e135cab032c@moroto.mountain Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/cs35l56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index c23e29da4cfb..ebed5ab1245b 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -115,7 +115,7 @@ static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, ret = regmap_update_bits_check(cs35l56->base.regmap, addr, CS35L56_ASP_TXn_SRC_MASK, val, &changed); - if (!ret) + if (ret) return ret; if (changed) From ac670505d825151ce47c1e75b9964485991954dd Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo <quic_jhugo@quicinc.com> Date: Fri, 2 Feb 2024 10:43:13 -0700 Subject: [PATCH 075/304] ASoC: dt-bindings: google,sc7280-herobrine: Drop bouncing @codeaurora The servers for the @codeaurora domain have long been retired and any messages sent there bounce. Srinivasa Rao Mandadapu has left the company and there does not appear to be an updated address to suggest, so drop Srinivasa as maintainer of the binding. The binding still appears to be maintined as Judy is listed. Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> Link: https://lore.kernel.org/r/20240202174313.4113670-1-quic_jhugo@quicinc.com Signed-off-by: Mark Brown <broonie@kernel.org> --- .../devicetree/bindings/sound/google,sc7280-herobrine.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml index ec4b6e547ca6..cdcd7c6f21eb 100644 --- a/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml +++ b/Documentation/devicetree/bindings/sound/google,sc7280-herobrine.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Google SC7280-Herobrine ASoC sound card driver maintainers: - - Srinivasa Rao Mandadapu <srivasam@codeaurora.org> - Judy Hsiao <judyhsiao@chromium.org> description: From b5fbde22684af5456d1de60758950944d69d69ad Mon Sep 17 00:00:00 2001 From: Cezary Rojewski <cezary.rojewski@intel.com> Date: Fri, 2 Feb 2024 12:49:01 +0100 Subject: [PATCH 076/304] ASoC: Intel: avs: Fix pci_probe() error path Recent changes modified operation-order in the probe() function without updating its error path accordingly. If snd_hdac_i915_init() exists with status EPROBE_DEFER the error path must cleanup allocated IRQs before leaving the scope. Fixes: 2dddc514b6e4 ("ASoC: Intel: avs: Move snd_hdac_i915_init to before probe_work.") Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com> Link: https://lore.kernel.org/r/20240202114901.1002127-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/intel/avs/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/intel/avs/core.c b/sound/soc/intel/avs/core.c index 59c3793f65df..db78eb2f0108 100644 --- a/sound/soc/intel/avs/core.c +++ b/sound/soc/intel/avs/core.c @@ -477,6 +477,9 @@ static int avs_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return 0; err_i915_init: + pci_free_irq(pci, 0, adev); + pci_free_irq(pci, 0, bus); + pci_free_irq_vectors(pci); pci_clear_master(pci); pci_set_drvdata(pci, NULL); err_acquire_irq: From 34a1066981a967eab619938e7b35a9be6b4c34e1 Mon Sep 17 00:00:00 2001 From: Gergo Koteles <soyer@irl.hu> Date: Sun, 4 Feb 2024 21:01:17 +0100 Subject: [PATCH 077/304] ASoC: tas2781: add module parameter to tascodec_init() The tascodec_init() of the snd-soc-tas2781-comlib module is called from snd-soc-tas2781-i2c and snd-hda-scodec-tas2781-i2c modules. It calls request_firmware_nowait() with parameter THIS_MODULE and a cont/callback from the latter modules. The latter modules can be removed while their callbacks are running, resulting in a general protection failure. Add module parameter to tascodec_init() so request_firmware_nowait() can be called with the module of the callback. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") CC: stable@vger.kernel.org Signed-off-by: Gergo Koteles <soyer@irl.hu> Link: https://lore.kernel.org/r/118dad922cef50525e5aab09badef2fa0eb796e5.1707076603.git.soyer@irl.hu Signed-off-by: Mark Brown <broonie@kernel.org> --- include/sound/tas2781.h | 1 + sound/pci/hda/tas2781_hda_i2c.c | 2 +- sound/soc/codecs/tas2781-comlib.c | 3 ++- sound/soc/codecs/tas2781-i2c.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index b00d65417c31..9aff384941de 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -142,6 +142,7 @@ struct tasdevice_priv { void tas2781_reset(struct tasdevice_priv *tas_dev); int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, + struct module *module, void (*cont)(const struct firmware *fw, void *context)); struct tasdevice_priv *tasdevice_kzalloc(struct i2c_client *i2c); int tasdevice_init(struct tasdevice_priv *tas_priv); diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 2dd809de62e5..1bfb00102a77 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -710,7 +710,7 @@ static int tas2781_hda_bind(struct device *dev, struct device *master, strscpy(comps->name, dev_name(dev), sizeof(comps->name)); - ret = tascodec_init(tas_hda->priv, codec, tasdev_fw_ready); + ret = tascodec_init(tas_hda->priv, codec, THIS_MODULE, tasdev_fw_ready); if (!ret) comps->playback_hook = tas2781_hda_playback_hook; diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c index b7e56ceb1acf..5d0e5348b361 100644 --- a/sound/soc/codecs/tas2781-comlib.c +++ b/sound/soc/codecs/tas2781-comlib.c @@ -267,6 +267,7 @@ void tas2781_reset(struct tasdevice_priv *tas_dev) EXPORT_SYMBOL_GPL(tas2781_reset); int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, + struct module *module, void (*cont)(const struct firmware *fw, void *context)) { int ret = 0; @@ -280,7 +281,7 @@ int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, tas_priv->dev_name, tas_priv->ndev); crc8_populate_msb(tas_priv->crc8_lkp_tbl, TASDEVICE_CRC8_POLYNOMIAL); tas_priv->codec = codec; - ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_UEVENT, + ret = request_firmware_nowait(module, FW_ACTION_UEVENT, tas_priv->rca_binaryname, tas_priv->dev, GFP_KERNEL, tas_priv, cont); if (ret) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 32913bd1a623..b5abff230e43 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -566,7 +566,7 @@ static int tasdevice_codec_probe(struct snd_soc_component *codec) { struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(codec); - return tascodec_init(tas_priv, codec, tasdevice_fw_ready); + return tascodec_init(tas_priv, codec, THIS_MODULE, tasdevice_fw_ready); } static void tasdevice_deinit(void *context) From 3376ca3f1a2075eaa23c5576c47d04d7e8a4adda Mon Sep 17 00:00:00 2001 From: Mathias Krause <minipli@grsecurity.net> Date: Sat, 3 Feb 2024 13:45:20 +0100 Subject: [PATCH 078/304] KVM: x86: Fix KVM_GET_MSRS stack info leak Commit 6abe9c1386e5 ("KVM: X86: Move ignore_msrs handling upper the stack") changed the 'ignore_msrs' handling, including sanitizing return values to the caller. This was fine until commit 12bc2132b15e ("KVM: X86: Do the same ignore_msrs check for feature msrs") which allowed non-existing feature MSRs to be ignored, i.e. to not generate an error on the ioctl() level. It even tried to preserve the sanitization of the return value. However, the logic is flawed, as '*data' will be overwritten again with the uninitialized stack value of msr.data. Fix this by simplifying the logic and always initializing msr.data, vanishing the need for an additional error exit path. Fixes: 12bc2132b15e ("KVM: X86: Do the same ignore_msrs check for feature msrs") Signed-off-by: Mathias Krause <minipli@grsecurity.net> Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com> Link: https://lore.kernel.org/r/20240203124522.592778-2-minipli@grsecurity.net Signed-off-by: Sean Christopherson <seanjc@google.com> --- arch/x86/kvm/x86.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 25bc52cdf8f4..3f6e82fd37f3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1704,22 +1704,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) struct kvm_msr_entry msr; int r; + /* Unconditionally clear the output for simplicity */ + msr.data = 0; msr.index = index; r = kvm_get_msr_feature(&msr); - if (r == KVM_MSR_RET_INVALID) { - /* Unconditionally clear the output for simplicity */ - *data = 0; - if (kvm_msr_ignored_check(index, 0, false)) - r = 0; - } - - if (r) - return r; + if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false)) + r = 0; *data = msr.data; - return 0; + return r; } static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) From aac8a59537dfc704ff344f1aacfd143c089ee20f Mon Sep 17 00:00:00 2001 From: Tejun Heo <tj@kernel.org> Date: Mon, 5 Feb 2024 15:43:41 -1000 Subject: [PATCH 079/304] Revert "workqueue: Override implicit ordered attribute in workqueue_apply_unbound_cpumask()" This reverts commit ca10d851b9ad0338c19e8e3089e24d565ebfffd7. The commit allowed workqueue_apply_unbound_cpumask() to clear __WQ_ORDERED on now removed implicitly ordered workqueues. This was incorrect in that system-wide config change shouldn't break ordering properties of all workqueues. The reason why apply_workqueue_attrs() path was allowed to do so was because it was targeting the specific workqueue - either the workqueue had WQ_SYSFS set or the workqueue user specifically tried to change max_active, both of which indicate that the workqueue doesn't need to be ordered. The implicitly ordered workqueue promotion was removed by the previous commit 3bc1e711c26b ("workqueue: Don't implicitly make UNBOUND workqueues w/ @max_active==1 ordered"). However, it didn't update this path and broke build. Let's revert the commit which was incorrect in the first place which also fixes build. Signed-off-by: Tejun Heo <tj@kernel.org> Fixes: 3bc1e711c26b ("workqueue: Don't implicitly make UNBOUND workqueues w/ @max_active==1 ordered") Fixes: ca10d851b9ad ("workqueue: Override implicit ordered attribute in workqueue_apply_unbound_cpumask()") Cc: stable@vger.kernel.org # v6.6+ Signed-off-by: Tejun Heo <tj@kernel.org> --- kernel/workqueue.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 76e60faed892..7b482a26d741 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5786,13 +5786,9 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask) list_for_each_entry(wq, &workqueues, list) { if (!(wq->flags & WQ_UNBOUND)) continue; - /* creating multiple pwqs breaks ordering guarantee */ - if (!list_empty(&wq->pwqs)) { - if (wq->flags & __WQ_ORDERED_EXPLICIT) - continue; - wq->flags &= ~__WQ_ORDERED; - } + if (wq->flags & __WQ_ORDERED) + continue; ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); if (IS_ERR(ctx)) { From 610010737f74482a61896596a0116876ecf9e65c Mon Sep 17 00:00:00 2001 From: Mario Limonciello <mario.limonciello@amd.com> Date: Mon, 5 Feb 2024 15:48:53 -0600 Subject: [PATCH 080/304] ASoC: amd: yc: Add DMI quirk for Lenovo Ideapad Pro 5 16ARP8 The laptop requires a quirk ID to enable its internal microphone. Add it to the DMI quirk table. Reported-by: Stanislav Petrov <stanislav.i.petrov@gmail.com> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=216925 Cc: stable@vger.kernel.org Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> Link: https://lore.kernel.org/r/20240205214853.2689-1-mario.limonciello@amd.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 23d44a50d815..80ad60d485ea 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -248,6 +248,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82YM"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83AS"), + } + }, { .driver_data = &acp6x_card, .matches = { From 5c84bc8b617bf90e722cc57d447abd9a468d3a52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Tue, 23 Jan 2024 13:51:41 +0100 Subject: [PATCH 081/304] powerpc: udbg_memcons: mark functions static ppc64_book3e_allmodconfig has one more driver that triggeres a few missing-prototypes warnings: arch/powerpc/sysdev/udbg_memcons.c:44:6: error: no previous prototype for 'memcons_putc' [-Werror=missing-prototypes] arch/powerpc/sysdev/udbg_memcons.c:57:5: error: no previous prototype for 'memcons_getc_poll' [-Werror=missing-prototypes] arch/powerpc/sysdev/udbg_memcons.c:80:5: error: no previous prototype for 'memcons_getc' [-Werror=missing-prototypes] Mark all these function static as there are no other users. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240123125148.2004648-1-arnd@kernel.org --- arch/powerpc/sysdev/udbg_memcons.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c index 5020044400dc..4de57ba52236 100644 --- a/arch/powerpc/sysdev/udbg_memcons.c +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -41,7 +41,7 @@ struct memcons memcons = { .input_end = &memcons_input[CONFIG_PPC_MEMCONS_INPUT_SIZE], }; -void memcons_putc(char c) +static void memcons_putc(char c) { char *new_output_pos; @@ -54,7 +54,7 @@ void memcons_putc(char c) memcons.output_pos = new_output_pos; } -int memcons_getc_poll(void) +static int memcons_getc_poll(void) { char c; char *new_input_pos; @@ -77,7 +77,7 @@ int memcons_getc_poll(void) return -1; } -int memcons_getc(void) +static int memcons_getc(void) { int c; From 1c57b9f63ab34f01b8c73731cc0efacb5a9a2f16 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Tue, 23 Jan 2024 13:51:42 +0100 Subject: [PATCH 082/304] powerpc: 85xx: mark local functions static These functions are either used in only one file and can just be made static or need an #include statement to avoid a warning: arch/powerpc/platforms/85xx/mpc8536_ds.c:30:13: error: no previous prototype for 'mpc8536_ds_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1010rdb.c:27:13: error: no previous prototype for 'p1010_rdb_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_ds.c:373:6: error: no previous prototype for 'p1022ds_set_pixel_clock' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_ds.c:422:1: error: no previous prototype for 'p1022ds_valid_monitor_port' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_ds.c:435:13: error: no previous prototype for 'p1022_ds_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_rdk.c:43:6: error: no previous prototype for 'p1022rdk_set_pixel_clock' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_rdk.c:92:1: error: no previous prototype for 'p1022rdk_valid_monitor_port' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/p1022_rdk.c:99:13: error: no previous prototype for 'p1022_rdk_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/socrates_fpga_pic.c:273:13: error: no previous prototype for 'socrates_fpga_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/xes_mpc85xx.c:40:13: error: no previous prototype for 'xes_mpc85xx_pic_init' [-Werror=missing-prototypes] arch/powerpc/platforms/85xx/mvme2500.c:24:13: error: no previous prototype for 'mvme2500_pic_init' [-Werror=missing-prototypes] Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240123125148.2004648-2-arnd@kernel.org --- arch/powerpc/platforms/85xx/mpc8536_ds.c | 2 +- arch/powerpc/platforms/85xx/mvme2500.c | 2 +- arch/powerpc/platforms/85xx/p1010rdb.c | 2 +- arch/powerpc/platforms/85xx/p1022_ds.c | 6 +++--- arch/powerpc/platforms/85xx/p1022_rdk.c | 6 +++--- arch/powerpc/platforms/85xx/socrates_fpga_pic.c | 2 ++ arch/powerpc/platforms/85xx/xes_mpc85xx.c | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index e966b2ad8ecd..b3327a358eb4 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -27,7 +27,7 @@ #include "mpc85xx.h" -void __init mpc8536_ds_pic_init(void) +static void __init mpc8536_ds_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c index 1b59e45a0c64..19122daadb55 100644 --- a/arch/powerpc/platforms/85xx/mvme2500.c +++ b/arch/powerpc/platforms/85xx/mvme2500.c @@ -21,7 +21,7 @@ #include "mpc85xx.h" -void __init mvme2500_pic_init(void) +static void __init mvme2500_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index 10d6f1fa3327..491895ac8bcf 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -24,7 +24,7 @@ #include "mpc85xx.h" -void __init p1010_rdb_pic_init(void) +static void __init p1010_rdb_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 0dd786a061a6..adc3a2ee1415 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -370,7 +370,7 @@ exit: * * @pixclock: the wavelength, in picoseconds, of the clock */ -void p1022ds_set_pixel_clock(unsigned int pixclock) +static void p1022ds_set_pixel_clock(unsigned int pixclock) { struct device_node *guts_np = NULL; struct ccsr_guts __iomem *guts; @@ -418,7 +418,7 @@ void p1022ds_set_pixel_clock(unsigned int pixclock) /** * p1022ds_valid_monitor_port: set the monitor port for sysfs */ -enum fsl_diu_monitor_port +static enum fsl_diu_monitor_port p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port) { switch (port) { @@ -432,7 +432,7 @@ p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port) #endif -void __init p1022_ds_pic_init(void) +static void __init p1022_ds_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index 25ab6e9c1470..6198299d95b1 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -40,7 +40,7 @@ * * @pixclock: the wavelength, in picoseconds, of the clock */ -void p1022rdk_set_pixel_clock(unsigned int pixclock) +static void p1022rdk_set_pixel_clock(unsigned int pixclock) { struct device_node *guts_np = NULL; struct ccsr_guts __iomem *guts; @@ -88,7 +88,7 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock) /** * p1022rdk_valid_monitor_port: set the monitor port for sysfs */ -enum fsl_diu_monitor_port +static enum fsl_diu_monitor_port p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port) { return FSL_DIU_PORT_DVI; @@ -96,7 +96,7 @@ p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port) #endif -void __init p1022_rdk_pic_init(void) +static void __init p1022_rdk_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index baa12eff6d5d..60e0b8947ce6 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -8,6 +8,8 @@ #include <linux/of_irq.h> #include <linux/io.h> +#include "socrates_fpga_pic.h" + /* * The FPGA supports 9 interrupt sources, which can be routed to 3 * interrupt request lines of the MPIC. The line to be used can be diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index 45f257fc1ade..2582427d8d01 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -37,7 +37,7 @@ #define MPC85xx_L2CTL_L2I 0x40000000 /* L2 flash invalidate */ #define MPC85xx_L2CTL_L2SIZ_MASK 0x30000000 /* L2 SRAM size (R/O) */ -void __init xes_mpc85xx_pic_init(void) +static void __init xes_mpc85xx_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC "); From 720e78d7fa0f1df905a42ae68e7e3b0f95e53946 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@linaro.org> Date: Wed, 31 Jan 2024 11:24:59 +0300 Subject: [PATCH 083/304] serial: 8250_pci1xxxx: partially revert off by one patch I was reviewing this code again and I realized I made a mistake here. It should have been > instead of >=. The subtract ensures that we don't go out of bounds. My patch meant that we don't read the last chunk of the buffer. Fixes: 86ee55e9bc7f ("serial: 8250_pci1xxxx: fix off by one in pci1xxxx_process_read_data()") Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org> Link: https://lore.kernel.org/r/bd6fb361-bbb9-427d-90e8-a5df4de76221@moroto.mountain Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/tty/serial/8250/8250_pci1xxxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_pci1xxxx.c b/drivers/tty/serial/8250/8250_pci1xxxx.c index cd258922bd78..2dda737b1660 100644 --- a/drivers/tty/serial/8250/8250_pci1xxxx.c +++ b/drivers/tty/serial/8250/8250_pci1xxxx.c @@ -302,7 +302,7 @@ static void pci1xxxx_process_read_data(struct uart_port *port, * to read, the data is received one byte at a time. */ while (valid_burst_count--) { - if (*buff_index >= (RX_BUF_SIZE - UART_BURST_SIZE)) + if (*buff_index > (RX_BUF_SIZE - UART_BURST_SIZE)) break; burst_buf = (u32 *)&rx_buff[*buff_index]; *burst_buf = readl(port->membase + UART_RX_BURST_FIFO); From 3ee07964d407411fd578a3bc998de44fd64d266a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org> Date: Thu, 1 Feb 2024 11:55:56 +0100 Subject: [PATCH 084/304] serial: core: introduce uart_port_tx_flags() And an enum with a flag: UART_TX_NOSTOP. To NOT call __port->ops->stop_tx() when the circular buffer is empty. mxs-uart needs this (see the next patch). Signed-off-by: "Jiri Slaby (SUSE)" <jirislaby@kernel.org> Cc: stable <stable@kernel.org> Tested-by: Emil Kronborg <emil.kronborg@protonmail.com> Link: https://lore.kernel.org/r/20240201105557.28043-1-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- include/linux/serial_core.h | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 536b2581d3e2..55b1f3ba48ac 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -748,8 +748,17 @@ struct uart_driver { void uart_write_wakeup(struct uart_port *port); -#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test, \ - for_post) \ +/** + * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() + * + * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer + */ +enum UART_TX_FLAGS { + UART_TX_NOSTOP = BIT(0), +}; + +#define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ + for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct circ_buf *xmit = &__port->state->xmit; \ @@ -777,7 +786,7 @@ void uart_write_wakeup(struct uart_port *port); if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ @@ -812,7 +821,7 @@ void uart_write_wakeup(struct uart_port *port); */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ - __uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count, \ + __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) @@ -826,8 +835,21 @@ void uart_write_wakeup(struct uart_port *port); * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ - __uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({})) + __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) + +/** + * uart_port_tx_flags -- transmit helper for uart_port with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ From 7be50f2e8f20fc2299069b28dea59a28e3abe20a Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org> Date: Thu, 1 Feb 2024 11:55:57 +0100 Subject: [PATCH 085/304] serial: mxs-auart: fix tx Emil reports: After updating Linux on an i.MX28 board, serial communication over AUART broke. When I TX from the board and measure on the TX pin, it seems like the HW fifo is not emptied before the transmission is stopped. MXS performs weird things with stop_tx(). The driver makes it conditional on uart_tx_stopped(). So the driver needs special handling. Pass the brand new UART_TX_NOSTOP to uart_port_tx_flags() and handle the stop on its own. Signed-off-by: "Jiri Slaby (SUSE)" <jirislaby@kernel.org> Reported-by: Emil Kronborg <emil.kronborg@protonmail.com> Cc: stable <stable@kernel.org> Fixes: 2d141e683e9a ("tty: serial: use uart_port_tx() helper") Closes: https://lore.kernel.org/all/miwgbnvy3hjpnricubg76ytpn7xoceehwahupy25bubbduu23s@om2lptpa26xw/ Tested-by: Stefan Wahren <wahrenst@gmx.net> Tested-by: Emil Kronborg <emil.kronborg@protonmail.com> Link: https://lore.kernel.org/r/20240201105557.28043-2-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/tty/serial/mxs-auart.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 3ec725555bcc..4749331fe618 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -605,13 +605,16 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s) return; } - pending = uart_port_tx(&s->port, ch, + pending = uart_port_tx_flags(&s->port, ch, UART_TX_NOSTOP, !(mxs_read(s, REG_STAT) & AUART_STAT_TXFF), mxs_write(ch, s, REG_DATA)); if (pending) mxs_set(AUART_INTR_TXIEN, s, REG_INTR); else mxs_clr(AUART_INTR_TXIEN, s, REG_INTR); + + if (uart_tx_stopped(&s->port)) + mxs_auart_stop_tx(&s->port); } static void mxs_auart_rx_char(struct mxs_auart_port *s) From ba58f873cdeec30b6da48e28dd5782c5a3e1371b Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Fri, 2 Feb 2024 15:18:31 -0800 Subject: [PATCH 086/304] KVM: selftests: Fix a semaphore imbalance in the dirty ring logging test When finishing the final iteration of dirty_log_test testcase, set host_quit _before_ the final "continue" so that the vCPU worker doesn't run an extra iteration, and delete the hack-a-fix of an extra "continue" from the dirty ring testcase. This fixes a bug where the extra post to sem_vcpu_cont may not be consumed, which results in failures in subsequent runs of the testcases. The bug likely was missed during development as x86 supports only a single "guest mode", i.e. there aren't any subsequent testcases after the dirty ring test, because for_each_guest_mode() only runs a single iteration. For the regular dirty log testcases, letting the vCPU run one extra iteration is a non-issue as the vCPU worker waits on sem_vcpu_cont if and only if the worker is explicitly told to stop (vcpu_sync_stop_requested). But for the dirty ring test, which needs to periodically stop the vCPU to reap the dirty ring, letting the vCPU resume the guest _after_ the last iteration means the vCPU will get stuck without an extra "continue". However, blindly firing off an post to sem_vcpu_cont isn't guaranteed to be consumed, e.g. if the vCPU worker sees host_quit==true before resuming the guest. This results in a dangling sem_vcpu_cont, which leads to subsequent iterations getting out of sync, as the vCPU worker will continue on before the main task is ready for it to resume the guest, leading to a variety of asserts, e.g. ==== Test Assertion Failure ==== dirty_log_test.c:384: dirty_ring_vcpu_ring_full pid=14854 tid=14854 errno=22 - Invalid argument 1 0x00000000004033eb: dirty_ring_collect_dirty_pages at dirty_log_test.c:384 2 0x0000000000402d27: log_mode_collect_dirty_pages at dirty_log_test.c:505 3 (inlined by) run_test at dirty_log_test.c:802 4 0x0000000000403dc7: for_each_guest_mode at guest_modes.c:100 5 0x0000000000401dff: main at dirty_log_test.c:941 (discriminator 3) 6 0x0000ffff9be173c7: ?? ??:0 7 0x0000ffff9be1749f: ?? ??:0 8 0x000000000040206f: _start at ??:? Didn't continue vcpu even without ring full Alternatively, the test could simply reset the semaphores before each testcase, but papering over hacks with more hacks usually ends in tears. Reported-by: Shaoqin Huang <shahuang@redhat.com> Fixes: 84292e565951 ("KVM: selftests: Add dirty ring buffer test") Reviewed-by: Peter Xu <peterx@redhat.com> Reviewed-by: Shaoqin Huang <shahuang@redhat.com> Link: https://lore.kernel.org/r/20240202231831.354848-1-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- tools/testing/selftests/kvm/dirty_log_test.c | 50 +++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index babea97b31a4..eaad5b20854c 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -376,7 +376,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); @@ -415,12 +418,6 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -433,7 +430,6 @@ struct log_mode { uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -452,7 +448,6 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; @@ -513,14 +508,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -719,6 +706,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; unsigned long *bmap; uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -788,12 +776,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); - host_quit = false; + WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); while (iteration < p->iterations) { @@ -819,15 +817,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) assert(host_log_mode == LOG_MODE_DIRTY_RING || atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - iteration++; + /* + * Set host_quit before sem_vcpu_cont in the final iteration to + * ensure that the vCPU worker doesn't resume the guest. As + * above, the dirty ring test may stop and wait even when not + * explicitly request to do so, i.e. would hang waiting for a + * "continue" if it's allowed to resume the guest. + */ + if (++iteration == p->iterations) + WRITE_ONCE(host_quit, true); + + sem_post(&sem_vcpu_cont); sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " From 6fd78beed0213655c16ef728af0caec6d5ae4c73 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Wed, 31 Jan 2024 14:27:28 -0800 Subject: [PATCH 087/304] KVM: selftests: Don't assert on exact number of 4KiB in dirty log split test Drop dirty_log_page_splitting_test's assertion that the number of 4KiB pages remains the same across dirty logging being enabled and disabled, as the test doesn't guarantee that mappings outside of the memslots being dirty logged are stable, e.g. KVM's mappings for code and pages in memslot0 can be zapped by things like NUMA balancing. To preserve the spirit of the check, assert that (a) the number of 4KiB pages after splitting is _at least_ the number of 4KiB pages across all memslots under test, and (b) the number of hugepages before splitting adds up to the number of pages across all memslots under test. (b) is a little tenuous as it relies on memslot0 being incompatible with transparent hugepages, but that holds true for now as selftests explicitly madvise() MADV_NOHUGEPAGE for memslot0 (__vm_create() unconditionally specifies the backing type as VM_MEM_SRC_ANONYMOUS). Reported-by: Yi Lai <yi1.lai@intel.com> Reported-by: Tao Su <tao1.su@linux.intel.com> Reviewed-by: Tao Su <tao1.su@linux.intel.com> Link: https://lore.kernel.org/r/20240131222728.4100079-1-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com> --- .../x86_64/dirty_log_page_splitting_test.c | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c index 634c6bfcd572..ee3b384b991c 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c @@ -92,7 +92,6 @@ static void run_test(enum vm_guest_mode mode, void *unused) uint64_t host_num_pages; uint64_t pages_per_slot; int i; - uint64_t total_4k_pages; struct kvm_page_stats stats_populated; struct kvm_page_stats stats_dirty_logging_enabled; struct kvm_page_stats stats_dirty_pass[ITERATIONS]; @@ -107,6 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *unused) guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages); pages_per_slot = host_num_pages / SLOTS; + TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS); + TEST_ASSERT(!(host_num_pages % 512), + "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages); bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot); @@ -165,10 +167,8 @@ static void run_test(enum vm_guest_mode mode, void *unused) memstress_free_bitmaps(bitmaps, SLOTS); memstress_destroy_vm(vm); - /* Make assertions about the page counts. */ - total_4k_pages = stats_populated.pages_4k; - total_4k_pages += stats_populated.pages_2m * 512; - total_4k_pages += stats_populated.pages_1g * 512 * 512; + TEST_ASSERT_EQ((stats_populated.pages_2m * 512 + + stats_populated.pages_1g * 512 * 512), host_num_pages); /* * Check that all huge pages were split. Since large pages can only @@ -180,19 +180,22 @@ static void run_test(enum vm_guest_mode mode, void *unused) */ if (dirty_log_manual_caps) { TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); - TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages); + TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_clear_pass[0].pages_4k); TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); } else { TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); - TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages); + TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_dirty_logging_enabled.pages_4k); } /* * Once dirty logging is disabled and the vCPUs have touched all their - * memory again, the page counts should be the same as they were + * memory again, the hugepage counts should be the same as they were * right after initial population of memory. */ - TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k); TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); } From d1722057477a3786b8c0d60c28fc281f6ecf1cc3 Mon Sep 17 00:00:00 2001 From: Charles Keepax <ckeepax@opensource.cirrus.com> Date: Tue, 6 Feb 2024 11:38:49 +0000 Subject: [PATCH 088/304] ASoC: cs42l43: Handle error from devm_pm_runtime_enable As devm_pm_runtime_enable can fail due to memory allocations, it is best to handle the error. Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com> Link: https://lore.kernel.org/r/20240206113850.719888-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/cs42l43.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 6a64681767de..4d2a5584aa57 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2257,7 +2257,10 @@ static int cs42l43_codec_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(priv->dev); pm_runtime_set_active(priv->dev); pm_runtime_get_noresume(priv->dev); - devm_pm_runtime_enable(priv->dev); + + ret = devm_pm_runtime_enable(priv->dev); + if (ret) + goto err_pm; for (i = 0; i < ARRAY_SIZE(cs42l43_irqs); i++) { ret = cs42l43_request_irq(priv, dom, cs42l43_irqs[i].name, From 64353af49fecbdec1de9aadf2369d54fc00f1899 Mon Sep 17 00:00:00 2001 From: Charles Keepax <ckeepax@opensource.cirrus.com> Date: Tue, 6 Feb 2024 11:38:50 +0000 Subject: [PATCH 089/304] ASoC: cs42l43: Add system suspend ops to disable IRQ The IRQ should be disabled whilst entering and exiting system suspend to avoid the IRQ handler being called whilst the PM runtime is disabled. Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com> Link: https://lore.kernel.org/r/20240206113850.719888-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/cs42l43.c | 43 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 4d2a5584aa57..a97ccb512deb 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -2336,8 +2336,47 @@ static int cs42l43_codec_runtime_resume(struct device *dev) return 0; } -static DEFINE_RUNTIME_DEV_PM_OPS(cs42l43_codec_pm_ops, NULL, - cs42l43_codec_runtime_resume, NULL); +static int cs42l43_codec_suspend(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + disable_irq(cs42l43->irq); + + return 0; +} + +static int cs42l43_codec_suspend_noirq(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + enable_irq(cs42l43->irq); + + return 0; +} + +static int cs42l43_codec_resume(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + enable_irq(cs42l43->irq); + + return 0; +} + +static int cs42l43_codec_resume_noirq(struct device *dev) +{ + struct cs42l43 *cs42l43 = dev_get_drvdata(dev); + + disable_irq(cs42l43->irq); + + return 0; +} + +static const struct dev_pm_ops cs42l43_codec_pm_ops = { + SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend, cs42l43_codec_resume) + NOIRQ_SYSTEM_SLEEP_PM_OPS(cs42l43_codec_suspend_noirq, cs42l43_codec_resume_noirq) + RUNTIME_PM_OPS(NULL, cs42l43_codec_runtime_resume, NULL) +}; static const struct platform_device_id cs42l43_codec_id_table[] = { { "cs42l43-codec", }, From 44d3b8a19b91cd2af11f918b2fd05628383172de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= <amadeuszx.slawinski@linux.intel.com> Date: Wed, 7 Feb 2024 12:26:24 +0100 Subject: [PATCH 090/304] ASoC: Intel: avs: Fix dynamic port assignment when TDM is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case TDM is set in topology on SSP0, parser will overwrite vindex value, because it only checks if port is set. Fix this by checking whole field value. Fixes: e6d50e474e45 ("ASoC: Intel: avs: Improve topology parsing of dynamic strings") Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com> Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com> Link: https://lore.kernel.org/r/20240207112624.2132821-1-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/intel/avs/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/avs/topology.c b/sound/soc/intel/avs/topology.c index 778236d3fd28..48b3c67c9103 100644 --- a/sound/soc/intel/avs/topology.c +++ b/sound/soc/intel/avs/topology.c @@ -857,7 +857,7 @@ assign_copier_gtw_instance(struct snd_soc_component *comp, struct avs_tplg_modcf } /* If topology sets value don't overwrite it */ - if (cfg->copier.vindex.i2s.instance) + if (cfg->copier.vindex.val) return; mach = dev_get_platdata(comp->card->dev); From c14f09f010cc569ae7e2f6ef02374f6bfef9917e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald <rf@opensource.cirrus.com> Date: Thu, 8 Feb 2024 12:37:42 +0000 Subject: [PATCH 091/304] ASoC: cs35l56: Fix deadlock in ASP1 mixer register initialization Rewrite the handling of ASP1 TX mixer mux initialization to prevent a deadlock during component_remove(). The firmware can overwrite the ASP1 TX mixer registers with system-specific settings. This is mainly for hardware that uses the ASP as a chip-to-chip link controlled by the firmware. Because of this the driver cannot know the starting state of the ASP1 mixer muxes until the firmware has been downloaded and rebooted. The original workaround for this was to queue a work function from the dsp_work() job. This work then read the register values (populating the regmap cache the first time around) and then called snd_soc_dapm_mux_update_power(). The problem with this is that it was ultimately triggered by cs35l56_component_probe() queueing dsp_work, which meant that it would be running in parallel with the rest of the ASoC component and card initialization. To prevent accessing DAPM before it was fully initialized the work function took the card mutex. But this would deadlock if cs35l56_component_remove() was called before the work job had completed, because ASoC calls component_remove() with the card mutex held. This new version removes the work function. Instead the regmap cache and DAPM mux widgets are initialized the first time any of the associated ALSA controls is read or written. Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com> Fixes: 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") Link: https://lore.kernel.org/r/20240208123742.1278104-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/cs35l56.c | 157 +++++++++++++++++-------------------- sound/soc/codecs/cs35l56.h | 2 +- 2 files changed, 75 insertions(+), 84 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index ebed5ab1245b..98d3957c66e7 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -68,6 +68,66 @@ static const char * const cs35l56_asp1_mux_control_names[] = { "ASP1 TX1 Source", "ASP1 TX2 Source", "ASP1 TX3 Source", "ASP1 TX4 Source" }; +static int cs35l56_sync_asp1_mixer_widgets_with_firmware(struct cs35l56_private *cs35l56) +{ + struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cs35l56->component); + const char *prefix = cs35l56->component->name_prefix; + char full_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + const char *name; + struct snd_kcontrol *kcontrol; + struct soc_enum *e; + unsigned int val[4]; + int i, item, ret; + + if (cs35l56->asp1_mixer_widgets_initialized) + return 0; + + /* + * Resume so we can read the registers from silicon if the regmap + * cache has not yet been populated. + */ + ret = pm_runtime_resume_and_get(cs35l56->base.dev); + if (ret < 0) + return ret; + + /* Wait for firmware download and reboot */ + cs35l56_wait_dsp_ready(cs35l56); + + ret = regmap_bulk_read(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, + val, ARRAY_SIZE(val)); + + pm_runtime_mark_last_busy(cs35l56->base.dev); + pm_runtime_put_autosuspend(cs35l56->base.dev); + + if (ret) { + dev_err(cs35l56->base.dev, "Failed to read ASP1 mixer regs: %d\n", ret); + return ret; + } + + for (i = 0; i < ARRAY_SIZE(cs35l56_asp1_mux_control_names); ++i) { + name = cs35l56_asp1_mux_control_names[i]; + + if (prefix) { + snprintf(full_name, sizeof(full_name), "%s %s", prefix, name); + name = full_name; + } + + kcontrol = snd_soc_card_get_kcontrol(dapm->card, name); + if (!kcontrol) { + dev_warn(cs35l56->base.dev, "Could not find control %s\n", name); + continue; + } + + e = (struct soc_enum *)kcontrol->private_value; + item = snd_soc_enum_val_to_item(e, val[i] & CS35L56_ASP_TXn_SRC_MASK); + snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); + } + + cs35l56->asp1_mixer_widgets_initialized = true; + + return 0; +} + static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -78,9 +138,9 @@ static int cs35l56_dspwait_asp1tx_get(struct snd_kcontrol *kcontrol, unsigned int addr, val; int ret; - /* Wait for mux to be initialized */ - cs35l56_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->mux_init_work); + ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56); + if (ret) + return ret; addr = cs35l56_asp1_mixer_regs[index]; ret = regmap_read(cs35l56->base.regmap, addr, &val); @@ -106,9 +166,9 @@ static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, bool changed; int ret; - /* Wait for mux to be initialized */ - cs35l56_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->mux_init_work); + ret = cs35l56_sync_asp1_mixer_widgets_with_firmware(cs35l56); + if (ret) + return ret; addr = cs35l56_asp1_mixer_regs[index]; val = snd_soc_enum_item_to_val(e, item); @@ -124,70 +184,6 @@ static int cs35l56_dspwait_asp1tx_put(struct snd_kcontrol *kcontrol, return changed; } -static void cs35l56_mark_asp1_mixer_widgets_dirty(struct cs35l56_private *cs35l56) -{ - struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(cs35l56->component); - const char *prefix = cs35l56->component->name_prefix; - char full_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - const char *name; - struct snd_kcontrol *kcontrol; - struct soc_enum *e; - unsigned int val[4]; - int i, item, ret; - - /* - * Resume so we can read the registers from silicon if the regmap - * cache has not yet been populated. - */ - ret = pm_runtime_resume_and_get(cs35l56->base.dev); - if (ret < 0) - return; - - ret = regmap_bulk_read(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, - val, ARRAY_SIZE(val)); - - pm_runtime_mark_last_busy(cs35l56->base.dev); - pm_runtime_put_autosuspend(cs35l56->base.dev); - - if (ret) { - dev_err(cs35l56->base.dev, "Failed to read ASP1 mixer regs: %d\n", ret); - return; - } - - snd_soc_card_mutex_lock(dapm->card); - WARN_ON(!dapm->card->instantiated); - - for (i = 0; i < ARRAY_SIZE(cs35l56_asp1_mux_control_names); ++i) { - name = cs35l56_asp1_mux_control_names[i]; - - if (prefix) { - snprintf(full_name, sizeof(full_name), "%s %s", prefix, name); - name = full_name; - } - - kcontrol = snd_soc_card_get_kcontrol(dapm->card, name); - if (!kcontrol) { - dev_warn(cs35l56->base.dev, "Could not find control %s\n", name); - continue; - } - - e = (struct soc_enum *)kcontrol->private_value; - item = snd_soc_enum_val_to_item(e, val[i] & CS35L56_ASP_TXn_SRC_MASK); - snd_soc_dapm_mux_update_power(dapm, kcontrol, item, e, NULL); - } - - snd_soc_card_mutex_unlock(dapm->card); -} - -static void cs35l56_mux_init_work(struct work_struct *work) -{ - struct cs35l56_private *cs35l56 = container_of(work, - struct cs35l56_private, - mux_init_work); - - cs35l56_mark_asp1_mixer_widgets_dirty(cs35l56); -} - static DECLARE_TLV_DB_SCALE(vol_tlv, -10000, 25, 0); static const struct snd_kcontrol_new cs35l56_controls[] = { @@ -936,14 +932,6 @@ static void cs35l56_dsp_work(struct work_struct *work) else cs35l56_patch(cs35l56, firmware_missing); - - /* - * Set starting value of ASP1 mux widgets. Updating a mux takes - * the DAPM mutex. Post this to a separate job so that DAPM - * power-up can wait for dsp_work to complete without deadlocking - * on the DAPM mutex. - */ - queue_work(cs35l56->dsp_wq, &cs35l56->mux_init_work); err: pm_runtime_mark_last_busy(cs35l56->base.dev); pm_runtime_put_autosuspend(cs35l56->base.dev); @@ -989,6 +977,13 @@ static int cs35l56_component_probe(struct snd_soc_component *component) debugfs_create_bool("can_hibernate", 0444, debugfs_root, &cs35l56->base.can_hibernate); debugfs_create_bool("fw_patched", 0444, debugfs_root, &cs35l56->base.fw_patched); + /* + * The widgets for the ASP1TX mixer can't be initialized + * until the firmware has been downloaded and rebooted. + */ + regcache_drop_region(cs35l56->base.regmap, CS35L56_ASP1TX1_INPUT, CS35L56_ASP1TX4_INPUT); + cs35l56->asp1_mixer_widgets_initialized = false; + queue_work(cs35l56->dsp_wq, &cs35l56->dsp_work); return 0; @@ -999,7 +994,6 @@ static void cs35l56_component_remove(struct snd_soc_component *component) struct cs35l56_private *cs35l56 = snd_soc_component_get_drvdata(component); cancel_work_sync(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->mux_init_work); if (cs35l56->dsp.cs_dsp.booted) wm_adsp_power_down(&cs35l56->dsp); @@ -1070,10 +1064,8 @@ int cs35l56_system_suspend(struct device *dev) dev_dbg(dev, "system_suspend\n"); - if (cs35l56->component) { + if (cs35l56->component) flush_work(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->mux_init_work); - } /* * The interrupt line is normally shared, but after we start suspending @@ -1224,7 +1216,6 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) return -ENOMEM; INIT_WORK(&cs35l56->dsp_work, cs35l56_dsp_work); - INIT_WORK(&cs35l56->mux_init_work, cs35l56_mux_init_work); dsp = &cs35l56->dsp; cs35l56_init_cs_dsp(&cs35l56->base, &dsp->cs_dsp); diff --git a/sound/soc/codecs/cs35l56.h b/sound/soc/codecs/cs35l56.h index 596b141e3f96..b000e7365e40 100644 --- a/sound/soc/codecs/cs35l56.h +++ b/sound/soc/codecs/cs35l56.h @@ -34,7 +34,6 @@ struct cs35l56_private { struct wm_adsp dsp; /* must be first member */ struct cs35l56_base base; struct work_struct dsp_work; - struct work_struct mux_init_work; struct workqueue_struct *dsp_wq; struct snd_soc_component *component; struct regulator_bulk_data supplies[CS35L56_NUM_BULK_SUPPLIES]; @@ -52,6 +51,7 @@ struct cs35l56_private { u8 asp_slot_count; bool tdm_mode; bool sysclk_set; + bool asp1_mixer_widgets_initialized; u8 old_sdw_clock_scale; }; From d7332c4a4f1a7d16f054c6357fb65c597b6a86a7 Mon Sep 17 00:00:00 2001 From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com> Date: Thu, 8 Feb 2024 15:34:32 +0200 Subject: [PATCH 092/304] ASoC: SOF: ipc3-topology: Fix pipeline tear down logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the change in the widget free logic to power down the cores only when the scheduler widgets are freed, we need to ensure that the scheduler widget is freed only after all the widgets associated with the scheduler are freed. This is to ensure that the secondary core that the scheduler is scheduled to run on is kept powered on until all widgets that need them are in use. While this works well for dynamic pipelines, in the case of static pipelines the current logic does not take this into account and frees all widgets in the order they occur in the widget_list. So, modify this to ensure that the scheduler widgets are freed only after all other types of widgets in the widget_list are freed. Link: https://github.com/thesofproject/linux/issues/4807 Fixes: 31ed8da1c8e5 ("ASoC: SOF: sof-audio: Modify logic for enabling/disabling topology cores") Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com> Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Link: https://lore.kernel.org/r/20240208133432.1688-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/sof/ipc3-topology.c | 69 ++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index a8832a1c1a24..d47698f4be2d 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -2360,6 +2360,44 @@ static int sof_tear_down_left_over_pipelines(struct snd_sof_dev *sdev) return 0; } +static int sof_ipc3_free_widgets_in_list(struct snd_sof_dev *sdev, bool include_scheduler, + bool *dyn_widgets, bool verify) +{ + struct sof_ipc_fw_version *v = &sdev->fw_ready.version; + struct snd_sof_widget *swidget; + int ret; + + list_for_each_entry(swidget, &sdev->widget_list, list) { + if (swidget->dynamic_pipeline_widget) { + *dyn_widgets = true; + continue; + } + + /* Do not free widgets for static pipelines with FW older than SOF2.2 */ + if (!verify && !swidget->dynamic_pipeline_widget && + SOF_FW_VER(v->major, v->minor, v->micro) < SOF_FW_VER(2, 2, 0)) { + mutex_lock(&swidget->setup_mutex); + swidget->use_count = 0; + mutex_unlock(&swidget->setup_mutex); + if (swidget->spipe) + swidget->spipe->complete = 0; + continue; + } + + if (include_scheduler && swidget->id != snd_soc_dapm_scheduler) + continue; + + if (!include_scheduler && swidget->id == snd_soc_dapm_scheduler) + continue; + + ret = sof_widget_free(sdev, swidget); + if (ret < 0) + return ret; + } + + return 0; +} + /* * For older firmware, this function doesn't free widgets for static pipelines during suspend. * It only resets use_count for all widgets. @@ -2376,29 +2414,18 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif * This function is called during suspend and for one-time topology verification during * first boot. In both cases, there is no need to protect swidget->use_count and * sroute->setup because during suspend all running streams are suspended and during - * topology loading the sound card unavailable to open PCMs. + * topology loading the sound card unavailable to open PCMs. Do not free the scheduler + * widgets yet so that the secondary cores do not get powered down before all the widgets + * associated with the scheduler are freed. */ - list_for_each_entry(swidget, &sdev->widget_list, list) { - if (swidget->dynamic_pipeline_widget) { - dyn_widgets = true; - continue; - } + ret = sof_ipc3_free_widgets_in_list(sdev, false, &dyn_widgets, verify); + if (ret < 0) + return ret; - /* Do not free widgets for static pipelines with FW older than SOF2.2 */ - if (!verify && !swidget->dynamic_pipeline_widget && - SOF_FW_VER(v->major, v->minor, v->micro) < SOF_FW_VER(2, 2, 0)) { - mutex_lock(&swidget->setup_mutex); - swidget->use_count = 0; - mutex_unlock(&swidget->setup_mutex); - if (swidget->spipe) - swidget->spipe->complete = 0; - continue; - } - - ret = sof_widget_free(sdev, swidget); - if (ret < 0) - return ret; - } + /* free all the scheduler widgets now */ + ret = sof_ipc3_free_widgets_in_list(sdev, true, &dyn_widgets, verify); + if (ret < 0) + return ret; /* * Tear down all pipelines associated with PCMs that did not get suspended From 3ca8fbabcceb8bfe44f7f50640092fd8f1de375c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Date: Thu, 8 Feb 2024 16:02:50 +0000 Subject: [PATCH 093/304] Revert "kobject: Remove redundant checks for whether ktype is NULL" This reverts commit 1b28cb81dab7c1eedc6034206f4e8d644046ad31. It is reported to cause problems, so revert it for now until the root cause can be found. Reported-by: kernel test robot <oliver.sang@intel.com> Fixes: 1b28cb81dab7 ("kobject: Remove redundant checks for whether ktype is NULL") Cc: Zhen Lei <thunder.leizhen@huawei.com> Closes: https://lore.kernel.org/oe-lkp/202402071403.e302e33a-oliver.sang@intel.com Link: https://lore.kernel.org/r/2024020849-consensus-length-6264@gregkh Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- lib/kobject.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/kobject.c b/lib/kobject.c index 59dbcbdb1c91..72fa20f405f1 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -74,10 +74,12 @@ static int create_dir(struct kobject *kobj) if (error) return error; - error = sysfs_create_groups(kobj, ktype->default_groups); - if (error) { - sysfs_remove_dir(kobj); - return error; + if (ktype) { + error = sysfs_create_groups(kobj, ktype->default_groups); + if (error) { + sysfs_remove_dir(kobj); + return error; + } } /* @@ -589,7 +591,8 @@ static void __kobject_del(struct kobject *kobj) sd = kobj->sd; ktype = get_ktype(kobj); - sysfs_remove_groups(kobj, ktype->default_groups); + if (ktype) + sysfs_remove_groups(kobj, ktype->default_groups); /* send "remove" if the caller did not do it but sent "add" */ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { @@ -666,6 +669,10 @@ static void kobject_cleanup(struct kobject *kobj) pr_debug("'%s' (%p): %s, parent %p\n", kobject_name(kobj), kobj, __func__, kobj->parent); + if (t && !t->release) + pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", + kobject_name(kobj), kobj); + /* remove from sysfs if the caller did not do it */ if (kobj->state_in_sysfs) { pr_debug("'%s' (%p): auto cleanup kobject_del\n", @@ -676,13 +683,10 @@ static void kobject_cleanup(struct kobject *kobj) parent = NULL; } - if (t->release) { + if (t && t->release) { pr_debug("'%s' (%p): calling ktype release\n", kobject_name(kobj), kobj); t->release(kobj); - } else { - pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", - kobject_name(kobj), kobj); } /* free name if we allocated it */ @@ -1056,7 +1060,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *pa { const struct kobj_ns_type_operations *ops = NULL; - if (parent && parent->ktype->child_ns_type) + if (parent && parent->ktype && parent->ktype->child_ns_type) ops = parent->ktype->child_ns_type(parent); return ops; From c0ec2a712daf133d9996a8a1b7ee2d4996080363 Mon Sep 17 00:00:00 2001 From: zhenwei pi <pizhenwei@bytedance.com> Date: Tue, 30 Jan 2024 19:27:40 +0800 Subject: [PATCH 094/304] crypto: virtio/akcipher - Fix stack overflow on memcpy sizeof(struct virtio_crypto_akcipher_session_para) is less than sizeof(struct virtio_crypto_op_ctrl_req::u), copying more bytes from stack variable leads stack overflow. Clang reports this issue by commands: make -j CC=clang-14 mrproper >/dev/null 2>&1 make -j O=/tmp/crypto-build CC=clang-14 allmodconfig >/dev/null 2>&1 make -j O=/tmp/crypto-build W=1 CC=clang-14 drivers/crypto/virtio/ virtio_crypto_akcipher_algs.o Fixes: 59ca6c93387d ("virtio-crypto: implement RSA algorithm") Link: https://lore.kernel.org/all/0a194a79-e3a3-45e7-be98-83abd3e1cb7e@roeck-us.net/ Cc: <stable@vger.kernel.org> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com> Tested-by: Nathan Chancellor <nathan@kernel.org> # build Acked-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Jason Wang <jasowang@redhat.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 2621ff8a9376..de53eddf6796 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -104,7 +104,8 @@ static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request * } static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, - struct virtio_crypto_ctrl_header *header, void *para, + struct virtio_crypto_ctrl_header *header, + struct virtio_crypto_akcipher_session_para *para, const uint8_t *key, unsigned int keylen) { struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; @@ -128,7 +129,7 @@ static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher ctrl = &vc_ctrl_req->ctrl; memcpy(&ctrl->header, header, sizeof(ctrl->header)); - memcpy(&ctrl->u, para, sizeof(ctrl->u)); + memcpy(&ctrl->u.akcipher_create_session.para, para, sizeof(*para)); input = &vc_ctrl_req->input; input->status = cpu_to_le32(VIRTIO_CRYPTO_ERR); From 17c8e9ac95d81d10e9619a845a68b83c9384be64 Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 31 Jan 2024 13:05:13 +0100 Subject: [PATCH 095/304] RISC-V: paravirt: steal_time should be static steal_time is not used outside paravirt.c, make it static, as sparse suggested. Fixes: fdf68acccfc6 ("RISC-V: paravirt: Implement steal-time support") Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Atish Patra <atishp@rivosinc.com> Signed-off-by: Anup Patel <anup@brainfault.org> --- arch/riscv/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 8e114f5930ce..15c5d4048db5 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -41,7 +41,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); -DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64); +static DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64); static bool __init has_pv_steal_clock(void) { From 3752219b6007ee0421cc228f442f33b31f85addd Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 31 Jan 2024 13:05:14 +0100 Subject: [PATCH 096/304] RISC-V: paravirt: Use correct restricted types __le32 and __le64 types should be used with le32_to_cpu() and le64_to_cpu(), as sparse helpfully points out. Fixes: fdf68acccfc6 ("RISC-V: paravirt: Implement steal-time support") Reported-by: kernel test robot <lkp@intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202401011933.hL9zqmKo-lkp@intel.com/ Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Atish Patra <atishp@rivosinc.com> Signed-off-by: Anup Patel <anup@brainfault.org> --- arch/riscv/kernel/paravirt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 15c5d4048db5..0d6225fd3194 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -91,8 +91,8 @@ static int pv_time_cpu_down_prepare(unsigned int cpu) static u64 pv_time_steal_clock(int cpu) { struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu); - u32 sequence; - u64 steal; + __le32 sequence; + __le64 steal; /* * Check the sequence field before and after reading the steal From f072b272aa27d57cf7fe6fdedb30fb50f391974e Mon Sep 17 00:00:00 2001 From: Andrew Jones <ajones@ventanamicro.com> Date: Wed, 31 Jan 2024 13:05:15 +0100 Subject: [PATCH 097/304] RISC-V: KVM: Use correct restricted types __le32 and __le64 types should be used with le32_to_cpu() and le64_to_cpu() and __user is needed for pointers referencing guest memory, as sparse helpfully points out. Fixes: e9f12b5fff8a ("RISC-V: KVM: Implement SBI STA extension") Reported-by: kernel test robot <lkp@intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202401020142.lwFEDK5v-lkp@intel.com/ Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Atish Patra <atishp@rivosinc.com> Signed-off-by: Anup Patel <anup@brainfault.org> --- arch/riscv/kvm/vcpu_sbi_sta.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index 01f09fe8c3b0..d8cf9ca28c61 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -26,8 +26,12 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) { gpa_t shmem = vcpu->arch.sta.shmem; u64 last_steal = vcpu->arch.sta.last_steal; - u32 *sequence_ptr, sequence; - u64 *steal_ptr, steal; + __le32 __user *sequence_ptr; + __le64 __user *steal_ptr; + __le32 sequence_le; + __le64 steal_le; + u32 sequence; + u64 steal; unsigned long hva; gfn_t gfn; @@ -47,22 +51,22 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu) return; } - sequence_ptr = (u32 *)(hva + offset_in_page(shmem) + + sequence_ptr = (__le32 __user *)(hva + offset_in_page(shmem) + offsetof(struct sbi_sta_struct, sequence)); - steal_ptr = (u64 *)(hva + offset_in_page(shmem) + + steal_ptr = (__le64 __user *)(hva + offset_in_page(shmem) + offsetof(struct sbi_sta_struct, steal)); - if (WARN_ON(get_user(sequence, sequence_ptr))) + if (WARN_ON(get_user(sequence_le, sequence_ptr))) return; - sequence = le32_to_cpu(sequence); + sequence = le32_to_cpu(sequence_le); sequence += 1; if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr))) return; - if (!WARN_ON(get_user(steal, steal_ptr))) { - steal = le64_to_cpu(steal); + if (!WARN_ON(get_user(steal_le, steal_ptr))) { + steal = le64_to_cpu(steal_le); vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay); steal += vcpu->arch.sta.last_steal - last_steal; WARN_ON(put_user(cpu_to_le64(steal), steal_ptr)); From 7d708c145b2631941b8b0b4a740dc2990818c39c Mon Sep 17 00:00:00 2001 From: Thinh Nguyen <Thinh.Nguyen@synopsys.com> Date: Fri, 9 Feb 2024 01:24:54 +0000 Subject: [PATCH 098/304] Revert "usb: dwc3: Support EBC feature of DWC_usb31" This reverts commit 398aa9a7e77cf23c2a6f882ddd3dcd96f21771dc. The update to the gadget API to support EBC feature is incomplete. It's missing at least the following: * New usage documentation * Gadget capability check * Condition for the user to check how many and which endpoints can be used as "fifo_mode" * Description of how it can affect completed request (e.g. dwc3 won't update TRB on completion -- ie. how it can affect request's actual length report) Let's revert this until it's ready. Fixes: 398aa9a7e77c ("usb: dwc3: Support EBC feature of DWC_usb31") Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com> Link: https://lore.kernel.org/r/3042f847ff904b4dd4e4cf66a1b9df470e63439e.1707441690.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/dwc3/core.h | 1 - drivers/usb/dwc3/gadget.c | 6 ------ drivers/usb/dwc3/gadget.h | 2 -- include/linux/usb/gadget.h | 1 - 4 files changed, 10 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index e3eea965e57b..e120611a5174 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -376,7 +376,6 @@ /* Global HWPARAMS4 Register */ #define DWC3_GHWPARAMS4_HIBER_SCRATCHBUFS(n) (((n) & (0x0f << 13)) >> 13) #define DWC3_MAX_HIBER_SCRATCHBUFS 15 -#define DWC3_EXT_BUFF_CONTROL BIT(21) /* Global HWPARAMS6 Register */ #define DWC3_GHWPARAMS6_BCSUPPORT BIT(14) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 564976b3e2b9..4c8dd6724678 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -673,12 +673,6 @@ static int dwc3_gadget_set_ep_config(struct dwc3_ep *dep, unsigned int action) params.param1 |= DWC3_DEPCFG_BINTERVAL_M1(bInterval_m1); } - if (dep->endpoint.fifo_mode) { - if (!(dwc->hwparams.hwparams4 & DWC3_EXT_BUFF_CONTROL)) - return -EINVAL; - params.param1 |= DWC3_DEPCFG_EBC_HWO_NOWB | DWC3_DEPCFG_USE_EBC; - } - return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETEPCONFIG, ¶ms); } diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index fd7a4e94397e..55a56cf67d73 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -26,8 +26,6 @@ struct dwc3; #define DWC3_DEPCFG_XFER_NOT_READY_EN BIT(10) #define DWC3_DEPCFG_FIFO_ERROR_EN BIT(11) #define DWC3_DEPCFG_STREAM_EVENT_EN BIT(13) -#define DWC3_DEPCFG_EBC_HWO_NOWB BIT(14) -#define DWC3_DEPCFG_USE_EBC BIT(15) #define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE BIT(24) #define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index a771ccc038ac..6532beb587b1 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -236,7 +236,6 @@ struct usb_ep { unsigned max_streams:16; unsigned mult:2; unsigned maxburst:5; - unsigned fifo_mode:1; u8 address; const struct usb_endpoint_descriptor *desc; const struct usb_ss_ep_comp_descriptor *comp_desc; From e5aa6d51a2ef8c7ef7e3fe76bebe530fb68e7f08 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn <lukas.bulwahn@gmail.com> Date: Fri, 9 Feb 2024 09:20:44 +0100 Subject: [PATCH 099/304] ALSA: hda/cs35l56: select intended config FW_CS_DSP Commit 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") adds configs SND_HDA_SCODEC_CS35L56_{I2C,SPI}, which selects the non-existing config CS_DSP. Note the renaming in commit d7cfdf17cb9d ("firmware: cs_dsp: Rename KConfig symbol CS_DSP -> FW_CS_DSP"), though. Select the intended config FW_CS_DSP. This broken select command probably was not noticed as the configs also select SND_HDA_CS_DSP_CONTROLS and this then selects FW_CS_DSP. So, the select FW_CS_DSP could actually be dropped, but we will keep this redundancy in place as the author originally also intended to have this redundancy of selects in place. Fixes: 73cfbfa9caea ("ALSA: hda/cs35l56: Add driver for Cirrus Logic CS35L56 amplifier") Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com> Reviewed-by: Simon Trimmer <simont@opensource.cirrus.com> Link: https://lore.kernel.org/r/20240209082044.3981-1-lukas.bulwahn@gmail.com Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 21a90b3c4cc7..8e0ff70fb610 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -156,7 +156,7 @@ config SND_HDA_SCODEC_CS35L56_I2C depends on I2C depends on ACPI || COMPILE_TEST depends on SND_SOC - select CS_DSP + select FW_CS_DSP select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 @@ -171,7 +171,7 @@ config SND_HDA_SCODEC_CS35L56_SPI depends on SPI_MASTER depends on ACPI || COMPILE_TEST depends on SND_SOC - select CS_DSP + select FW_CS_DSP select SND_HDA_GENERIC select SND_SOC_CS35L56_SHARED select SND_HDA_SCODEC_CS35L56 From 727b943263dc98a7aca355cc0302158218f71543 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald <rf@opensource.cirrus.com> Date: Fri, 9 Feb 2024 14:57:00 +0000 Subject: [PATCH 100/304] ASoC: cs35l56: Remove default from IRQ1_CFG register The driver never uses the IRQ1_CFG register so there's no need to provide a default value. It's set as a readable register only for debugging through the regmap registers file. A system-specific firmware could overwrite this register with a non-default value. Therefore the driver can't hardcode what the initial value actually is. As the register is only for debugging the value can be left unknown until someone wants to read it through debugfs. Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com> Link: https://lore.kernel.org/r/20240209145700.1555950-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/cs35l56-shared.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 02fba4bc0a14..995d979b6d87 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -51,7 +51,6 @@ static const struct reg_default cs35l56_reg_defaults[] = { { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, - { CS35L56_IRQ1_CFG, 0x00000000 }, { CS35L56_IRQ1_MASK_1, 0x83ffffff }, { CS35L56_IRQ1_MASK_2, 0xffff7fff }, { CS35L56_IRQ1_MASK_4, 0xe0ffffff }, From 61da7c8e2a602f66be578cbbcebe8638c10e0f48 Mon Sep 17 00:00:00 2001 From: Mark Brown <broonie@kernel.org> Date: Tue, 30 Jan 2024 15:43:53 +0000 Subject: [PATCH 101/304] arm64/signal: Don't assume that TIF_SVE means we saved SVE state When we are in a syscall we will only save the FPSIMD subset even though the task still has access to the full register set, and on context switch we will only remove TIF_SVE when loading the register state. This means that the signal handling code should not assume that TIF_SVE means that the register state is stored in SVE format, it should instead check the format that was recorded during save. Fixes: 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch") Signed-off-by: Mark Brown <broonie@kernel.org> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240130-arm64-sve-signal-regs-v2-1-9fc6f9502782@kernel.org Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/kernel/signal.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index a5dc6f764195..25ceaee6b025 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1635,7 +1635,7 @@ void fpsimd_preserve_current_state(void) void fpsimd_signal_preserve_current_state(void) { fpsimd_preserve_current_state(); - if (test_thread_flag(TIF_SVE)) + if (current->thread.fp_type == FP_STATE_SVE) sve_to_fpsimd(current); } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 0e8beb3349ea..425b1bc17a3f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -242,7 +242,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) vl = task_get_sme_vl(current); vq = sve_vq_from_vl(vl); flags |= SVE_SIG_FLAG_SM; - } else if (test_thread_flag(TIF_SVE)) { + } else if (current->thread.fp_type == FP_STATE_SVE) { vq = sve_vq_from_vl(vl); } @@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; - if (add_all || test_thread_flag(TIF_SVE) || + if (add_all || current->thread.fp_type == FP_STATE_SVE || thread_sm_enabled(¤t->thread)) { int vl = max(sve_max_vl(), sme_max_vl()); From 719da04f2d1285922abca72b074fb6fa75d464ea Mon Sep 17 00:00:00 2001 From: Hojin Nam <hj96.nam@samsung.com> Date: Thu, 8 Feb 2024 10:34:15 +0900 Subject: [PATCH 102/304] perf: CXL: fix mismatched cpmu event opcode S2M NDR BI-ConflictAck opcode is described as 4 in the CXL r3.0 3.3.9 Table 3.43. However, it is defined as 3 in macro definition. Fixes: 5d7107c72796 ("perf: CXL Performance Monitoring Unit driver") Signed-off-by: Hojin Nam <hj96.nam@samsung.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/20240208013415epcms2p2904187c8a863f4d0d2adc980fb91a2dc@epcms2p2 Signed-off-by: Will Deacon <will@kernel.org> --- drivers/perf/cxl_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/cxl_pmu.c b/drivers/perf/cxl_pmu.c index 365d964b0f6a..bc0d414a6aff 100644 --- a/drivers/perf/cxl_pmu.c +++ b/drivers/perf/cxl_pmu.c @@ -419,7 +419,7 @@ static struct attribute *cxl_pmu_event_attrs[] = { CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp, CXL_PMU_GID_S2M_NDR, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps, CXL_PMU_GID_S2M_NDR, BIT(1)), CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe, CXL_PMU_GID_S2M_NDR, BIT(2)), - CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(3)), + CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack, CXL_PMU_GID_S2M_NDR, BIT(4)), /* CXL rev 3.0 Table 3-46 S2M DRS opcodes */ CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata, CXL_PMU_GID_S2M_DRS, BIT(0)), CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm, CXL_PMU_GID_S2M_DRS, BIT(1)), From c0b26c06ed008c0898829dda4c2783b8ec478350 Mon Sep 17 00:00:00 2001 From: Seongsu Park <sgsu.park@samsung.com> Date: Fri, 2 Feb 2024 10:33:06 +0900 Subject: [PATCH 103/304] arm64: fix typo in comments fix typo in comments thath -> that Signed-off-by: Seongsu Park <sgsu.park@samsung.com> Link: https://lore.kernel.org/r/20240202013306.883777-1-sgsu.park@samsung.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 21c824edf8ce..bd8d4ca81a48 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -83,7 +83,7 @@ struct arm64_ftr_bits { * to full-0 denotes that this field has no override * * A @mask field set to full-0 with the corresponding @val field set - * to full-1 denotes thath this field has an invalid override. + * to full-1 denotes that this field has an invalid override. */ struct arm64_ftr_override { u64 val; From f9daab0ad01cf9d165dbbbf106ca4e61d06e7fe8 Mon Sep 17 00:00:00 2001 From: Fangrui Song <maskray@google.com> Date: Mon, 5 Feb 2024 23:45:52 -0800 Subject: [PATCH 104/304] arm64: jump_label: use constraints "Si" instead of "i" The generic constraint "i" seems to be copied from x86 or arm (and with a redundant generic operand modifier "c"). It works with -fno-PIE but not with -fPIE/-fPIC in GCC's aarch64 port. The machine constraint "S", which denotes a symbol or label reference with a constant offset, supports PIC and has been available in GCC since 2012 and in Clang since 7.0. However, Clang before 19 does not support "S" on a symbol with a constant offset [1] (e.g. `static_key_false(&nf_hooks_needed[pf][hook])` in include/linux/netfilter.h), so we use "i" as a fallback. Suggested-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Fangrui Song <maskray@google.com> Link: https://github.com/llvm/llvm-project/pull/80255 [1] Acked-by: Mark Rutland <mark.rutland@arm.com> Link: https://lore.kernel.org/r/20240206074552.541154-1-maskray@google.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/jump_label.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 48ddc0f45d22..b7716b215f91 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -15,6 +15,10 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE +/* + * Prefer the constraint "S" to support PIC with GCC. Clang before 19 does not + * support "S" on a symbol with a constant offset, so we use "i" as a fallback. + */ static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { @@ -23,9 +27,9 @@ static __always_inline bool arch_static_branch(struct static_key * const key, " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" + " .quad (%[key] - .) + %[bit0] \n\t" " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); return false; l_yes: @@ -40,9 +44,9 @@ static __always_inline bool arch_static_branch_jump(struct static_key * const ke " .pushsection __jump_table, \"aw\" \n\t" " .align 3 \n\t" " .long 1b - ., %l[l_yes] - . \n\t" - " .quad %c0 - . \n\t" + " .quad (%[key] - .) + %[bit0] \n\t" " .popsection \n\t" - : : "i"(&((char *)key)[branch]) : : l_yes); + : : [key]"Si"(key), [bit0]"i"(branch) : : l_yes); return false; l_yes: From 50572064ec7109b00eef8880e905f55861c8b3de Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen <ilkka@os.amperecomputing.com> Date: Fri, 9 Feb 2024 17:11:09 +0000 Subject: [PATCH 105/304] perf/arm-cmn: Workaround AmpereOneX errata AC04_MESH_1 (incorrect child count) AmpereOneX mesh implementation has a bug in HN-P nodes that makes them report incorrect child count. The failing crosspoints report 8 children while they only have two. When the driver tries to access the inexistent child nodes, it believes it has reached an invalid node type and probing fails. The workaround is to ignore those incorrect child nodes and continue normally. Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> [ rm: rewrote simpler generalised version ] Tested-by: Ilkka Koskinen <ilkka@os.amperecomputing.com> Signed-off-by: Robin Murphy <robin.murphy@arm.com> Link: https://lore.kernel.org/r/ce4b1442135fe03d0de41859b04b268c88c854a3.1707498577.git.robin.murphy@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- drivers/perf/arm-cmn.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index c584165b13ba..7e3aa7e2345f 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -2305,6 +2305,17 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) dev_dbg(cmn->dev, "ignoring external node %llx\n", reg); continue; } + /* + * AmpereOneX erratum AC04_MESH_1 makes some XPs report a bogus + * child count larger than the number of valid child pointers. + * A child offset of 0 can only occur on CMN-600; otherwise it + * would imply the root node being its own grandchild, which + * we can safely dismiss in general. + */ + if (reg == 0 && cmn->part != PART_CMN600) { + dev_dbg(cmn->dev, "bogus child pointer?\n"); + continue; + } arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn); From 41044d5360685e78a869d40a168491a70cdb7e73 Mon Sep 17 00:00:00 2001 From: Alex Williamson <alex.williamson@redhat.com> Date: Tue, 23 Jan 2024 11:55:31 -0700 Subject: [PATCH 106/304] PCI: Fix active state requirement in PME polling The commit noted in fixes added a bogus requirement that runtime PM managed devices need to be in the RPM_ACTIVE state for PME polling. In fact, only devices in low power states should be polled. However there's still a requirement that the device config space must be accessible, which has implications for both the current state of the polled device and the parent bridge, when present. It's not sufficient to assume the bridge remains in D0 and cases have been observed where the bridge passes the D0 test, but the PM state indicates RPM_SUSPENDING and config space of the polled device becomes inaccessible during pci_pme_wakeup(). Therefore, since the bridge is already effectively required to be in the RPM_ACTIVE state, formalize this in the code and elevate the PM usage count to maintain the state while polling the subordinate device. This resolves a regression reported in the bugzilla below where a Thunderbolt/USB4 hierarchy fails to scan for an attached NVMe endpoint downstream of a bridge in a D3hot power state. Link: https://lore.kernel.org/r/20240123185548.1040096-1-alex.williamson@redhat.com Fixes: d3fcd7360338 ("PCI: Fix runtime PM race with PME polling") Reported-by: Sanath S <sanath.s@amd.com> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218360 Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Tested-by: Sanath S <sanath.s@amd.com> Reviewed-by: Rafael J. Wysocki <rafael@kernel.org> Cc: Lukas Wunner <lukas@wunner.de> Cc: Mika Westerberg <mika.westerberg@linux.intel.com> --- drivers/pci/pci.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d8f11a078924..0a80156fe812 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2496,29 +2496,36 @@ static void pci_pme_list_scan(struct work_struct *work) if (pdev->pme_poll) { struct pci_dev *bridge = pdev->bus->self; struct device *dev = &pdev->dev; - int pm_status; + struct device *bdev = bridge ? &bridge->dev : NULL; + int bref = 0; /* - * If bridge is in low power state, the - * configuration space of subordinate devices - * may be not accessible + * If we have a bridge, it should be in an active/D0 + * state or the configuration space of subordinate + * devices may not be accessible or stable over the + * course of the call. */ - if (bridge && bridge->current_state != PCI_D0) - continue; + if (bdev) { + bref = pm_runtime_get_if_active(bdev, true); + if (!bref) + continue; + + if (bridge->current_state != PCI_D0) + goto put_bridge; + } /* - * If the device is in a low power state it - * should not be polled either. + * The device itself should be suspended but config + * space must be accessible, therefore it cannot be in + * D3cold. */ - pm_status = pm_runtime_get_if_active(dev, true); - if (!pm_status) - continue; - - if (pdev->current_state != PCI_D3cold) + if (pm_runtime_suspended(dev) && + pdev->current_state != PCI_D3cold) pci_pme_wakeup(pdev, NULL); - if (pm_status > 0) - pm_runtime_put(dev); +put_bridge: + if (bref > 0) + pm_runtime_put(bdev); } else { list_del(&pme_dev->list); kfree(pme_dev); From a22b0a2be69a36511cb5b37d948b651ddf7debf3 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav <demonsingur@gmail.com> Date: Wed, 7 Feb 2024 15:20:05 +0200 Subject: [PATCH 107/304] iio: adc: ad4130: zero-initialize clock init data The clk_init_data struct does not have all its members initialized, causing issues when trying to expose the internal clock on the CLK pin. Fix this by zero-initializing the clk_init_data struct. Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Signed-off-by: Cosmin Tanislav <demonsingur@gmail.com> Reviewed-by: Nuno Sa <nuno.sa@analog.com> Link: https://lore.kernel.org/r/20240207132007.253768-1-demonsingur@gmail.com Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/adc/ad4130.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index feb86fe6c422..9daeac16499b 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1821,7 +1821,7 @@ static int ad4130_setup_int_clk(struct ad4130_state *st) { struct device *dev = &st->spi->dev; struct device_node *of_node = dev_of_node(dev); - struct clk_init_data init; + struct clk_init_data init = {}; const char *clk_name; int ret; From 78367c32bebfe833cd30c855755d863a4ff3fdee Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav <demonsingur@gmail.com> Date: Wed, 7 Feb 2024 15:20:06 +0200 Subject: [PATCH 108/304] iio: adc: ad4130: only set GPIO_CTRL if pin is unused Currently, GPIO_CTRL bits are set even if the pins are used for measurements. GPIO_CTRL bits should only be set if the pin is not used for other functionality. Fix this by only setting the GPIO_CTRL bits if the pin has no other function. Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Signed-off-by: Cosmin Tanislav <demonsingur@gmail.com> Reviewed-by: Nuno Sa <nuno.sa@analog.com> Link: https://lore.kernel.org/r/20240207132007.253768-2-demonsingur@gmail.com Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> --- drivers/iio/adc/ad4130.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index 9daeac16499b..62490424b6ae 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1891,10 +1891,14 @@ static int ad4130_setup(struct iio_dev *indio_dev) return ret; /* - * Configure all GPIOs for output. If configured, the interrupt function - * of P2 takes priority over the GPIO out function. + * Configure unused GPIOs for output. If configured, the interrupt + * function of P2 takes priority over the GPIO out function. */ - val = AD4130_IO_CONTROL_GPIO_CTRL_MASK; + val = 0; + for (i = 0; i < AD4130_MAX_GPIOS; i++) + if (st->pins_fn[i + AD4130_AIN2_P1] == AD4130_PIN_FN_NONE) + val |= FIELD_PREP(AD4130_IO_CONTROL_GPIO_CTRL_MASK, BIT(i)); + val |= FIELD_PREP(AD4130_IO_CONTROL_INT_PIN_SEL_MASK, st->int_pin_sel); ret = regmap_write(st->regmap, AD4130_IO_CONTROL_REG, val); From 1a1c93e7f8141749ecb10165a95e95ad484bb85f Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Fri, 9 Feb 2024 18:34:05 -0500 Subject: [PATCH 109/304] bcachefs: Fix missing bch2_err_class() calls We aren't supposed to be leaking our private error codes outside of fs/bcachefs/. Fixes: Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/bcachefs/fs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index ec419b8e2c43..77ae65542db9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -435,7 +435,7 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, bch2_subvol_is_ro(c, inode->ei_subvol) ?: __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) - return ret; + return bch2_err_class(ret); ihold(&inode->v); d_instantiate(dentry, &inode->v); @@ -487,8 +487,9 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) struct bch_inode_info *dir= to_bch_ei(vdir); struct bch_fs *c = dir->v.i_sb->s_fs_info; - return bch2_subvol_is_ro(c, dir->ei_subvol) ?: + int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: __bch2_unlink(vdir, dentry, false); + return bch2_err_class(ret); } static int bch2_symlink(struct mnt_idmap *idmap, @@ -523,7 +524,7 @@ static int bch2_symlink(struct mnt_idmap *idmap, return 0; err: iput(&inode->v); - return ret; + return bch2_err_class(ret); } static int bch2_mkdir(struct mnt_idmap *idmap, @@ -641,7 +642,7 @@ err: src_inode, dst_inode); - return ret; + return bch2_err_class(ret); } static void bch2_setattr_copy(struct mnt_idmap *idmap, From 7dcfb87af9731d051decb9f99ec3cc35cf684865 Mon Sep 17 00:00:00 2001 From: Su Yue <glass.su@suse.com> Date: Fri, 9 Feb 2024 21:41:31 +0800 Subject: [PATCH 110/304] bcachefs: fix kmemleak in __bch2_read_super error handling path During xfstest tests, there are some kmemleak reports e.g. generic/051 with if USE_KMEMLEAK=yes: ==================================================================== EXPERIMENTAL kmemleak reported some memory leaks! Due to the way kmemleak works, the leak might be from an earlier test, or something totally unrelated. unreferenced object 0xffff9ef905aaf778 (size 8): comm "mount.bcachefs", pid 169844, jiffies 4295281209 (age 87.040s) hex dump (first 8 bytes): a5 cc cc cc cc cc cc cc ........ backtrace: [<ffffffff87fd9a43>] __kmem_cache_alloc_node+0x1f3/0x2c0 [<ffffffff87f49b66>] kmalloc_trace+0x26/0xb0 [<ffffffffc0a3fefe>] __bch2_read_super+0xfe/0x4e0 [bcachefs] [<ffffffffc0a3ad22>] bch2_fs_open+0x262/0x1710 [bcachefs] [<ffffffffc09c9e24>] bch2_mount+0x4c4/0x640 [bcachefs] [<ffffffff88080c90>] legacy_get_tree+0x30/0x60 [<ffffffff8802c748>] vfs_get_tree+0x28/0xf0 [<ffffffff88061fe5>] path_mount+0x475/0xb60 [<ffffffff880627e5>] __x64_sys_mount+0x105/0x140 [<ffffffff88932642>] do_syscall_64+0x42/0xf0 [<ffffffff88a000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76 unreferenced object 0xffff9ef96cdc4fc0 (size 32): comm "mount.bcachefs", pid 169844, jiffies 4295281209 (age 87.040s) hex dump (first 32 bytes): 2f 64 65 76 2f 6d 61 70 70 65 72 2f 74 65 73 74 /dev/mapper/test 2d 31 00 cc cc cc cc cc cc cc cc cc cc cc cc cc -1.............. backtrace: [<ffffffff87fd9a43>] __kmem_cache_alloc_node+0x1f3/0x2c0 [<ffffffff87f4a081>] __kmalloc_node_track_caller+0x51/0x150 [<ffffffff87f3adc2>] kstrdup+0x32/0x60 [<ffffffffc0a3ff1a>] __bch2_read_super+0x11a/0x4e0 [bcachefs] [<ffffffffc0a3ad22>] bch2_fs_open+0x262/0x1710 [bcachefs] [<ffffffffc09c9e24>] bch2_mount+0x4c4/0x640 [bcachefs] [<ffffffff88080c90>] legacy_get_tree+0x30/0x60 [<ffffffff8802c748>] vfs_get_tree+0x28/0xf0 [<ffffffff88061fe5>] path_mount+0x475/0xb60 [<ffffffff880627e5>] __x64_sys_mount+0x105/0x140 [<ffffffff88932642>] do_syscall_64+0x42/0xf0 [<ffffffff88a000e6>] entry_SYSCALL_64_after_hwframe+0x6e/0x76 ==================================================================== The leak happens if bdev_open_by_path() failed to open a block device then it goes label 'out' directly without call of bch2_free_super(). Fix it by going to label 'err' instead of 'out' if bdev_open_by_path() fails. Signed-off-by: Su Yue <glass.su@suse.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/bcachefs/super-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index d60c7d27a047..36988add581f 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -717,7 +717,7 @@ retry: if (IS_ERR(sb->bdev_handle)) { ret = PTR_ERR(sb->bdev_handle); - goto out; + goto err; } sb->bdev = sb->bdev_handle->bdev; From 04eb57930e4e471d8f16455a748df2009d43c139 Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Sat, 10 Feb 2024 16:28:52 -0500 Subject: [PATCH 111/304] bcachefs: fix missing endiannes conversion in sb_members Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/bcachefs/sb-members.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index a45354d2acde..eff5ce18c69c 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -421,7 +421,7 @@ void bch2_dev_errors_reset(struct bch_dev *ca) m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); - m->errors_reset_time = ktime_get_real_seconds(); + m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); mutex_unlock(&c->sb_lock); From 7d99a70b65951108d82e1618c67abe69c3ed7720 Mon Sep 17 00:00:00 2001 From: Hans de Goede <hdegoede@redhat.com> Date: Sat, 10 Feb 2024 14:43:59 +0100 Subject: [PATCH 112/304] ASoC: Intel: Boards: Fix NULL pointer deref in BYT/CHT boards Since commit 13f58267cda3 ("ASoC: soc.h: don't create dummy Component via COMP_DUMMY()") dummy snd_soc_dai_link.codecs entries no longer have a name set. This means that when looking for the codec dai_link the machine driver can no longer unconditionally run strcmp() on snd_soc_dai_link.codecs[0].name since this may now be NULL. Add a check for snd_soc_dai_link.codecs[0].name being NULL to all BYT/CHT machine drivers to avoid NULL pointer dereferences in their probe() methods. Fixes: 13f58267cda3 ("ASoC: soc.h: don't create dummy Component via COMP_DUMMY()") Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Hans de Goede <hdegoede@redhat.com> Link: https://lore.kernel.org/r/20240210134400.24913-2-hdegoede@redhat.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/intel/boards/bytcht_cx2072x.c | 3 ++- sound/soc/intel/boards/bytcht_da7213.c | 3 ++- sound/soc/intel/boards/bytcht_es8316.c | 3 ++- sound/soc/intel/boards/bytcr_rt5640.c | 3 ++- sound/soc/intel/boards/bytcr_rt5651.c | 3 ++- sound/soc/intel/boards/bytcr_wm5102.c | 3 ++- sound/soc/intel/boards/cht_bsw_rt5645.c | 3 ++- sound/soc/intel/boards/cht_bsw_rt5672.c | 3 ++- 8 files changed, 16 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c index 10a84a2c1036..c014d85a08b2 100644 --- a/sound/soc/intel/boards/bytcht_cx2072x.c +++ b/sound/soc/intel/boards/bytcht_cx2072x.c @@ -241,7 +241,8 @@ static int snd_byt_cht_cx2072x_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_cht_cx2072x_dais); i++) { - if (!strcmp(byt_cht_cx2072x_dais[i].codecs->name, + if (byt_cht_cx2072x_dais[i].codecs->name && + !strcmp(byt_cht_cx2072x_dais[i].codecs->name, "i2c-14F10720:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c index 7e5eea690023..f4ac3ddd148b 100644 --- a/sound/soc/intel/boards/bytcht_da7213.c +++ b/sound/soc/intel/boards/bytcht_da7213.c @@ -245,7 +245,8 @@ static int bytcht_da7213_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(dailink); i++) { - if (!strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) { + if (dailink[i].codecs->name && + !strcmp(dailink[i].codecs->name, "i2c-DLGS7213:00")) { dai_index = i; break; } diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 1564a88a885e..2fcec2e02bb5 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -546,7 +546,8 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_cht_es8316_dais); i++) { - if (!strcmp(byt_cht_es8316_dais[i].codecs->name, + if (byt_cht_es8316_dais[i].codecs->name && + !strcmp(byt_cht_es8316_dais[i].codecs->name, "i2c-ESSX8316:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 42466b4b1ca4..03be5e26ec4a 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1652,7 +1652,8 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_rt5640_dais); i++) { - if (!strcmp(byt_rt5640_dais[i].codecs->name, + if (byt_rt5640_dais[i].codecs->name && + !strcmp(byt_rt5640_dais[i].codecs->name, "i2c-10EC5640:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index f9fe8414f454..80c841b000a3 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -910,7 +910,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) /* fix index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_rt5651_dais); i++) { - if (!strcmp(byt_rt5651_dais[i].codecs->name, + if (byt_rt5651_dais[i].codecs->name && + !strcmp(byt_rt5651_dais[i].codecs->name, "i2c-10EC5651:00")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/bytcr_wm5102.c b/sound/soc/intel/boards/bytcr_wm5102.c index 6978ebde6693..cccb5e90c0fe 100644 --- a/sound/soc/intel/boards/bytcr_wm5102.c +++ b/sound/soc/intel/boards/bytcr_wm5102.c @@ -605,7 +605,8 @@ static int snd_byt_wm5102_mc_probe(struct platform_device *pdev) /* find index of codec dai */ for (i = 0; i < ARRAY_SIZE(byt_wm5102_dais); i++) { - if (!strcmp(byt_wm5102_dais[i].codecs->name, + if (byt_wm5102_dais[i].codecs->name && + !strcmp(byt_wm5102_dais[i].codecs->name, "wm5102-codec")) { dai_index = i; break; diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index c952a96cde7e..7773f61064f4 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -571,7 +571,8 @@ static int snd_cht_mc_probe(struct platform_device *pdev) /* set correct codec name */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) - if (!strcmp(card->dai_link[i].codecs->name, + if (card->dai_link[i].codecs->name && + !strcmp(card->dai_link[i].codecs->name, "i2c-10EC5645:00")) { card->dai_link[i].codecs->name = drv->codec_name; dai_index = i; diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c index 8cf0b33cc02e..be2d1a8dbca8 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5672.c +++ b/sound/soc/intel/boards/cht_bsw_rt5672.c @@ -466,7 +466,8 @@ static int snd_cht_mc_probe(struct platform_device *pdev) /* find index of codec dai */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) { - if (!strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) { + if (cht_dailink[i].codecs->name && + !strcmp(cht_dailink[i].codecs->name, RT5672_I2C_DEFAULT)) { dai_index = i; break; } From 930375d34de67e129566caca008de0bbc54a4646 Mon Sep 17 00:00:00 2001 From: Hans de Goede <hdegoede@redhat.com> Date: Sat, 10 Feb 2024 14:44:00 +0100 Subject: [PATCH 113/304] ASoC: Intel: cht_bsw_rt5645: Cleanup codec_name handling 4 fixes / cleanups to the rt5645 mc driver's codec_name handling: 1. In the for loop looking for the dai_index for the codec, replace card->dai_link[i] with cht_dailink[i]. The for loop already uses ARRAY_SIZE(cht_dailink) as bound and card->dai_link is just a pointer to cht_dailink using card->dai_link only obfuscates that cht_dailink is being modified directly rather then say a copy of cht_dailink. Using cht_dailink[i] also makes the code consistent with other machine drivers. 2. Don't set cht_dailink[dai_index].codecs->name in the for loop, this immediately gets overridden using acpi_dev_name(adev) directly below the loop. 3. Add a missing break to the loop. 4. Remove the now no longer used (only set, never read) codec_name field from struct cht_mc_private. Signed-off-by: Hans de Goede <hdegoede@redhat.com> Link: https://lore.kernel.org/r/20240210134400.24913-3-hdegoede@redhat.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/intel/boards/cht_bsw_rt5645.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index 7773f61064f4..eb41b7115d01 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -40,7 +40,6 @@ struct cht_acpi_card { struct cht_mc_private { struct snd_soc_jack jack; struct cht_acpi_card *acpi_card; - char codec_name[SND_ACPI_I2C_ID_LEN]; struct clk *mclk; }; @@ -567,15 +566,14 @@ static int snd_cht_mc_probe(struct platform_device *pdev) } card->dev = &pdev->dev; - sprintf(drv->codec_name, "i2c-%s:00", drv->acpi_card->codec_id); /* set correct codec name */ for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) - if (card->dai_link[i].codecs->name && - !strcmp(card->dai_link[i].codecs->name, + if (cht_dailink[i].codecs->name && + !strcmp(cht_dailink[i].codecs->name, "i2c-10EC5645:00")) { - card->dai_link[i].codecs->name = drv->codec_name; dai_index = i; + break; } /* fixup codec name based on HID */ From 6ef5d5b92f7117b324efaac72b3db27ae8bb3082 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov <khoroshilov@ispras.ru> Date: Sun, 11 Feb 2024 12:58:34 +0300 Subject: [PATCH 114/304] ASoC: rt5645: Fix deadlock in rt5645_jack_detect_work() There is a path in rt5645_jack_detect_work(), where rt5645->jd_mutex is left locked forever. That may lead to deadlock when rt5645_jack_detect_work() is called for the second time. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: cdba4301adda ("ASoC: rt5650: add mutex to avoid the jack detection failure") Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru> Link: https://lore.kernel.org/r/1707645514-21196-1-git-send-email-khoroshilov@ispras.ru Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/rt5645.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 5150d6ee3748..0cc2fa131d48 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3317,6 +3317,7 @@ static void rt5645_jack_detect_work(struct work_struct *work) report, SND_JACK_HEADPHONE); snd_soc_jack_report(rt5645->mic_jack, report, SND_JACK_MICROPHONE); + mutex_unlock(&rt5645->jd_mutex); return; case 4: val = snd_soc_component_read(rt5645->component, RT5645_A_JD_CTRL1) & 0x0020; From c4b603c6e2df3a17831731d8bbec5c16fa7bbdf8 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com> Date: Fri, 9 Feb 2024 01:43:14 +0200 Subject: [PATCH 115/304] ASoC: SOF: amd: Fix locking in ACP IRQ handler A recent change in acp_irq_thread() was meant to address a potential race condition while trying to acquire the hardware semaphore responsible for the synchronization between firmware and host IPC interrupts. This resulted in an improper use of the IPC spinlock, causing normal kernel memory allocations (which may sleep) inside atomic contexts: 1707255557.133976 kernel: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:315 ... 1707255557.134757 kernel: sof_ipc3_rx_msg+0x70/0x130 [snd_sof] 1707255557.134793 kernel: acp_sof_ipc_irq_thread+0x1e0/0x550 [snd_sof_amd_acp] 1707255557.134855 kernel: acp_irq_thread+0xa3/0x130 [snd_sof_amd_acp] 1707255557.134904 kernel: ? irq_thread+0xb5/0x1e0 1707255557.134947 kernel: ? __pfx_irq_thread_fn+0x10/0x10 1707255557.134985 kernel: irq_thread_fn+0x23/0x60 Moreover, there are attempts to lock a mutex from the same atomic context: 1707255557.136357 kernel: ============================= 1707255557.136393 kernel: [ BUG: Invalid wait context ] 1707255557.136413 kernel: 6.8.0-rc3-next-20240206-audio-next #9 Tainted: G W 1707255557.136432 kernel: ----------------------------- 1707255557.136451 kernel: irq/66-AudioDSP/502 is trying to lock: 1707255557.136470 kernel: ffff965152f26af8 (&sb->s_type->i_mutex_key#2){+.+.}-{3:3}, at: start_creating.part.0+0x5f/0x180 ... 1707255557.137429 kernel: start_creating.part.0+0x5f/0x180 1707255557.137457 kernel: __debugfs_create_file+0x61/0x210 1707255557.137475 kernel: snd_sof_debugfs_io_item+0x75/0xc0 [snd_sof] 1707255557.137494 kernel: sof_ipc3_do_rx_work+0x7cf/0x9f0 [snd_sof] 1707255557.137513 kernel: sof_ipc3_rx_msg+0xb3/0x130 [snd_sof] 1707255557.137532 kernel: acp_sof_ipc_irq_thread+0x1e0/0x550 [snd_sof_amd_acp] 1707255557.137551 kernel: acp_irq_thread+0xa3/0x130 [snd_sof_amd_acp] Fix the issues by reducing the lock scope in acp_irq_thread(), so that it guards only the hardware semaphore acquiring attempt. Additionally, restore the initial locking in acp_sof_ipc_irq_thread() to synchronize the handling of immediate replies from DSP core. Fixes: 802134c8c2c8 ("ASoC: SOF: amd: Refactor spinlock_irq(&sdev->ipc_lock) sequence in irq_handler") Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com> Link: https://lore.kernel.org/r/20240208234315.2182048-1-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/sof/amd/acp-ipc.c | 2 ++ sound/soc/sof/amd/acp.c | 17 ++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c index 2743f07a5e08..b44b1b1adb6e 100644 --- a/sound/soc/sof/amd/acp-ipc.c +++ b/sound/soc/sof/amd/acp-ipc.c @@ -188,11 +188,13 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context) dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write); if (dsp_ack) { + spin_lock_irq(&sdev->ipc_lock); /* handle immediate reply from DSP core */ acp_dsp_ipc_get_reply(sdev); snd_sof_ipc_reply(sdev, 0); /* set the done bit */ acp_dsp_ipc_dsp_done(sdev); + spin_unlock_irq(&sdev->ipc_lock); ipc_irq = true; } diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 32a741fcb84f..07632ae6ccf5 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -355,21 +355,20 @@ static irqreturn_t acp_irq_thread(int irq, void *context) unsigned int count = ACP_HW_SEM_RETRY_COUNT; spin_lock_irq(&sdev->ipc_lock); - while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset)) { - /* Wait until acquired HW Semaphore lock or timeout */ - count--; - if (!count) { - dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__); - spin_unlock_irq(&sdev->ipc_lock); - return IRQ_NONE; - } + /* Wait until acquired HW Semaphore lock or timeout */ + while (snd_sof_dsp_read(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset) && --count) + ; + spin_unlock_irq(&sdev->ipc_lock); + + if (!count) { + dev_err(sdev->dev, "%s: Failed to acquire HW lock\n", __func__); + return IRQ_NONE; } sof_ops(sdev)->irq_thread(irq, sdev); /* Unlock or Release HW Semaphore */ snd_sof_dsp_write(sdev, ACP_DSP_BAR, desc->hw_semaphore_offset, 0x0); - spin_unlock_irq(&sdev->ipc_lock); return IRQ_HANDLED; }; From cffe487026be13eaf37ea28b783d9638ab147204 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@linaro.org> Date: Thu, 8 Feb 2024 13:18:46 +0300 Subject: [PATCH 116/304] cifs: fix underflow in parse_server_interfaces() In this loop, we step through the buffer and after each item we check if the size_left is greater than the minimum size we need. However, the problem is that "bytes_left" is type ssize_t while sizeof() is type size_t. That means that because of type promotion, the comparison is done as an unsigned and if we have negative bytes left the loop continues instead of ending. Fixes: fe856be475f7 ("CIFS: parse and store info on iface queries") Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org> Reviewed-by: Shyam Prasad N <sprasad@microsoft.com> Signed-off-by: Steve French <stfrench@microsoft.com> --- fs/smb/client/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 83c898afc835..755f1c66b573 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -619,7 +619,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, goto out; } - while (bytes_left >= sizeof(*p)) { + while (bytes_left >= (ssize_t)sizeof(*p)) { memset(&tmp_iface, 0, sizeof(tmp_iface)); tmp_iface.speed = le64_to_cpu(p->LinkSpeed); tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0; From 79520587fe42cd4988aff8695d60621e689109cb Mon Sep 17 00:00:00 2001 From: Shyam Prasad N <sprasad@microsoft.com> Date: Fri, 9 Feb 2024 11:25:42 +0000 Subject: [PATCH 117/304] cifs: update the same create_guid on replay File open requests made to the server contain a CreateGuid, which is used by the server to identify the open request. If the same request needs to be replayed, it needs to be sent with the same CreateGuid in the durable handle v2 context. Without doing so, we could end up leaking handles on the server when: 1. multichannel is used AND 2. connection goes down, but not for all channels This is because the replayed open request would have a new CreateGuid and the server will treat this as a new request and open a new handle. This change fixes this by reusing the existing create_guid stored in the cached fid struct. REF: MS-SMB2 4.9 Replay Create Request on an Alternate Channel Fixes: 4f1fffa23769 ("cifs: commands that are retried should have replay flag set") Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> Signed-off-by: Steve French <stfrench@microsoft.com> --- fs/smb/client/cached_dir.c | 1 + fs/smb/client/cifsglob.h | 1 + fs/smb/client/smb2ops.c | 4 ++++ fs/smb/client/smb2pdu.c | 10 ++++++++-- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 1daeb5714faa..3de5047a7ff9 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -242,6 +242,7 @@ replay_again: .desired_access = FILE_READ_DATA | FILE_READ_ATTRIBUTES, .disposition = FILE_OPEN, .fid = pfid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index c86a72c9d9ec..53c75cfb33ab 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1378,6 +1378,7 @@ struct cifs_open_parms { struct cifs_fid *fid; umode_t mode; bool reconnect:1; + bool replay:1; /* indicates that this open is for a replay */ }; struct cifs_fid { diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 755f1c66b573..6b3c384ead0d 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1204,6 +1204,7 @@ replay_again: .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), .fid = &fid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, @@ -1569,6 +1570,7 @@ replay_again: .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, create_options), .fid = &fid, + .replay = !!(retries), }; if (qi.flags & PASSTHRU_FSCTL) { @@ -2295,6 +2297,7 @@ replay_again: .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), .fid = fid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, @@ -2681,6 +2684,7 @@ replay_again: .disposition = FILE_OPEN, .create_options = cifs_create_options(cifs_sb, 0), .fid = &fid, + .replay = !!(retries), }; rc = SMB2_open_init(tcon, server, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 4085ce27fd38..608ee05491e2 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2404,8 +2404,13 @@ create_durable_v2_buf(struct cifs_open_parms *oparms) */ buf->dcontext.Timeout = cpu_to_le32(oparms->tcon->handle_timeout); buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT); - generate_random_uuid(buf->dcontext.CreateGuid); - memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); + + /* for replay, we should not overwrite the existing create guid */ + if (!oparms->replay) { + generate_random_uuid(buf->dcontext.CreateGuid); + memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16); + } else + memcpy(buf->dcontext.CreateGuid, pfid->create_guid, 16); /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */ buf->Name[0] = 'D'; @@ -3142,6 +3147,7 @@ replay_again: /* reinitialize for possible replay */ flags = 0; server = cifs_pick_channel(ses); + oparms->replay = !!(retries); cifs_dbg(FYI, "create/open\n"); if (!ses || !server) From 28083ff18d3f65ecd64857f4495623135dd1f132 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> Date: Wed, 7 Feb 2024 11:24:46 +0100 Subject: [PATCH 118/304] accel/ivpu: Fix DevTLB errors on suspend/resume and recovery Issue IP reset before shutdown in order to complete all upstream requests to the SOC. Without this DevTLB is complaining about incomplete transactions and NPU cannot resume from suspend. This problem is only happening on recent IFWI releases. IP reset in rare corner cases can mess up PCI configuration, so save it before the reset. After this happens it is also impossible to issue PLL requests and D0->D3->D0 cycle is needed to recover the NPU. Add WP 0 request on power up, so the PUNIT is always notified about NPU reset. Use D0/D3 cycle for recovery as it can recover from failed IP reset and FLR cannot. Fixes: 3f7c0634926d ("accel/ivpu/37xx: Fix hangs related to MMIO reset") Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240207102446.3126981-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_37xx.c | 44 ++++++++++++++++++++++--------- drivers/accel/ivpu/ivpu_pm.c | 39 +++++++++++++++------------ 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index 77accd029c4a..89af1006df55 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -510,16 +510,6 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev) return ret; } -static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev) -{ - ivpu_boot_dpu_active_drive(vdev, false); - ivpu_boot_pwr_island_isolation_drive(vdev, true); - ivpu_boot_pwr_island_trickle_drive(vdev, false); - ivpu_boot_pwr_island_drive(vdev, false); - - return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0); -} - static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) { u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); @@ -616,12 +606,37 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) return 0; } +static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev) +{ + int ret; + u32 val; + + if (IVPU_WA(punit_disabled)) + return 0; + + ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) { + ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n"); + return ret; + } + + val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET); + val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val); + REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val); + + ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US); + if (ret) + ivpu_err(vdev, "Timed out waiting for RESET completion\n"); + + return ret; +} + static int ivpu_hw_37xx_reset(struct ivpu_device *vdev) { int ret = 0; - if (ivpu_boot_pwr_domain_disable(vdev)) { - ivpu_err(vdev, "Failed to disable power domain\n"); + if (ivpu_hw_37xx_ip_reset(vdev)) { + ivpu_err(vdev, "Failed to reset NPU\n"); ret = -EIO; } @@ -661,6 +676,11 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev) { int ret; + /* PLL requests may fail when powering down, so issue WP 0 here */ + ret = ivpu_pll_disable(vdev); + if (ret) + ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret); + ret = ivpu_hw_37xx_d0i3_disable(vdev); if (ret) ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index f501f27ebafd..5f73854234ba 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -58,11 +58,14 @@ static int ivpu_suspend(struct ivpu_device *vdev) { int ret; + /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */ + pci_save_state(to_pci_dev(vdev->drm.dev)); + ret = ivpu_shutdown(vdev); - if (ret) { + if (ret) ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret); - return ret; - } + + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); return ret; } @@ -71,6 +74,9 @@ static int ivpu_resume(struct ivpu_device *vdev) { int ret; + pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0); + pci_restore_state(to_pci_dev(vdev->drm.dev)); + retry: ret = ivpu_hw_power_up(vdev); if (ret) { @@ -120,15 +126,20 @@ static void ivpu_pm_recovery_work(struct work_struct *work) ivpu_fw_log_dump(vdev); -retry: - ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev)); - if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) { - cond_resched(); - goto retry; - } + atomic_inc(&vdev->pm->reset_counter); + atomic_set(&vdev->pm->reset_pending, 1); + down_write(&vdev->pm->reset_lock); - if (ret && ret != -EAGAIN) - ivpu_err(vdev, "Failed to reset VPU: %d\n", ret); + ivpu_suspend(vdev); + ivpu_pm_prepare_cold_boot(vdev); + ivpu_jobs_abort_all(vdev); + + ret = ivpu_resume(vdev); + if (ret) + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + + up_write(&vdev->pm->reset_lock); + atomic_set(&vdev->pm->reset_pending, 0); kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); pm_runtime_mark_last_busy(vdev->drm.dev); @@ -200,9 +211,6 @@ int ivpu_pm_suspend_cb(struct device *dev) ivpu_suspend(vdev); ivpu_pm_prepare_warm_boot(vdev); - pci_save_state(to_pci_dev(dev)); - pci_set_power_state(to_pci_dev(dev), PCI_D3hot); - ivpu_dbg(vdev, PM, "Suspend done.\n"); return 0; @@ -216,9 +224,6 @@ int ivpu_pm_resume_cb(struct device *dev) ivpu_dbg(vdev, PM, "Resume..\n"); - pci_set_power_state(to_pci_dev(dev), PCI_D0); - pci_restore_state(to_pci_dev(dev)); - ret = ivpu_resume(vdev); if (ret) ivpu_err(vdev, "Failed to resume: %d\n", ret); From 962ac2dce56bb3aad1f82a4bbe3ada57a020287c Mon Sep 17 00:00:00 2001 From: Manasi Navare <navaremanasi@chromium.org> Date: Mon, 5 Feb 2024 20:46:19 +0000 Subject: [PATCH 119/304] drm/i915/dsc: Fix the macro that calculates DSCC_/DSCA_ PPS reg address Commit bd077259d0a9 ("drm/i915/vdsc: Add function to read any PPS register") defines a new macro to calculate the DSC PPS register addresses with PPS number as an input. This macro correctly calculates the addresses till PPS 11 since the addresses increment by 4. So in that case the following macro works correctly to give correct register address: _MMIO(_DSCA_PPS_0 + (pps) * 4) However after PPS 11, the register address for PPS 12 increments by 12 because of RC Buffer memory allocation in between. Because of this discontinuity in the address space, the macro calculates wrong addresses for PPS 12 - 16 resulting into incorrect DSC PPS parameter value read/writes causing DSC corruption. This fixes it by correcting this macro to add the offset of 12 for PPS >=12. v3: Add correct paranthesis for pps argument (Jani Nikula) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10172 Fixes: bd077259d0a9 ("drm/i915/vdsc: Add function to read any PPS register") Cc: Suraj Kandpal <suraj.kandpal@intel.com> Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com> Cc: Animesh Manna <animesh.manna@intel.com> Cc: Jani Nikula <jani.nikula@linux.intel.com> Cc: Sean Paul <sean@poorly.run> Cc: Drew Davenport <ddavenport@chromium.org> Signed-off-by: Manasi Navare <navaremanasi@chromium.org> Reviewed-by: Jani Nikula <jani.nikula@intel.com> Signed-off-by: Jani Nikula <jani.nikula@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240205204619.1991673-1-navaremanasi@chromium.org (cherry picked from commit 6074be620c31dc2ae11af96a1a5ea95580976fb5) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/display/intel_vdsc_regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h index 64f440fdc22b..8b21dc8e26d5 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc_regs.h +++ b/drivers/gpu/drm/i915/display/intel_vdsc_regs.h @@ -51,8 +51,8 @@ #define DSCC_PICTURE_PARAMETER_SET_0 _MMIO(0x6BA00) #define _DSCA_PPS_0 0x6B200 #define _DSCC_PPS_0 0x6BA00 -#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + (pps) * 4) -#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + (pps) * 4) +#define DSCA_PPS(pps) _MMIO(_DSCA_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4) +#define DSCC_PPS(pps) _MMIO(_DSCC_PPS_0 + ((pps) < 12 ? (pps) : (pps) + 12) * 4) #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PB 0x78270 #define _ICL_DSC1_PICTURE_PARAMETER_SET_0_PB 0x78370 #define _ICL_DSC0_PICTURE_PARAMETER_SET_0_PC 0x78470 From ad26d56d080780bbfcc1696ca0c0cce3e2124ef6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Thu, 8 Feb 2024 17:45:52 +0200 Subject: [PATCH 120/304] drm/i915/dp: Limit SST link rate to <=8.1Gbps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Limit the link rate to HBR3 or below (<=8.1Gbps) in SST mode. UHBR (10Gbps+) link rates require 128b/132b channel encoding which we have not yet hooked up into the SST/no-sideband codepaths. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240208154552.14545-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula <jani.nikula@intel.com> (cherry picked from commit 6061811d72e14f41f71b6a025510920b187bfcca) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- drivers/gpu/drm/i915/display/intel_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f5ef95da5534..ae647d03af25 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2355,6 +2355,9 @@ intel_dp_compute_config_limits(struct intel_dp *intel_dp, limits->min_rate = intel_dp_common_rate(intel_dp, 0); limits->max_rate = intel_dp_max_link_rate(intel_dp); + /* FIXME 128b/132b SST support missing */ + limits->max_rate = min(limits->max_rate, 810000); + limits->min_lane_count = 1; limits->max_lane_count = intel_dp_max_lane_count(intel_dp); From 9a0c32d698c1d0c4a6f5642ac017da31febad1eb Mon Sep 17 00:00:00 2001 From: Danilo Krummrich <dakr@redhat.com> Date: Fri, 2 Feb 2024 01:05:50 +0100 Subject: [PATCH 121/304] drm/nouveau: don't fini scheduler if not initialized nouveau_abi16_ioctl_channel_alloc() and nouveau_cli_init() simply call their corresponding *_fini() counterpart. This can lead to nouveau_sched_fini() being called without struct nouveau_sched ever being initialized in the first place. Instead of embedding struct nouveau_sched into struct nouveau_cli and struct nouveau_chan_abi16, allocate struct nouveau_sched separately, such that we can check for the corresponding pointer to be NULL in the particular *_fini() functions. It makes sense to allocate struct nouveau_sched separately anyway, since in a subsequent commit we can also avoid to allocate a struct nouveau_sched in nouveau_abi16_ioctl_channel_alloc() at all, if the VM_BIND uAPI has been disabled due to the legacy uAPI being used. Fixes: 5f03a507b29e ("drm/nouveau: implement 1:1 scheduler - entity relationship") Reported-by: Timur Tabi <ttabi@nvidia.com> Tested-by: Timur Tabi <ttabi@nvidia.com> Closes: https://lore.kernel.org/nouveau/20240131213917.1545604-1-ttabi@nvidia.com/ Reviewed-by: Dave Airlie <airlied@redhat.com> Signed-off-by: Danilo Krummrich <dakr@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240202000606.3526-1-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 10 ++++--- drivers/gpu/drm/nouveau/nouveau_abi16.h | 2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 7 +++-- drivers/gpu/drm/nouveau/nouveau_drv.h | 2 +- drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 38 +++++++++++++++++++++++-- drivers/gpu/drm/nouveau/nouveau_sched.h | 6 ++-- drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- 8 files changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index a04156ca8390..ca4b5ab3e59e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -128,12 +128,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, struct nouveau_abi16_ntfy *ntfy, *temp; /* Cancel all jobs from the entity's queue. */ - drm_sched_entity_fini(&chan->sched.entity); + if (chan->sched) + drm_sched_entity_fini(&chan->sched->entity); if (chan->chan) nouveau_channel_idle(chan->chan); - nouveau_sched_fini(&chan->sched); + if (chan->sched) + nouveau_sched_destroy(&chan->sched); /* cleanup notifier state */ list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { @@ -337,8 +339,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq, - chan->chan->dma.ib_max); + ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 1f5e243c0c75..11c8c4a80079 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -26,7 +26,7 @@ struct nouveau_abi16_chan { struct nouveau_bo *ntfy; struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; - struct nouveau_sched sched; + struct nouveau_sched *sched; }; struct nouveau_abi16 { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 6f6c31a9937b..a947e1d5f309 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -201,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); - nouveau_sched_fini(&cli->sched); + if (cli->sched) + nouveau_sched_destroy(&cli->sched); if (uvmm) nouveau_uvmm_fini(uvmm); nouveau_vmm_fini(&cli->svm); @@ -311,7 +312,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, cli->mem = &mems[ret]; /* Don't pass in the (shared) sched_wq in order to let - * nouveau_sched_init() create a dedicated one for VM_BIND jobs. + * nouveau_sched_create() create a dedicated one for VM_BIND jobs. * * This is required to ensure that for VM_BIND jobs free_job() work and * run_job() work can always run concurrently and hence, free_job() work @@ -320,7 +321,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, * locks which indirectly or directly are held for allocations * elsewhere. */ - ret = nouveau_sched_init(&cli->sched, drm, NULL, 1); + ret = nouveau_sched_create(&cli->sched, drm, NULL, 1); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 8a6d94c8b163..e239c6bf4afa 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -98,7 +98,7 @@ struct nouveau_cli { bool disabled; } uvmm; - struct nouveau_sched sched; + struct nouveau_sched *sched; const struct nvif_mclass *mem; diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index bc5d71b79ab2..e65c0ef23bc7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -389,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (ret) goto out; - args.sched = &chan16->sched; + args.sched = chan16->sched; args.file_priv = file_priv; args.chan = chan; diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index dd98f6910f9c..32fa2e273965 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -398,7 +398,7 @@ static const struct drm_sched_backend_ops nouveau_sched_ops = { .free_job = nouveau_sched_free_job, }; -int +static int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, struct workqueue_struct *wq, u32 credit_limit) { @@ -453,7 +453,30 @@ fail_wq: return ret; } -void +int +nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) +{ + struct nouveau_sched *sched; + int ret; + + sched = kzalloc(sizeof(*sched), GFP_KERNEL); + if (!sched) + return -ENOMEM; + + ret = nouveau_sched_init(sched, drm, wq, credit_limit); + if (ret) { + kfree(sched); + return ret; + } + + *psched = sched; + + return 0; +} + + +static void nouveau_sched_fini(struct nouveau_sched *sched) { struct drm_gpu_scheduler *drm_sched = &sched->base; @@ -471,3 +494,14 @@ nouveau_sched_fini(struct nouveau_sched *sched) if (sched->wq) destroy_workqueue(sched->wq); } + +void +nouveau_sched_destroy(struct nouveau_sched **psched) +{ + struct nouveau_sched *sched = *psched; + + nouveau_sched_fini(sched); + kfree(sched); + + *psched = NULL; +} diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index a6528f5981e6..e1f01a23e6f6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -111,8 +111,8 @@ struct nouveau_sched { } job; }; -int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, - struct workqueue_struct *wq, u32 credit_limit); -void nouveau_sched_fini(struct nouveau_sched *sched); +int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit); +void nouveau_sched_destroy(struct nouveau_sched **psched); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 4f223c972c6a..0a0a11dc9ec0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1740,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, if (ret) return ret; - args.sched = &cli->sched; + args.sched = cli->sched; args.file_priv = file_priv; ret = nouveau_uvmm_vm_bind(&args); From a1d8700d906444167899e5a3c64a11ba50c0badd Mon Sep 17 00:00:00 2001 From: Danilo Krummrich <dakr@redhat.com> Date: Fri, 2 Feb 2024 01:05:51 +0100 Subject: [PATCH 122/304] drm/nouveau: omit to create schedulers using the legacy uAPI Omit to create scheduler instances when using the legacy uAPI. When using the legacy NOUVEAU_GEM_PUSHBUF ioctl no scheduler instance is required, hence omit creating scheduler instances in nouveau_abi16_ioctl_channel_alloc(). Tested-by: Timur Tabi <ttabi@nvidia.com> Reviewed-by: Dave Airlie <airlied@redhat.com> Signed-off-by: Danilo Krummrich <dakr@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240202000606.3526-2-dakr@redhat.com --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index ca4b5ab3e59e..d1bb8151a1df 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -339,10 +339,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, - chan->chan->dma.ib_max); - if (ret) - goto done; + /* If we're not using the VM_BIND uAPI, we don't need a scheduler. + * + * The client lock is already acquired by nouveau_abi16_get(). + */ + if (nouveau_cli_uvmm(cli)) { + ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); + if (ret) + goto done; + } init->channel = chan->chan->chid; From 5ba4e6d5863c53e937f49932dee0ecb004c65928 Mon Sep 17 00:00:00 2001 From: Kamal Heib <kheib@redhat.com> Date: Thu, 8 Feb 2024 17:36:28 -0500 Subject: [PATCH 123/304] RDMA/qedr: Fix qedr_create_user_qp error flow Avoid the following warning by making sure to free the allocated resources in case that qedr_init_user_queue() fail. -----------[ cut here ]----------- WARNING: CPU: 0 PID: 143192 at drivers/infiniband/core/rdma_core.c:874 uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Modules linked in: tls target_core_user uio target_core_pscsi target_core_file target_core_iblock ib_srpt ib_srp scsi_transport_srp nfsd nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs 8021q garp mrp stp llc ext4 mbcache jbd2 opa_vnic ib_umad ib_ipoib sunrpc rdma_ucm ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_cm hfi1 intel_rapl_msr intel_rapl_common mgag200 qedr sb_edac drm_shmem_helper rdmavt x86_pkg_temp_thermal drm_kms_helper intel_powerclamp ib_uverbs coretemp i2c_algo_bit kvm_intel dell_wmi_descriptor ipmi_ssif sparse_keymap kvm ib_core rfkill syscopyarea sysfillrect video sysimgblt irqbypass ipmi_si ipmi_devintf fb_sys_fops rapl iTCO_wdt mxm_wmi iTCO_vendor_support intel_cstate pcspkr dcdbas intel_uncore ipmi_msghandler lpc_ich acpi_power_meter mei_me mei fuse drm xfs libcrc32c qede sd_mod ahci libahci t10_pi sg crct10dif_pclmul crc32_pclmul crc32c_intel qed libata tg3 ghash_clmulni_intel megaraid_sas crc8 wmi [last unloaded: ib_srpt] CPU: 0 PID: 143192 Comm: fi_rdm_tagged_p Kdump: loaded Not tainted 5.14.0-408.el9.x86_64 #1 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 2.14.0 01/25/2022 RIP: 0010:uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] Code: 5d 41 5c 41 5d 41 5e e9 0f 26 1b dd 48 89 df e8 67 6a ff ff 49 8b 86 10 01 00 00 48 85 c0 74 9c 4c 89 e7 e8 83 c0 cb dd eb 92 <0f> 0b eb be 0f 0b be 04 00 00 00 48 89 df e8 8e f5 ff ff e9 6d ff RSP: 0018:ffffb7c6cadfbc60 EFLAGS: 00010286 RAX: ffff8f0889ee3f60 RBX: ffff8f088c1a5200 RCX: 00000000802a0016 RDX: 00000000802a0017 RSI: 0000000000000001 RDI: ffff8f0880042600 RBP: 0000000000000001 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8f11fffd5000 R11: 0000000000039000 R12: ffff8f0d5b36cd80 R13: ffff8f088c1a5250 R14: ffff8f1206d91000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8f11d7c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000147069200e20 CR3: 00000001c7210002 CR4: 00000000001706f0 Call Trace: <TASK> ? show_trace_log_lvl+0x1c4/0x2df ? show_trace_log_lvl+0x1c4/0x2df ? ib_uverbs_close+0x1f/0xb0 [ib_uverbs] ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? __warn+0x81/0x110 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ? report_bug+0x10a/0x140 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? uverbs_destroy_ufile_hw+0xcf/0xf0 [ib_uverbs] ib_uverbs_close+0x1f/0xb0 [ib_uverbs] __fput+0x94/0x250 task_work_run+0x5c/0x90 do_exit+0x270/0x4a0 do_group_exit+0x2d/0x90 get_signal+0x87c/0x8c0 arch_do_signal_or_restart+0x25/0x100 ? ib_uverbs_ioctl+0xc2/0x110 [ib_uverbs] exit_to_user_mode_loop+0x9c/0x130 exit_to_user_mode_prepare+0xb6/0x100 syscall_exit_to_user_mode+0x12/0x40 do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? syscall_exit_work+0x103/0x130 ? syscall_exit_to_user_mode+0x22/0x40 ? do_syscall_64+0x69/0x90 ? do_syscall_64+0x69/0x90 ? common_interrupt+0x43/0xa0 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x1470abe3ec6b Code: Unable to access opcode bytes at RIP 0x1470abe3ec41. RSP: 002b:00007fff13ce9108 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: fffffffffffffffc RBX: 00007fff13ce9218 RCX: 00001470abe3ec6b RDX: 00007fff13ce9200 RSI: 00000000c0181b01 RDI: 0000000000000004 RBP: 00007fff13ce91e0 R08: 0000558d9655da10 R09: 0000558d9655dd00 R10: 00007fff13ce95c0 R11: 0000000000000246 R12: 00007fff13ce9358 R13: 0000000000000013 R14: 0000558d9655db50 R15: 00007fff13ce9470 </TASK> --[ end trace 888a9b92e04c5c97 ]-- Fixes: df15856132bc ("RDMA/qedr: restructure functions that create/destroy QPs") Signed-off-by: Kamal Heib <kheib@redhat.com> Link: https://lore.kernel.org/r/20240208223628.2040841-1-kheib@redhat.com Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/hw/qedr/verbs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7887a6786ed4..f118ce0a9a61 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1879,8 +1879,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); From 551539a8606e28cb2a130f8ef3e9834235b456c4 Mon Sep 17 00:00:00 2001 From: Hans de Goede <hdegoede@redhat.com> Date: Sun, 11 Feb 2024 22:27:35 +0100 Subject: [PATCH 124/304] ASoC: rt5645: Make LattePanda board DMI match more precise The DMI strings used for the LattePanda board DMI quirks are very generic. Using the dmidecode database from https://linux-hardware.org/ shows that the chosen DMI strings also match the following 2 laptops which also have a rt5645 codec: Insignia NS-P11W7100 https://linux-hardware.org/?computer=E092FFF8BA04 Insignia NS-P10W8100 https://linux-hardware.org/?computer=AFB6C0BF7934 All 4 hw revisions of the LattePanda board have "S70CR" in their BIOS version DMI strings: DF-BI-7-S70CR100-* DF-BI-7-S70CR110-* DF-BI-7-S70CR200-* LP-BS-7-S70CR700-* See e.g. https://linux-hardware.org/?computer=D98250A817C0 Add a partial (non exact) DMI match on this string to make the LattePanda board DMI match more precise to avoid false-positive matches. Signed-off-by: Hans de Goede <hdegoede@redhat.com> Link: https://msgid.link/r/20240211212736.179605-1-hdegoede@redhat.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/rt5645.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 0cc2fa131d48..69de78ad5da8 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3838,6 +3838,16 @@ static const struct dmi_system_id dmi_platform_data[] = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), DMI_EXACT_MATCH(DMI_BOARD_VERSION, "Default string"), + /* + * Above strings are too generic, LattePanda BIOS versions for + * all 4 hw revisions are: + * DF-BI-7-S70CR100-* + * DF-BI-7-S70CR110-* + * DF-BI-7-S70CR200-* + * LP-BS-7-S70CR700-* + * Do a partial match for S70CR to avoid false positive matches. + */ + DMI_MATCH(DMI_BIOS_VERSION, "S70CR"), }, .driver_data = (void *)&lattepanda_board_platform_data, }, From d6755a53b8dde434220a164c756190345772843a Mon Sep 17 00:00:00 2001 From: Hans de Goede <hdegoede@redhat.com> Date: Sun, 11 Feb 2024 22:27:36 +0100 Subject: [PATCH 125/304] ASoC: rt5645: Add DMI quirk for inverted jack-detect on MeeGoPad T8 The MeeGoPad T8 uses the standard rt5645 jd_mode=3 setting for jack-detect, but the used jack connector outputs an inverted jack-detect signal. Add a DMI quirk for this. Signed-off-by: Hans de Goede <hdegoede@redhat.com> Link: https://msgid.link/r/20240211212736.179605-2-hdegoede@redhat.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/rt5645.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 69de78ad5da8..20191a4473c2 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3693,6 +3693,11 @@ static const struct rt5645_platform_data jd_mode3_monospk_platform_data = { .mono_speaker = true, }; +static const struct rt5645_platform_data jd_mode3_inv_data = { + .jd_mode = 3, + .inv_jd1_1 = true, +}; + static const struct rt5645_platform_data jd_mode3_platform_data = { .jd_mode = 3, }; @@ -3882,6 +3887,16 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&intel_braswell_platform_data, }, + { + .ident = "Meegopad T08", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Default string"), + DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), + DMI_MATCH(DMI_BOARD_NAME, "T3 MRD"), + DMI_MATCH(DMI_BOARD_VERSION, "V1.1"), + }, + .driver_data = (void *)&jd_mode3_inv_data, + }, { } }; From 2c80a2b715df75881359d07dbaacff8ad411f40e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Mon, 12 Feb 2024 12:22:17 +0100 Subject: [PATCH 126/304] nouveau/svm: fix kvcalloc() argument order The conversion to kvcalloc() mixed up the object size and count arguments, causing a warning: drivers/gpu/drm/nouveau/nouveau_svm.c: In function 'nouveau_svm_fault_buffer_ctor': drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: error: 'kvcalloc' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 1010 | buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); | ^ drivers/gpu/drm/nouveau/nouveau_svm.c:1010:40: note: earlier argument should specify number of elements, later size of each element The behavior is still correct aside from the warning, but fixing it avoids the warnings and can help the compiler track the individual objects better. Fixes: 71e4bbca070e ("nouveau/svm: Use kvcalloc() instead of kvzalloc()") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Danilo Krummrich <dakr@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240212112230.1117284-1-arnd@kernel.org --- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index cc03e0c22ff3..5e4565c5011a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -1011,7 +1011,7 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) if (ret) return ret; - buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL); + buffer->fault = kvcalloc(buffer->entries, sizeof(*buffer->fault), GFP_KERNEL); if (!buffer->fault) return -ENOMEM; From f7fe85b229bc30cb5dc95b4e9015a601c9e3a8cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20T=C5=91k=C3=A9s?= <attitokes@gmail.com> Date: Sat, 10 Feb 2024 21:36:38 +0200 Subject: [PATCH 127/304] ASoC: amd: yc: Fix non-functional mic on Lenovo 82UU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Like many other models, the Lenovo 82UU (Yoga Slim 7 Pro 14ARH7) needs a quirk entry for the internal microphone to function. Signed-off-by: Attila Tőkés <attitokes@gmail.com> Link: https://msgid.link/r/20240210193638.144028-1-attitokes@gmail.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 80ad60d485ea..cc231185d72c 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -234,6 +234,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82UG"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82UU"), + } + }, { .driver_data = &acp6x_card, .matches = { From 83ef106fa732aea8558253641cd98e8a895604d7 Mon Sep 17 00:00:00 2001 From: Viken Dadhaniya <quic_vdadhani@quicinc.com> Date: Mon, 12 Feb 2024 18:22:39 +0530 Subject: [PATCH 128/304] i2c: qcom-geni: Correct I2C TRE sequence For i2c read operation in GSI mode, we are getting timeout due to malformed TRE basically incorrect TRE sequence in gpi(drivers/dma/qcom/gpi.c) driver. I2C driver has geni_i2c_gpi(I2C_WRITE) function which generates GO TRE and geni_i2c_gpi(I2C_READ)generates DMA TRE. Hence to generate GO TRE before DMA TRE, we should move geni_i2c_gpi(I2C_WRITE) before geni_i2c_gpi(I2C_READ) inside the I2C GSI mode transfer function i.e. geni_i2c_gpi_xfer(). TRE stands for Transfer Ring Element - which is basically an element with size of 4 words. It contains all information like slave address, clk divider, dma address value data size etc). Mainly we have 3 TREs(Config, GO and DMA tre). - CONFIG TRE : consists of internal register configuration which is required before start of the transfer. - DMA TRE : contains DDR/Memory address, called as DMA descriptor. - GO TRE : contains Transfer directions, slave ID, Delay flags, Length of the transfer. I2c driver calls GPI driver API to config each TRE depending on the protocol. For read operation tre sequence will be as below which is not aligned to hardware programming guide. - CONFIG tre - DMA tre - GO tre As per Qualcomm's internal Hardware Programming Guide, we should configure TREs in below sequence for any RX only transfer. - CONFIG tre - GO tre - DMA tre Fixes: d8703554f4de ("i2c: qcom-geni: Add support for GPI DMA") Reviewed-by: Andi Shyti <andi.shyti@kernel.org> Reviewed-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> Tested-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org> # qrb5165-rb5 Co-developed-by: Mukesh Kumar Savaliya <quic_msavaliy@quicinc.com> Signed-off-by: Mukesh Kumar Savaliya <quic_msavaliy@quicinc.com> Signed-off-by: Viken Dadhaniya <quic_vdadhani@quicinc.com> Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> Signed-off-by: Andi Shyti <andi.shyti@kernel.org> --- drivers/i2c/busses/i2c-qcom-geni.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 0d2e7171e3a6..da94df466e83 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -613,20 +613,20 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.addr = msgs[i].addr; - if (msgs[i].flags & I2C_M_RD) { - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, - &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); - if (ret) - goto err; - } - ret = geni_i2c_gpi(gi2c, &msgs[i], &config, &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c); if (ret) goto err; - if (msgs[i].flags & I2C_M_RD) + if (msgs[i].flags & I2C_M_RD) { + ret = geni_i2c_gpi(gi2c, &msgs[i], &config, + &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); + if (ret) + goto err; + dma_async_issue_pending(gi2c->rx_c); + } + dma_async_issue_pending(gi2c->tx_c); timeout = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); From 4508ec17357094e2075f334948393ddedbb75157 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara <pc@manguebit.com> Date: Sun, 11 Feb 2024 20:19:30 -0300 Subject: [PATCH 129/304] smb: client: set correct id, uid and cruid for multiuser automounts When uid, gid and cruid are not specified, we need to dynamically set them into the filesystem context used for automounting otherwise they'll end up reusing the values from the parent mount. Fixes: 9fd29a5bae6e ("cifs: use fs_context for automounts") Reported-by: Shane Nehring <snehring@iastate.edu> Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2259257 Cc: stable@vger.kernel.org # 6.2+ Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.com> Signed-off-by: Steve French <stfrench@microsoft.com> --- fs/smb/client/namespace.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index a6968573b775..4a517b280f2b 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -168,6 +168,21 @@ static char *automount_fullpath(struct dentry *dentry, void *page) return s; } +static void fs_context_set_ids(struct smb3_fs_context *ctx) +{ + kuid_t uid = current_fsuid(); + kgid_t gid = current_fsgid(); + + if (ctx->multiuser) { + if (!ctx->uid_specified) + ctx->linux_uid = uid; + if (!ctx->gid_specified) + ctx->linux_gid = gid; + } + if (!ctx->cruid_specified) + ctx->cred_uid = uid; +} + /* * Create a vfsmount that we can automount */ @@ -205,6 +220,7 @@ static struct vfsmount *cifs_do_automount(struct path *path) tmp.leaf_fullpath = NULL; tmp.UNC = tmp.prepath = NULL; tmp.dfs_root_ses = NULL; + fs_context_set_ids(&tmp); rc = smb3_fs_context_dup(ctx, &tmp); if (rc) { From 8bde59b20de06339d598e8b05e5195f7c631c38b Mon Sep 17 00:00:00 2001 From: Paulo Alcantara <pc@manguebit.com> Date: Sun, 11 Feb 2024 20:19:31 -0300 Subject: [PATCH 130/304] smb: client: handle path separator of created SMB symlinks Convert path separator to CIFS_DIR_SEP(cifs_sb) from symlink target before sending it over the wire otherwise the created SMB symlink may become innaccesible from server side. Fixes: 514d793e27a3 ("smb: client: allow creating symlinks via reparse points") Signed-off-by: Paulo Alcantara (Red Hat) <pc@manguebit.com> Signed-off-by: Steve French <stfrench@microsoft.com> --- fs/smb/client/smb2ops.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 6b3c384ead0d..4695433fcf39 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5217,7 +5217,7 @@ static int smb2_create_reparse_symlink(const unsigned int xid, struct inode *new; struct kvec iov; __le16 *path; - char *sym; + char *sym, sep = CIFS_DIR_SEP(cifs_sb); u16 len, plen; int rc = 0; @@ -5231,7 +5231,8 @@ static int smb2_create_reparse_symlink(const unsigned int xid, .symlink_target = sym, }; - path = cifs_convert_path_to_utf16(symname, cifs_sb); + convert_delimiter(sym, sep); + path = cifs_convert_path_to_utf16(sym, cifs_sb); if (!path) { rc = -ENOMEM; goto out; @@ -5254,7 +5255,10 @@ static int smb2_create_reparse_symlink(const unsigned int xid, buf->PrintNameLength = cpu_to_le16(plen); memcpy(buf->PathBuffer, path, plen); buf->Flags = cpu_to_le32(*symname != '/' ? SYMLINK_FLAG_RELATIVE : 0); + if (*sym != sep) + buf->Flags = cpu_to_le32(SYMLINK_FLAG_RELATIVE); + convert_delimiter(sym, '/'); iov.iov_base = buf; iov.iov_len = len; new = smb2_get_reparse_inode(&data, inode->i_sb, xid, From d794734c9bbfe22f86686dc2909c25f5ffe1a572 Mon Sep 17 00:00:00 2001 From: Steve Wahl <steve.wahl@hpe.com> Date: Fri, 26 Jan 2024 10:48:41 -0600 Subject: [PATCH 131/304] x86/mm/ident_map: Use gbpages only where full GB page should be mapped. When ident_pud_init() uses only gbpages to create identity maps, large ranges of addresses not actually requested can be included in the resulting table; a 4K request will map a full GB. On UV systems, this ends up including regions that will cause hardware to halt the system if accessed (these are marked "reserved" by BIOS). Even processor speculation into these regions is enough to trigger the system halt. Only use gbpages when map creation requests include the full GB page of space. Fall back to using smaller 2M pages when only portions of a GB page are included in the request. No attempt is made to coalesce mapping requests. If a request requires a map entry at the 2M (pmd) level, subsequent mapping requests within the same 1G region will also be at the pmd level, even if adjacent or overlapping such requests could have been combined to map a full gbpage. Existing usage starts with larger regions and then adds smaller regions, so this should not have any great consequence. [ dhansen: fix up comment formatting, simplifty changelog ] Signed-off-by: Steve Wahl <steve.wahl@hpe.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240126164841.170866-1-steve.wahl%40hpe.com --- arch/x86/mm/ident_map.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index 968d7005f4a7..f50cc210a981 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - if (info->direct_gbpages) { + /* if this is already a gbpage, this portion is already mapped */ + if (pud_large(*pud)) + continue; + + /* Is using a gbpage allowed? */ + use_gbpage = info->direct_gbpages; + + /* Don't use gbpage if it maps more than the requested region. */ + /* at the begining: */ + use_gbpage &= ((addr & ~PUD_MASK) == 0); + /* ... or at the end: */ + use_gbpage &= ((next & ~PUD_MASK) == 0); + + /* Never overwrite existing mappings */ + use_gbpage &= !pud_present(*pud); + + if (use_gbpage) { pud_t pudval; - if (pud_present(*pud)) - continue; - - addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; From 4cbec7e89a416294c46e71c967b57b9119fe0054 Mon Sep 17 00:00:00 2001 From: Mike Christie <michael.christie@oracle.com> Date: Fri, 9 Feb 2024 15:52:47 -0600 Subject: [PATCH 132/304] scsi: target: Fix unmap setup during configuration This issue was found and also debugged by Carl Lei <me@xecycle.info>. If the device is not enabled, iblock/file will have not setup their se_device to bdev/file mappings. If a user tries to config the unmap settings at this time, we will then crash trying to access a NULL pointer where the bdev/file should be. This patch adds a check to make sure the device is configured before we try to call the configure_unmap callout. Fixes: 34bd1dcacf0d ("scsi: target: Detect UNMAP support post configuration") Reported-by: Carl Lei <me@xecycle.info> Signed-off-by: Mike Christie <michael.christie@oracle.com> Link: https://lore.kernel.org/r/20240209215247.5213-1-michael.christie@oracle.com Reviewed-by: Maurizio Lombardi <mlombard@redhat.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/target/target_core_configfs.c | 48 ++++++++++++++++++--------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index a5f58988130a..c1fbcdd16182 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -759,6 +759,29 @@ static ssize_t emulate_tas_store(struct config_item *item, return count; } +static int target_try_configure_unmap(struct se_device *dev, + const char *config_opt) +{ + if (!dev->transport->configure_unmap) { + pr_err("Generic Block Discard not supported\n"); + return -ENOSYS; + } + + if (!target_dev_configured(dev)) { + pr_err("Generic Block Discard setup for %s requires device to be configured\n", + config_opt); + return -ENODEV; + } + + if (!dev->transport->configure_unmap(dev)) { + pr_err("Generic Block Discard setup for %s failed\n", + config_opt); + return -ENOSYS; + } + + return 0; +} + static ssize_t emulate_tpu_store(struct config_item *item, const char *page, size_t count) { @@ -776,11 +799,9 @@ static ssize_t emulate_tpu_store(struct config_item *item, * Discard supported is detected iblock_create_virtdevice(). */ if (flag && !da->max_unmap_block_desc_count) { - if (!dev->transport->configure_unmap || - !dev->transport->configure_unmap(dev)) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } + ret = target_try_configure_unmap(dev, "emulate_tpu"); + if (ret) + return ret; } da->emulate_tpu = flag; @@ -806,11 +827,9 @@ static ssize_t emulate_tpws_store(struct config_item *item, * Discard supported is detected iblock_create_virtdevice(). */ if (flag && !da->max_unmap_block_desc_count) { - if (!dev->transport->configure_unmap || - !dev->transport->configure_unmap(dev)) { - pr_err("Generic Block Discard not supported\n"); - return -ENOSYS; - } + ret = target_try_configure_unmap(dev, "emulate_tpws"); + if (ret) + return ret; } da->emulate_tpws = flag; @@ -1022,12 +1041,9 @@ static ssize_t unmap_zeroes_data_store(struct config_item *item, * Discard supported is detected iblock_configure_device(). */ if (flag && !da->max_unmap_block_desc_count) { - if (!dev->transport->configure_unmap || - !dev->transport->configure_unmap(dev)) { - pr_err("dev[%p]: Thin Provisioning LBPRZ will not be set because max_unmap_block_desc_count is zero\n", - da->da_dev); - return -ENOSYS; - } + ret = target_try_configure_unmap(dev, "unmap_zeroes_data"); + if (ret) + return ret; } da->unmap_zeroes_data = flag; pr_debug("dev[%p]: SE Device Thin Provisioning LBPRZ bit: %d\n", From 977fe773dcc7098d8eaf4ee6382cb51e13e784cb Mon Sep 17 00:00:00 2001 From: Lee Duncan <lduncan@suse.com> Date: Fri, 9 Feb 2024 10:07:34 -0800 Subject: [PATCH 133/304] scsi: Revert "scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock" This reverts commit 1a1975551943f681772720f639ff42fbaa746212. This commit causes interrupts to be lost for FCoE devices, since it changed sping locks from "bh" to "irqsave". Instead, a work queue should be used, and will be addressed in a separate commit. Fixes: 1a1975551943 ("scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock") Signed-off-by: Lee Duncan <lduncan@suse.com> Link: https://lore.kernel.org/r/c578cdcd46b60470535c4c4a953e6a1feca0dffd.1707500786.git.lduncan@suse.com Reviewed-by: Hannes Reinecke <hare@suse.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/fcoe/fcoe_ctlr.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 19eee108db02..5c8d1ba3f8f3 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -319,17 +319,16 @@ static void fcoe_ctlr_announce(struct fcoe_ctlr *fip) { struct fcoe_fcf *sel; struct fcoe_fcf *fcf; - unsigned long flags; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = NULL; list_for_each_entry(fcf, &fip->fcfs, list) fcf->flogi_sent = 0; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); sel = fip->sel_fcf; if (sel && ether_addr_equal(sel->fcf_mac, fip->dest_addr)) @@ -700,7 +699,6 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, { struct fc_frame *fp; struct fc_frame_header *fh; - unsigned long flags; u16 old_xid; u8 op; u8 mac[ETH_ALEN]; @@ -734,11 +732,11 @@ int fcoe_ctlr_els_send(struct fcoe_ctlr *fip, struct fc_lport *lport, op = FIP_DT_FLOGI; if (fip->mode == FIP_MODE_VN2VN) break; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); kfree_skb(fip->flogi_req); fip->flogi_req = skb; fip->flogi_req_send = 1; - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); schedule_work(&fip->timer_work); return -EINPROGRESS; case ELS_FDISC: @@ -1707,11 +1705,10 @@ static int fcoe_ctlr_flogi_send_locked(struct fcoe_ctlr *fip) static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; int error; mutex_lock(&fip->ctlr_mutex); - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); LIBFCOE_FIP_DBG(fip, "re-sending FLOGI - reselect\n"); fcf = fcoe_ctlr_select(fip); if (!fcf || fcf->flogi_sent) { @@ -1722,7 +1719,7 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) fcoe_ctlr_solicit(fip, NULL); error = fcoe_ctlr_flogi_send_locked(fip); } - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); mutex_unlock(&fip->ctlr_mutex); return error; } @@ -1739,9 +1736,8 @@ static int fcoe_ctlr_flogi_retry(struct fcoe_ctlr *fip) static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) { struct fcoe_fcf *fcf; - unsigned long flags; - spin_lock_irqsave(&fip->ctlr_lock, flags); + spin_lock_bh(&fip->ctlr_lock); fcf = fip->sel_fcf; if (!fcf || !fip->flogi_req_send) goto unlock; @@ -1768,7 +1764,7 @@ static void fcoe_ctlr_flogi_send(struct fcoe_ctlr *fip) } else /* XXX */ LIBFCOE_FIP_DBG(fip, "No FCF selected - defer send\n"); unlock: - spin_unlock_irqrestore(&fip->ctlr_lock, flags); + spin_unlock_bh(&fip->ctlr_lock); } /** From 379a58caa19930e010b7efa1c1f3b9411d3d2ca3 Mon Sep 17 00:00:00 2001 From: Lee Duncan <lduncan@suse.com> Date: Fri, 9 Feb 2024 10:07:35 -0800 Subject: [PATCH 134/304] scsi: fnic: Move fnic_fnic_flush_tx() to a work queue Rather than call 'fnic_flush_tx()' from interrupt context we should be moving it onto a work queue to avoid any locking issues. Fixes: 1a1975551943 ("scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock") Co-developed-by: Hannes Reinecke <hare@suse.de> Signed-off-by: Hannes Reinecke <hare@suse.de> Signed-off-by: Lee Duncan <lduncan@suse.com> Link: https://lore.kernel.org/r/ce5ffa5d0ff82c2b2e283b3b4bff23291d49b05c.1707500786.git.lduncan@suse.com Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/fnic/fnic.h | 3 ++- drivers/scsi/fnic/fnic_fcs.c | 5 +++-- drivers/scsi/fnic/fnic_main.c | 1 + drivers/scsi/fnic/fnic_scsi.c | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 2074937c05bc..ce73f08ee889 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -305,6 +305,7 @@ struct fnic { unsigned int copy_wq_base; struct work_struct link_work; struct work_struct frame_work; + struct work_struct flush_work; struct sk_buff_head frame_queue; struct sk_buff_head tx_queue; @@ -363,7 +364,7 @@ void fnic_handle_event(struct work_struct *work); int fnic_rq_cmpl_handler(struct fnic *fnic, int); int fnic_alloc_rq_frame(struct vnic_rq *rq); void fnic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf); -void fnic_flush_tx(struct fnic *); +void fnic_flush_tx(struct work_struct *work); void fnic_eth_send(struct fcoe_ctlr *, struct sk_buff *skb); void fnic_set_port_id(struct fc_lport *, u32, struct fc_frame *); void fnic_update_mac(struct fc_lport *, u8 *new); diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 5e312a55cc7d..a08293b2ad9f 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -1182,7 +1182,7 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp) /** * fnic_flush_tx() - send queued frames. - * @fnic: fnic device + * @work: pointer to work element * * Send frames that were waiting to go out in FC or Ethernet mode. * Whenever changing modes we purge queued frames, so these frames should @@ -1190,8 +1190,9 @@ int fnic_send(struct fc_lport *lp, struct fc_frame *fp) * * Called without fnic_lock held. */ -void fnic_flush_tx(struct fnic *fnic) +void fnic_flush_tx(struct work_struct *work) { + struct fnic *fnic = container_of(work, struct fnic, flush_work); struct sk_buff *skb; struct fc_frame *fp; diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 5ed1d897311a..29eead383eb9 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -830,6 +830,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&fnic->vlans_lock); INIT_WORK(&fnic->fip_frame_work, fnic_handle_fip_frame); INIT_WORK(&fnic->event_work, fnic_handle_event); + INIT_WORK(&fnic->flush_work, fnic_flush_tx); skb_queue_head_init(&fnic->fip_frame_queue); INIT_LIST_HEAD(&fnic->evlist); INIT_LIST_HEAD(&fnic->vlans); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 8d7fc5284293..fc4cee91b175 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -680,7 +680,7 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic, spin_unlock_irqrestore(&fnic->fnic_lock, flags); - fnic_flush_tx(fnic); + queue_work(fnic_event_queue, &fnic->flush_work); reset_cmpl_handler_end: fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET); @@ -736,7 +736,7 @@ static int fnic_fcpio_flogi_reg_cmpl_handler(struct fnic *fnic, } spin_unlock_irqrestore(&fnic->fnic_lock, flags); - fnic_flush_tx(fnic); + queue_work(fnic_event_queue, &fnic->flush_work); queue_work(fnic_event_queue, &fnic->frame_work); } else { spin_unlock_irqrestore(&fnic->fnic_lock, flags); From b0344d6854d25a8b3b901c778b1728885dd99007 Mon Sep 17 00:00:00 2001 From: Doug Berger <opendmb@gmail.com> Date: Fri, 9 Feb 2024 17:24:49 -0800 Subject: [PATCH 135/304] irqchip/irq-brcmstb-l2: Add write memory barrier before exit It was observed on Broadcom devices that use GIC v3 architecture L1 interrupt controllers as the parent of brcmstb-l2 interrupt controllers that the deactivation of the parent interrupt could happen before the brcmstb-l2 deasserted its output. This would lead the GIC to reactivate the interrupt only to find that no L2 interrupt was pending. The result was a spurious interrupt invoking handle_bad_irq() with its associated messaging. While this did not create a functional problem it is a waste of cycles. The hazard exists because the memory mapped bus writes to the brcmstb-l2 registers are buffered and the GIC v3 architecture uses a very efficient system register write to deactivate the interrupt. Add a write memory barrier prior to invoking chained_irq_exit() to introduce a dsb(st) on those systems to ensure the system register write cannot be executed until the memory mapped writes are visible to the system. [ florian: Added Fixes tag ] Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller") Signed-off-by: Doug Berger <opendmb@gmail.com> Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Florian Fainelli <florian.fainelli@broadcom.com> Acked-by: Marc Zyngier <maz@kernel.org> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240210012449.3009125-1-florian.fainelli@broadcom.com --- drivers/irqchip/irq-brcmstb-l2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 5559c943f03f..2b0b3175cea0 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -2,7 +2,7 @@ /* * Generic Broadcom Set Top Box Level 2 Interrupt controller driver * - * Copyright (C) 2014-2017 Broadcom + * Copyright (C) 2014-2024 Broadcom */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -112,6 +112,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) generic_handle_domain_irq(b->domain, irq); } while (status); out: + /* Don't ack parent before all device writes are done */ + wmb(); + chained_irq_exit(chip, desc); } From f1c2765c6afcd1f71f76ed8c9bf94acedab4cecb Mon Sep 17 00:00:00 2001 From: Bibo Mao <maobibo@loongson.cn> Date: Tue, 30 Jan 2024 16:27:20 +0800 Subject: [PATCH 136/304] irqchip/loongson-eiointc: Use correct struct type in eiointc_domain_alloc() eiointc_domain_alloc() uses struct eiointc, which is not defined, for a pointer. Older compilers treat that as a forward declaration and due to assignment of a void pointer there is no warning emitted. As the variable is then handed in as a void pointer argument to irq_domain_set_info() the code is functional. Use struct eiointc_priv instead. [ tglx: Rewrote changelog ] Fixes: dd281e1a1a93 ("irqchip: Add Loongson Extended I/O interrupt controller support") Signed-off-by: Bibo Mao <maobibo@loongson.cn> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Huacai Chen <chenhuacai@loongson.cn> Link: https://lore.kernel.org/r/20240130082722.2912576-2-maobibo@loongson.cn --- drivers/irqchip/irq-loongson-eiointc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 1623cd779175..b3736bdd4b9f 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -241,7 +241,7 @@ static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, int ret; unsigned int i, type; unsigned long hwirq = 0; - struct eiointc *priv = domain->host_data; + struct eiointc_priv *priv = domain->host_data; ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); if (ret) From 8ad032cc8c499af6f3289c796f411e8874b50fdb Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@linaro.org> Date: Thu, 1 Feb 2024 15:17:50 +0300 Subject: [PATCH 137/304] irqchip/qcom-mpm: Fix IS_ERR() vs NULL check in qcom_mpm_init() devm_ioremap() doesn't return error pointers, it returns NULL on error. Update the check accordingly. Fixes: 221b110d87c2 ("irqchip/qcom-mpm: Support passing a slice of SRAM as reg space") Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org> Link: https://lore.kernel.org/r/22e1f4de-edce-4791-bd2d-2b2e98529492@moroto.mountain --- drivers/irqchip/irq-qcom-mpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-qcom-mpm.c b/drivers/irqchip/irq-qcom-mpm.c index cda5838d2232..7942d8eb3d00 100644 --- a/drivers/irqchip/irq-qcom-mpm.c +++ b/drivers/irqchip/irq-qcom-mpm.c @@ -389,8 +389,8 @@ static int qcom_mpm_init(struct device_node *np, struct device_node *parent) /* Don't use devm_ioremap_resource, as we're accessing a shared region. */ priv->base = devm_ioremap(dev, res.start, resource_size(&res)); of_node_put(msgram_np); - if (IS_ERR(priv->base)) - return PTR_ERR(priv->base); + if (!priv->base) + return -ENOMEM; } else { /* Otherwise, fall back to simple MMIO. */ priv->base = devm_platform_ioremap_resource(pdev, 0); From 6ac86372102b477083db99a9af8246fb916271b5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> Date: Thu, 25 Jan 2024 09:15:59 +0100 Subject: [PATCH 138/304] gpiolib: add gpiod_to_gpio_device() stub for !GPIOLIB Add empty stub of gpiod_to_gpio_device() when GPIOLIB is not enabled. Cc: <stable@vger.kernel.org> Fixes: 370232d096e3 ("gpiolib: provide gpiod_to_gpio_device()") Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org> --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 9a5c6c76e653..012797e7106d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -819,6 +819,12 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) +{ + WARN_ON(1); + return ERR_PTR(-ENODEV); +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { From ebe0c15b135b1e4092c25b95d89e9a5899467499 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> Date: Thu, 25 Jan 2024 09:16:00 +0100 Subject: [PATCH 139/304] gpiolib: add gpio_device_get_base() stub for !GPIOLIB Add empty stub of gpio_device_get_base() when GPIOLIB is not enabled. Cc: <stable@vger.kernel.org> Fixes: 8c85a102fc4e ("gpiolib: provide gpio_device_get_base()") Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org> --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 012797e7106d..c1df7698edb0 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -825,6 +825,12 @@ static inline struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) return ERR_PTR(-ENODEV); } +static inline int gpio_device_get_base(struct gpio_device *gdev) +{ + WARN_ON(1); + return -ENODEV; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { From 2df8aa3cad407044f2febdbbdf220c6dae839c79 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> Date: Thu, 25 Jan 2024 09:16:01 +0100 Subject: [PATCH 140/304] gpiolib: add gpio_device_get_label() stub for !GPIOLIB Add empty stub of gpio_device_get_label() when GPIOLIB is not enabled. Cc: <stable@vger.kernel.org> Fixes: d1f7728259ef ("gpiolib: provide gpio_device_get_label()") Suggested-by: kernel test robot <lkp@intel.com> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org> --- include/linux/gpio/driver.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c1df7698edb0..7f75c9a51874 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -831,6 +831,12 @@ static inline int gpio_device_get_base(struct gpio_device *gdev) return -ENODEV; } +static inline const char *gpio_device_get_label(struct gpio_device *gdev) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { From 902d6d013f75b68f31d208c6f3ff9cdca82648a7 Mon Sep 17 00:00:00 2001 From: Xiubo Li <xiubli@redhat.com> Date: Wed, 13 Sep 2023 16:18:34 +0800 Subject: [PATCH 141/304] ceph: always queue a writeback when revoking the Fb caps In case there is 'Fw' dirty caps and 'CHECK_CAPS_FLUSH' is set we will always ignore queue a writeback. Queue a writeback is very important because it will block kclient flushing the snapcaps to MDS and which will block MDS waiting for revoking the 'Fb' caps. Link: https://tracker.ceph.com/issues/50223 Signed-off-by: Xiubo Li <xiubli@redhat.com> Reviewed-by: Milind Changire <mchangir@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/ceph/caps.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ad1f46c66fbf..bce3a840f15c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2156,6 +2156,30 @@ retry: ceph_cap_string(cap->implemented), ceph_cap_string(revoking)); + /* completed revocation? going down and there are no caps? */ + if (revoking) { + if ((revoking & cap_used) == 0) { + doutc(cl, "completed revocation of %s\n", + ceph_cap_string(cap->implemented & ~cap->issued)); + goto ack; + } + + /* + * If the "i_wrbuffer_ref" was increased by mmap or generic + * cache write just before the ceph_check_caps() is called, + * the Fb capability revoking will fail this time. Then we + * must wait for the BDI's delayed work to flush the dirty + * pages and to release the "i_wrbuffer_ref", which will cost + * at most 5 seconds. That means the MDS needs to wait at + * most 5 seconds to finished the Fb capability's revocation. + * + * Let's queue a writeback for it. + */ + if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref && + (revoking & CEPH_CAP_FILE_BUFFER)) + queue_writeback = true; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -2183,30 +2207,6 @@ retry: } } - /* completed revocation? going down and there are no caps? */ - if (revoking) { - if ((revoking & cap_used) == 0) { - doutc(cl, "completed revocation of %s\n", - ceph_cap_string(cap->implemented & ~cap->issued)); - goto ack; - } - - /* - * If the "i_wrbuffer_ref" was increased by mmap or generic - * cache write just before the ceph_check_caps() is called, - * the Fb capability revoking will fail this time. Then we - * must wait for the BDI's delayed work to flush the dirty - * pages and to release the "i_wrbuffer_ref", which will cost - * at most 5 seconds. That means the MDS needs to wait at - * most 5 seconds to finished the Fb capability's revocation. - * - * Let's queue a writeback for it. - */ - if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref && - (revoking & CEPH_CAP_FILE_BUFFER)) - queue_writeback = true; - } - /* want more caps from mds? */ if (want & ~cap->mds_wanted) { if (want & ~(cap->mds_wanted | cap->issued)) From dbc347ef7f0c53aa4a5383238a804d7ebbb0b5ca Mon Sep 17 00:00:00 2001 From: Xiubo Li <xiubli@redhat.com> Date: Thu, 14 Sep 2023 10:29:16 +0800 Subject: [PATCH 142/304] ceph: add ceph_cap_unlink_work to fire check_caps() immediately When unlinking a file the check caps could be delayed for more than 5 seconds, but in MDS side it maybe waiting for the clients to release caps. This will use the cap_wq work queue and a dedicated list to help fire the check_caps() and dirty buffer flushing immediately. Link: https://tracker.ceph.com/issues/50223 Signed-off-by: Xiubo Li <xiubli@redhat.com> Reviewed-by: Milind Changire <mchangir@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- fs/ceph/caps.c | 17 +++++++++++++++- fs/ceph/mds_client.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/mds_client.h | 5 +++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index bce3a840f15c..7fb4aae97412 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4772,7 +4772,22 @@ int ceph_drop_caps_for_unlink(struct inode *inode) if (__ceph_caps_dirty(ci)) { struct ceph_mds_client *mdsc = ceph_inode_to_fs_client(inode)->mdsc; - __cap_delay_requeue_front(mdsc, ci); + + doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, + ceph_vinop(inode)); + spin_lock(&mdsc->cap_unlink_delay_lock); + ci->i_ceph_flags |= CEPH_I_FLUSH; + if (!list_empty(&ci->i_cap_delay_list)) + list_del_init(&ci->i_cap_delay_list); + list_add_tail(&ci->i_cap_delay_list, + &mdsc->cap_unlink_delay_list); + spin_unlock(&mdsc->cap_unlink_delay_lock); + + /* + * Fire the work immediately, because the MDS maybe + * waiting for caps release. + */ + ceph_queue_cap_unlink_work(mdsc); } } spin_unlock(&ci->i_ceph_lock); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f71bb9c9569f..3ab9c268a8bb 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2484,6 +2484,50 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr) } } +void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc) +{ + struct ceph_client *cl = mdsc->fsc->client; + if (mdsc->stopping) + return; + + if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_unlink_work)) { + doutc(cl, "caps unlink work queued\n"); + } else { + doutc(cl, "failed to queue caps unlink work\n"); + } +} + +static void ceph_cap_unlink_work(struct work_struct *work) +{ + struct ceph_mds_client *mdsc = + container_of(work, struct ceph_mds_client, cap_unlink_work); + struct ceph_client *cl = mdsc->fsc->client; + + doutc(cl, "begin\n"); + spin_lock(&mdsc->cap_unlink_delay_lock); + while (!list_empty(&mdsc->cap_unlink_delay_list)) { + struct ceph_inode_info *ci; + struct inode *inode; + + ci = list_first_entry(&mdsc->cap_unlink_delay_list, + struct ceph_inode_info, + i_cap_delay_list); + list_del_init(&ci->i_cap_delay_list); + + inode = igrab(&ci->netfs.inode); + if (inode) { + spin_unlock(&mdsc->cap_unlink_delay_lock); + doutc(cl, "on %p %llx.%llx\n", inode, + ceph_vinop(inode)); + ceph_check_caps(ci, CHECK_CAPS_FLUSH); + iput(inode); + spin_lock(&mdsc->cap_unlink_delay_lock); + } + } + spin_unlock(&mdsc->cap_unlink_delay_lock); + doutc(cl, "done\n"); +} + /* * requests */ @@ -5359,6 +5403,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->cap_delay_list); INIT_LIST_HEAD(&mdsc->cap_wait_list); spin_lock_init(&mdsc->cap_delay_lock); + INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); + spin_lock_init(&mdsc->cap_unlink_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); mdsc->last_cap_flush_tid = 1; @@ -5367,6 +5413,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) spin_lock_init(&mdsc->cap_dirty_lock); init_waitqueue_head(&mdsc->cap_flushing_wq); INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work); + INIT_WORK(&mdsc->cap_unlink_work, ceph_cap_unlink_work); err = ceph_metric_init(&mdsc->metric); if (err) goto err_mdsmap; @@ -5640,6 +5687,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) ceph_cleanup_global_and_empty_realms(mdsc); cancel_work_sync(&mdsc->cap_reclaim_work); + cancel_work_sync(&mdsc->cap_unlink_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ doutc(cl, "done\n"); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 40560af38827..03f8ff00874f 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -462,6 +462,8 @@ struct ceph_mds_client { unsigned long last_renew_caps; /* last time we renewed our caps */ struct list_head cap_delay_list; /* caps with delayed release */ spinlock_t cap_delay_lock; /* protects cap_delay_list */ + struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ + spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; @@ -475,6 +477,8 @@ struct ceph_mds_client { struct work_struct cap_reclaim_work; atomic_t cap_reclaim_pending; + struct work_struct cap_unlink_work; + /* * Cap reservations * @@ -574,6 +578,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); +extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc); extern int ceph_iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, int mds, void *), void *arg); From 846297e11e8ae428f8b00156a0cfe2db58100702 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 13 Feb 2024 10:12:04 +0000 Subject: [PATCH 143/304] irqchip/gic-v3-its: Handle non-coherent GICv4 redistributors Although the GICv3 code base has gained some handling of systems failing to handle the shareability attributes, the GICv4 side of things has been firmly ignored. This is unfortunate, as the new recent addition of the "dma-noncoherent" is supposed to apply to all of the GICR tables, and not just the ones that are common to v3 and v4. Add some checks to handle the VPROPBASE/VPENDBASE shareability and cacheability attributes in the same way we deal with the other GICR_BASE registers, wrapping the flag check in a helper for improved readability. Note that this has been found by inspection only, as I don't have access to HW that suffers from this particular issue. Fixes: 3a0fff0fb6a3 ("irqchip/gic-v3: Enable non-coherent redistributors/ITSes DT probing") Signed-off-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Lorenzo Pieralisi <lpieralisi@kernel.org> Link: https://lore.kernel.org/r/20240213101206.2137483-2-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 37 +++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d097001c1e3e..fec1b58470df 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -207,6 +207,11 @@ static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its) return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]); } +static bool rdists_support_shareable(void) +{ + return !(gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE); +} + static u16 get_its_list(struct its_vm *vm) { struct its_node *its; @@ -2710,10 +2715,12 @@ static u64 inherit_vpe_l1_table_from_its(void) break; } val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12); - val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK, - FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser)); - val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK, - FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser)); + if (rdists_support_shareable()) { + val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK, + FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser)); + val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK, + FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser)); + } val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1); return val; @@ -2936,8 +2943,10 @@ static int allocate_vpe_l1_table(void) WARN_ON(!IS_ALIGNED(pa, psz)); val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12); - val |= GICR_VPROPBASER_RaWb; - val |= GICR_VPROPBASER_InnerShareable; + if (rdists_support_shareable()) { + val |= GICR_VPROPBASER_RaWb; + val |= GICR_VPROPBASER_InnerShareable; + } val |= GICR_VPROPBASER_4_1_Z; val |= GICR_VPROPBASER_4_1_VALID; @@ -3126,7 +3135,7 @@ static void its_cpu_init_lpis(void) gicr_write_propbaser(val, rbase + GICR_PROPBASER); tmp = gicr_read_propbaser(rbase + GICR_PROPBASER); - if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE) + if (!rdists_support_shareable()) tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK; if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) { @@ -3153,7 +3162,7 @@ static void its_cpu_init_lpis(void) gicr_write_pendbaser(val, rbase + GICR_PENDBASER); tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER); - if (gic_rdists->flags & RDIST_FLAGS_FORCE_NON_SHAREABLE) + if (!rdists_support_shareable()) tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK; if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) { @@ -3880,14 +3889,18 @@ static void its_vpe_schedule(struct its_vpe *vpe) val = virt_to_phys(page_address(vpe->its_vm->vprop_page)) & GENMASK_ULL(51, 12); val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; - val |= GICR_VPROPBASER_RaWb; - val |= GICR_VPROPBASER_InnerShareable; + if (rdists_support_shareable()) { + val |= GICR_VPROPBASER_RaWb; + val |= GICR_VPROPBASER_InnerShareable; + } gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); val = virt_to_phys(page_address(vpe->vpt_page)) & GENMASK_ULL(51, 16); - val |= GICR_VPENDBASER_RaWaWb; - val |= GICR_VPENDBASER_InnerShareable; + if (rdists_support_shareable()) { + val |= GICR_VPENDBASER_RaWaWb; + val |= GICR_VPENDBASER_InnerShareable; + } /* * There is no good way of finding out if the pending table is * empty as we can race against the doorbell interrupt very From 8b02da04ad978827e5ccd675acf170198f747a7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 13 Feb 2024 10:12:05 +0000 Subject: [PATCH 144/304] irqchip/gic-v3-its: Restore quirk probing for ACPI-based systems While refactoring the way the ITSs are probed, the handling of quirks applicable to ACPI-based platforms was lost. As a result, systems such as HIP07 lose their GICv4 functionnality, and some other may even fail to boot, unless they are configured to boot with DT. Move the enabling of quirks into its_probe_one(), making it common to all firmware implementations. Fixes: 9585a495ac93 ("irqchip/gic-v3-its: Split allocation from initialisation of its_node") Signed-off-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Lorenzo Pieralisi <lpieralisi@kernel.org> Reviewed-by: Zenghui Yu <yuzenghui@huawei.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240213101206.2137483-3-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fec1b58470df..250b4562f308 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -5091,6 +5091,8 @@ static int __init its_probe_one(struct its_node *its) u32 ctlr; int err; + its_enable_quirks(its); + if (is_v4(its)) { if (!(its->typer & GITS_TYPER_VMOVP)) { err = its_compute_its_list_map(its); @@ -5442,7 +5444,6 @@ static int __init its_of_probe(struct device_node *node) if (!its) return -ENOMEM; - its_enable_quirks(its); err = its_probe_one(its); if (err) { its_node_destroy(its); From af9acbfc2c4b72c378d0b9a2ee023ed01055d3e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 13 Feb 2024 10:12:06 +0000 Subject: [PATCH 145/304] irqchip/gic-v3-its: Fix GICv4.1 VPE affinity update When updating the affinity of a VPE, the VMOVP command is currently skipped if the two CPUs are part of the same VPE affinity. But this is wrong, as the doorbell corresponding to this VPE is still delivered on the 'old' CPU, which screws up the balancing. Furthermore, offlining that 'old' CPU results in doorbell interrupts generated for this VPE being discarded. The harsh reality is that VMOVP cannot be elided when a set_affinity() request occurs. It needs to be obeyed, and if an optimisation is to be made, it is at the point where the affinity change request is made (such as in KVM). Drop the VMOVP elision altogether, and only use the vpe_table_mask to try and stay within the same ITS affinity group if at all possible. Fixes: dd3f050a216e (irqchip/gic-v4.1: Implement the v4.1 flavour of VMOVP) Reported-by: Kunkun Jiang <jiangkunkun@huawei.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240213101206.2137483-4-maz@kernel.org --- drivers/irqchip/irq-gic-v3-its.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 250b4562f308..53abd4779914 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3826,8 +3826,9 @@ static int its_vpe_set_affinity(struct irq_data *d, bool force) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); - int from, cpu = cpumask_first(mask_val); + struct cpumask common, *table_mask; unsigned long flags; + int from, cpu; /* * Changing affinity is mega expensive, so let's be as lazy as @@ -3843,19 +3844,22 @@ static int its_vpe_set_affinity(struct irq_data *d, * taken on any vLPI handling path that evaluates vpe->col_idx. */ from = vpe_to_cpuid_lock(vpe, &flags); + table_mask = gic_data_rdist_cpu(from)->vpe_table_mask; + + /* + * If we are offered another CPU in the same GICv4.1 ITS + * affinity, pick this one. Otherwise, any CPU will do. + */ + if (table_mask && cpumask_and(&common, mask_val, table_mask)) + cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common); + else + cpu = cpumask_first(mask_val); + if (from == cpu) goto out; vpe->col_idx = cpu; - /* - * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD - * is sharing its VPE table with the current one. - */ - if (gic_data_rdist_cpu(cpu)->vpe_table_mask && - cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) - goto out; - its_send_vmovp(vpe); its_vpe_db_proxy_move(vpe, from, cpu); From d463bcd7eb341b5b6e6d9263a3bce9f405c2af98 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Date: Tue, 13 Feb 2024 10:04:17 +0200 Subject: [PATCH 146/304] ASoC: SOF: Intel: pci-tgl: Change the default paths and firmware names The currently used paths and firmware name reflects the reference firmware convention: default_fw_path: intel/avs/{platform_name} default_lib_path: intel/avs-lib/{platform_name} default_tplg_path: intel/avs-tplg default_fw_filename: dsp_basefw.bin The SOF supports building the firmware for cAVS2.5 platforms using IPC4 and it is the preferred IPC4 implementation to be used on these devices. Change the paths and firmware names to reflect this: default_fw_path: intel/sof-ipc4/{platform_name} default_lib_path: intel/sof-ipc4-lib/{platform_name} default_tplg_path: intel/sof-ipc4-tplg default_fw_filename: sof-{platform_name}.ri Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com> Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com> Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Link: https://msgid.link/r/20240213080418.21256-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/sof/intel/pci-tgl.c | 64 +++++++++++++++++------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index 0660d4b2ac96..a361ee9d1107 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -33,18 +33,18 @@ static const struct sof_dev_desc tgl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/tgl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-tgl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-tgl.ri", }, .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -66,18 +66,18 @@ static const struct sof_dev_desc tglh_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/tgl-h", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/tgl-h", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/tgl-h", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/tgl-h", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-tgl-h.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-tgl-h.ri", }, .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -98,18 +98,18 @@ static const struct sof_dev_desc ehl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/ehl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/ehl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/ehl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/ehl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-ehl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-ehl.ri", }, .nocodec_tplg_filename = "sof-ehl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -131,18 +131,18 @@ static const struct sof_dev_desc adls_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/adl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-s", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-s", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-adl-s.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-adl-s.ri", }, .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -164,18 +164,18 @@ static const struct sof_dev_desc adl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/adl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/adl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-adl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-adl.ri", }, .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -197,18 +197,18 @@ static const struct sof_dev_desc adl_n_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/adl-n", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/adl-n", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/adl-n", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/adl-n", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-adl-n.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-adl-n.ri", }, .nocodec_tplg_filename = "sof-adl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -230,18 +230,18 @@ static const struct sof_dev_desc rpls_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/rpl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl-s", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl-s", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl-s", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-rpl-s.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-rpl-s.ri", }, .nocodec_tplg_filename = "sof-rpl-nocodec.tplg", .ops = &sof_tgl_ops, @@ -263,18 +263,18 @@ static const struct sof_dev_desc rpl_desc = { .dspless_mode_supported = true, /* Only supported for HDaudio */ .default_fw_path = { [SOF_IPC_TYPE_3] = "intel/sof", - [SOF_IPC_TYPE_4] = "intel/avs/rpl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4/rpl", }, .default_lib_path = { - [SOF_IPC_TYPE_4] = "intel/avs-lib/rpl", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-lib/rpl", }, .default_tplg_path = { [SOF_IPC_TYPE_3] = "intel/sof-tplg", - [SOF_IPC_TYPE_4] = "intel/avs-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_3] = "sof-rpl.ri", - [SOF_IPC_TYPE_4] = "dsp_basefw.bin", + [SOF_IPC_TYPE_4] = "sof-rpl.ri", }, .nocodec_tplg_filename = "sof-rpl-nocodec.tplg", .ops = &sof_tgl_ops, From b029482011bffd57319507c2bf4c5a7e06eca756 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Date: Tue, 13 Feb 2024 10:04:18 +0200 Subject: [PATCH 147/304] ASoC: SOF: Intel: pci-lnl: Change the topology path to intel/sof-ipc4-tplg The firmware release which going to introduce support for Lunar Lake will use the documented default topology directory for IPC4: intel/sof-ipc4-tplg Change the default path accordingly before sof-bin (sof-firmware) release includes Lunar Lake firmware and topologies. Link: https://github.com/thesofproject/sof-docs/blob/master/getting_started/intel_debug/introduction.rst#2-topology-file Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Reviewed-by: Mengdong Lin <mengdong.lin@intel.com> Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com> Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Reviewed-by: Chao Song <chao.song@linux.intel.com> Link: https://msgid.link/r/20240213080418.21256-3-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/sof/intel/pci-lnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c index 78a57eb9cbc3..b26ffe767fab 100644 --- a/sound/soc/sof/intel/pci-lnl.c +++ b/sound/soc/sof/intel/pci-lnl.c @@ -36,7 +36,7 @@ static const struct sof_dev_desc lnl_desc = { [SOF_IPC_TYPE_4] = "intel/sof-ipc4/lnl", }, .default_tplg_path = { - [SOF_IPC_TYPE_4] = "intel/sof-ace-tplg", + [SOF_IPC_TYPE_4] = "intel/sof-ipc4-tplg", }, .default_fw_filename = { [SOF_IPC_TYPE_4] = "sof-lnl.ri", From 5b5089e2a1e753ffe9ee2bf101a9e06784ec5e1a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Tue, 13 Feb 2024 11:10:46 +0100 Subject: [PATCH 148/304] ASoC: q6dsp: fix event handler prototype clang-16 points out a mismatch in function types that was hidden by a typecast: sound/soc/qcom/qdsp6/q6apm-dai.c:355:38: error: cast from 'void (*)(uint32_t, uint32_t, uint32_t *, void *)' (aka 'void (*)(unsigned int, unsigned int, unsigned int *, void *)') to 'q6apm_cb' (aka 'void (*)(unsigned int, unsigned int, void *, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 355 | prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler, prtd, graph_id); sound/soc/qcom/qdsp6/q6apm-dai.c:499:38: error: cast from 'void (*)(uint32_t, uint32_t, uint32_t *, void *)' (aka 'void (*)(unsigned int, unsigned int, unsigned int *, void *)') to 'q6apm_cb' (aka 'void (*)(unsigned int, unsigned int, void *, void *)') converts to incompatible function type [-Werror,-Wcast-function-type-strict] 499 | prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler_compr, prtd, graph_id); The only difference here is the 'payload' argument, which is not even used in this function, so just fix its type and remove the cast. Fixes: 88b60bf047fd ("ASoC: q6dsp: q6apm-dai: Add open/free compress DAI callbacks") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Link: https://msgid.link/r/20240213101105.459402-1-arnd@kernel.org Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/qcom/qdsp6/q6apm-dai.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index 052e40cb38fe..00bbd291be5c 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -123,7 +123,7 @@ static struct snd_pcm_hardware q6apm_dai_hardware_playback = { .fifo_size = 0, }; -static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, void *priv) +static void event_handler(uint32_t opcode, uint32_t token, void *payload, void *priv) { struct q6apm_dai_rtd *prtd = priv; struct snd_pcm_substream *substream = prtd->substream; @@ -157,7 +157,7 @@ static void event_handler(uint32_t opcode, uint32_t token, uint32_t *payload, vo } static void event_handler_compr(uint32_t opcode, uint32_t token, - uint32_t *payload, void *priv) + void *payload, void *priv) { struct q6apm_dai_rtd *prtd = priv; struct snd_compr_stream *substream = prtd->cstream; @@ -352,7 +352,7 @@ static int q6apm_dai_open(struct snd_soc_component *component, spin_lock_init(&prtd->lock); prtd->substream = substream; - prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler, prtd, graph_id); + prtd->graph = q6apm_graph_open(dev, event_handler, prtd, graph_id); if (IS_ERR(prtd->graph)) { dev_err(dev, "%s: Could not allocate memory\n", __func__); ret = PTR_ERR(prtd->graph); @@ -496,7 +496,7 @@ static int q6apm_dai_compr_open(struct snd_soc_component *component, return -ENOMEM; prtd->cstream = stream; - prtd->graph = q6apm_graph_open(dev, (q6apm_cb)event_handler_compr, prtd, graph_id); + prtd->graph = q6apm_graph_open(dev, event_handler_compr, prtd, graph_id); if (IS_ERR(prtd->graph)) { ret = PTR_ERR(prtd->graph); kfree(prtd); From c40aad7c81e5fba34b70123ed7ce3397fa62a4d2 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Date: Tue, 13 Feb 2024 13:52:33 +0200 Subject: [PATCH 149/304] ASoC: SOF: ipc4-pcm: Workaround for crashed firmware on system suspend When the system is suspended while audio is active, the sof_ipc4_pcm_hw_free() is invoked to reset the pipelines since during suspend the DSP is turned off, streams will be re-started after resume. If the firmware crashes during while audio is running (or when we reset the stream before suspend) then the sof_ipc4_set_multi_pipeline_state() will fail with IPC error and the state change is interrupted. This will cause misalignment between the kernel and firmware state on next DSP boot resulting errors returned by firmware for IPC messages, eventually failing the audio resume. On stream close the errors are ignored so the kernel state will be corrected on the next DSP boot, so the second boot after the DSP panic. If sof_ipc4_trigger_pipelines() is called from sof_ipc4_pcm_hw_free() then state parameter is SOF_IPC4_PIPE_RESET and only in this case. Treat a forced pipeline reset similarly to how we treat a pcm_free by ignoring error on state sending to allow the kernel's state to be consistent with the state the firmware will have after the next boot. Link: https://github.com/thesofproject/sof/issues/8721 Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com> Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com> Link: https://msgid.link/r/20240213115233.15716-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/sof/ipc4-pcm.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c index 85d3f390e4b2..07eb5c6d4adf 100644 --- a/sound/soc/sof/ipc4-pcm.c +++ b/sound/soc/sof/ipc4-pcm.c @@ -413,7 +413,18 @@ skip_pause_transition: ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list); if (ret < 0) { dev_err(sdev->dev, "failed to set final state %d for all pipelines\n", state); - goto free; + /* + * workaround: if the firmware is crashed while setting the + * pipelines to reset state we must ignore the error code and + * reset it to 0. + * Since the firmware is crashed we will not send IPC messages + * and we are going to see errors printed, but the state of the + * widgets will be correct for the next boot. + */ + if (sdev->fw_state != SOF_FW_CRASHED || state != SOF_IPC4_PIPE_RESET) + goto free; + + ret = 0; } /* update RUNNING/RESET state for all pipelines that were just triggered */ From fcbe4873089c84da641df75cda9cac2e9addbb4b Mon Sep 17 00:00:00 2001 From: Curtis Malainey <cujomalainey@chromium.org> Date: Tue, 13 Feb 2024 14:38:34 +0200 Subject: [PATCH 150/304] ASoC: SOF: IPC3: fix message bounds on ipc ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 74ad8ed65121 ("ASoC: SOF: ipc3: Implement rx_msg IPC ops") introduced a new allocation before the upper bounds check in do_rx_work. As a result A DSP can cause bad allocations if spewing garbage. Fixes: 74ad8ed65121 ("ASoC: SOF: ipc3: Implement rx_msg IPC ops") Reported-by: Tim Van Patten <timvp@google.com> Cc: stable@vger.kernel.org Signed-off-by: Curtis Malainey <cujomalainey@chromium.org> Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com> Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com> Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com> Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com> Link: https://msgid.link/r/20240213123834.4827-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/sof/ipc3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c index fb40378ad084..c03dd513fbff 100644 --- a/sound/soc/sof/ipc3.c +++ b/sound/soc/sof/ipc3.c @@ -1067,7 +1067,7 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev) return; } - if (hdr.size < sizeof(hdr)) { + if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) { dev_err(sdev->dev, "The received message size is invalid\n"); return; } From 24b6332c2d4ff08fb7601ac8f751a5ba51e0ebd3 Mon Sep 17 00:00:00 2001 From: Tomasz Kudela <ramzes005@gmail.com> Date: Tue, 13 Feb 2024 12:56:14 +0100 Subject: [PATCH 151/304] ALSA: hda: Add Lenovo Legion 7i gen7 sound quirk Add sound support for the Legion 7i gen7 laptop (16IAX7). Signed-off-by: Tomasz Kudela <ramzes005@gmail.com> Link: https://lore.kernel.org/r/20240213115614.10420-1-ramzes005@gmail.com Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ sound/pci/hda/patch_realtek.c | 1 + 2 files changed, 3 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index d74cf11eef1e..57b21285ab6a 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -95,6 +95,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 }, { "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, + { "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 }, { "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B5", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "17AA38B6", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, @@ -431,6 +432,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431F12", generic_dsd_config }, { "CSC3551", "10431F1F", generic_dsd_config }, { "CSC3551", "10431F62", generic_dsd_config }, + { "CSC3551", "17AA386F", generic_dsd_config }, { "CSC3551", "17AA38B4", generic_dsd_config }, { "CSC3551", "17AA38B5", generic_dsd_config }, { "CSC3551", "17AA38B6", generic_dsd_config }, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e045d3e76a45..98886b803bc5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10262,6 +10262,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3855, "Legion 7 16ITHG6", ALC287_FIXUP_LEGION_16ITHG6), SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), + SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C), SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C), From b671cd3d456315f63171a670769356a196cf7fd0 Mon Sep 17 00:00:00 2001 From: Philip Yang <Philip.Yang@amd.com> Date: Mon, 21 Aug 2023 16:02:01 -0400 Subject: [PATCH 152/304] drm/prime: Support page array >= 4GB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without unsigned long typecast, the size is passed in as zero if page array size >= 4GB, nr_pages >= 0x100000, then sg list converted will have the first and the last chunk lost. Signed-off-by: Philip Yang <Philip.Yang@amd.com> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> CC: stable@vger.kernel.org Signed-off-by: Christian König <christian.koenig@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230821200201.24685-1-Philip.Yang@amd.com --- drivers/gpu/drm/drm_prime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 834a5e28abbe..7352bde299d5 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -820,7 +820,7 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, + (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); From b6802b61a9d0e99dcfa6fff7c50db7c48a9623d3 Mon Sep 17 00:00:00 2001 From: Rob Clark <robdclark@chromium.org> Date: Mon, 12 Feb 2024 13:55:34 -0800 Subject: [PATCH 153/304] drm/crtc: fix uninitialized variable use even harder DRM_MODESET_LOCK_ALL_BEGIN() has a hidden trap-door (aka retry loop), which means we can't rely too much on variable initializers. Fixes: 6e455f5dcdd1 ("drm/crtc: fix uninitialized variable use") Signed-off-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Abhinav Kumar <quic_abhinavk@quicinc.com> Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> # sc7180, sdm845 Link: https://patchwork.freedesktop.org/patch/msgid/20240212215534.190682-1-robdclark@gmail.com Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> --- drivers/gpu/drm/drm_crtc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index cb90e70d85e8..65f9f66933bb 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -904,6 +904,7 @@ out: connector_set = NULL; fb = NULL; mode = NULL; + num_connectors = 0; DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); From 79bd7eab8366b29eaa099908d5694cb473684b9d Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni <kch@nvidia.com> Date: Tue, 13 Feb 2024 00:26:46 -0800 Subject: [PATCH 154/304] nvme-fabrics: fix I/O connect error handling In nvmf_connect_io_queue(), if connect I/O command fails, we log the error and continue for authentication. This overrides error captured from __nvme_submit_sync_cmd(), causing wrong return value. Add goto out_free_data after logging connect error to fix the issue. Fixes: f50fff73d620c ("nvme: implement In-Band authentication") Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com> Reviewed-by: Hannes Reinecke <hare@suse.de> Signed-off-by: Keith Busch <kbusch@kernel.org> --- drivers/nvme/host/fabrics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 3499acbf6a82..495c171daead 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -534,6 +534,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); + goto out_free_data; } result = le32_to_cpu(res.u32); if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { From 29f6975332479f92233594901c649ff4d71f8cb6 Mon Sep 17 00:00:00 2001 From: Keith Busch <kbusch@kernel.org> Date: Mon, 5 Feb 2024 11:10:25 -0800 Subject: [PATCH 155/304] nvme: implement support for relaxed effects NVM Express TP4167 provides a way for controllers to report a relaxed execution constraint. Specifically, it notifies of exclusivity for IO vs. admin commands instead of grouping these together. If set, then we don't need to freeze IO in order to execute that admin command. The freezing distrupts IO processes, so it's nice to avoid that if the controller tells us it's not necessary. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Keith Busch <kbusch@kernel.org> --- drivers/nvme/host/core.c | 4 ++++ include/linux/nvme.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 60537c9224bf..0a96362912ce 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1153,6 +1153,10 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } else { effects = le32_to_cpu(ctrl->effects->acs[opcode]); + + /* Ignore execution restrictions if any relaxation bits are set */ + if (effects & NVME_CMD_EFFECTS_CSER_MASK) + effects &= ~NVME_CMD_EFFECTS_CSE_MASK; } return effects; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bc605ec4a3fd..3ef4053ea950 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -646,6 +646,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSER_MASK = GENMASK(15, 14), NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), From bdbddb109c75365d22ec4826f480c5e75869e1cb Mon Sep 17 00:00:00 2001 From: Petr Pavlu <petr.pavlu@suse.com> Date: Tue, 13 Feb 2024 14:24:34 +0100 Subject: [PATCH 156/304] tracing: Fix HAVE_DYNAMIC_FTRACE_WITH_REGS ifdef Commit a8b9cf62ade1 ("ftrace: Fix DIRECT_CALLS to use SAVE_REGS by default") attempted to fix an issue with direct trampolines on x86, see its description for details. However, it wrongly referenced the HAVE_DYNAMIC_FTRACE_WITH_REGS config option and the problem is still present. Add the missing "CONFIG_" prefix for the logic to work as intended. Link: https://lore.kernel.org/linux-trace-kernel/20240213132434.22537-1-petr.pavlu@suse.com Fixes: a8b9cf62ade1 ("ftrace: Fix DIRECT_CALLS to use SAVE_REGS by default") Signed-off-by: Petr Pavlu <petr.pavlu@suse.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c060d5b47910..83ba342aef31 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5331,7 +5331,7 @@ static int register_ftrace_function_nolock(struct ftrace_ops *ops); * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it * jumps from ftrace_caller for multiple ftrace_ops. */ -#ifndef HAVE_DYNAMIC_FTRACE_WITH_REGS +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS) #else #define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) From a6eaa24f1cc2c7aecec6047556bdfe32042094c3 Mon Sep 17 00:00:00 2001 From: Sven Schnelle <svens@linux.ibm.com> Date: Mon, 5 Feb 2024 07:53:40 +0100 Subject: [PATCH 157/304] tracing: Use ring_buffer_record_is_set_on() in tracer_tracing_is_on() tracer_tracing_is_on() checks whether record_disabled is not zero. This checks both the record_disabled counter and the RB_BUFFER_OFF flag. Reading the source it looks like this function should only check for the RB_BUFFER_OFF flag. Therefore use ring_buffer_record_is_set_on(). This fixes spurious fails in the 'test for function traceon/off triggers' test from the ftrace testsuite when the system is under load. Link: https://lore.kernel.org/linux-trace-kernel/20240205065340.2848065-1-svens@linux.ibm.com Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Tested-By: Mete Durlu <meted@linux.ibm.com> Signed-off-by: Sven Schnelle <svens@linux.ibm.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ff8a439d674..aa54810e8b56 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1532,7 +1532,7 @@ void disable_trace_on_warning(void) bool tracer_tracing_is_on(struct trace_array *tr) { if (tr->array_buffer.buffer) - return ring_buffer_record_is_on(tr->array_buffer.buffer); + return ring_buffer_record_is_set_on(tr->array_buffer.buffer); return !tr->buffer_disabled; } From 11f522256e9043b0fcd2f994278645d3e201d20c Mon Sep 17 00:00:00 2001 From: Hari Bathini <hbathini@linux.ibm.com> Date: Thu, 8 Feb 2024 15:31:15 +0530 Subject: [PATCH 158/304] bpf: Fix warning for bpf_cpumask in verifier Compiling with CONFIG_BPF_SYSCALL & !CONFIG_BPF_JIT throws the below warning: "WARN: resolve_btfids: unresolved symbol bpf_cpumask" Fix it by adding the appropriate #ifdef. Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Jiri Olsa <jolsa@kernel.org> Acked-by: Stanislav Fomichev <sdf@google.com> Acked-by: David Vernet <void@manifault.com> Link: https://lore.kernel.org/bpf/20240208100115.602172-1-hbathini@linux.ibm.com --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 65f598694d55..b263f093ee76 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5227,7 +5227,9 @@ BTF_ID(struct, prog_test_ref_kfunc) #ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) #endif +#ifdef CONFIG_BPF_JIT BTF_ID(struct, bpf_cpumask) +#endif BTF_ID(struct, task_struct) BTF_SET_END(rcu_protected_types) From c60d847be7b8e69e419e02a2b3d19c2842a3c35d Mon Sep 17 00:00:00 2001 From: Will Deacon <will@kernel.org> Date: Mon, 12 Feb 2024 19:30:52 +0000 Subject: [PATCH 159/304] KVM: arm64: Fix double-free following kvm_pgtable_stage2_free_unlinked() kvm_pgtable_stage2_free_unlinked() does the final put_page() on the root page of the sub-tree before returning, so remove the additional put_page() invocations in the callers. Cc: Ricardo Koller <ricarkol@google.com> Fixes: f6a27d6dc51b2 ("KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed()") Signed-off-by: Will Deacon <will@kernel.org> Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20240212193052.27765-1-will@kernel.org --- arch/arm64/kvm/hyp/pgtable.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c651df904fe3..ab9d05fcf98b 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1419,7 +1419,6 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, level + 1); if (ret) { kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level); - mm_ops->put_page(pgtable); return ERR_PTR(ret); } @@ -1502,7 +1501,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx, if (!stage2_try_break_pte(ctx, mmu)) { kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level); - mm_ops->put_page(childp); return -EAGAIN; } From b6ddaa63f728d26c12048aed76be99c24f435c41 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> Date: Fri, 19 Jan 2024 11:08:40 -0800 Subject: [PATCH 160/304] drm/rockchip: vop2: add a missing unlock in vop2_crtc_atomic_enable() Unlock before returning on the error path. Fixes: 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588") Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de> Signed-off-by: Heiko Stuebner <heiko@sntech.de> Link: https://patchwork.freedesktop.org/patch/msgid/20240119190841.1619443-1-harshit.m.mogalapalli@oracle.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 85b3b4871a1d..fdd768bbd487 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1985,8 +1985,10 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); } - if (!clock) + if (!clock) { + vop2_unlock(vop2); return; + } if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) From 0db0c1770834f39e11a2902e20e1f11a482f4465 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald <rf@opensource.cirrus.com> Date: Fri, 9 Feb 2024 11:18:40 +0000 Subject: [PATCH 161/304] ASoC: cs35l56: Workaround for ACPI with broken spk-id-gpios property The ACPI in some SoundWire laptops has a spk-id-gpios property but it points to the wrong Device node. This patch adds a workaround to try to get the GPIO directly from the correct Device node. If the attempt to get the GPIOs from the property fails, the workaround looks for the SDCA node "AF01", which is where the GpioIo resource is defined. If this exists, a spk-id-gpios mapping is added to that node and then the GPIO is got from that node using the property. Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com> Link: https://msgid.link/r/20240209111840.1543630-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/cs35l56.c | 93 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 98d3957c66e7..2c1313e34cce 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -5,6 +5,7 @@ // Copyright (C) 2023 Cirrus Logic, Inc. and // Cirrus Logic International Semiconductor Ltd. +#include <linux/acpi.h> #include <linux/completion.h> #include <linux/debugfs.h> #include <linux/delay.h> @@ -15,6 +16,7 @@ #include <linux/module.h> #include <linux/pm.h> #include <linux/pm_runtime.h> +#include <linux/property.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/slab.h> @@ -1260,6 +1262,94 @@ static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56) return 0; } +/* + * Some SoundWire laptops have a spk-id-gpios property but it points to + * the wrong ACPI Device node so can't be used to get the GPIO. Try to + * find the SDCA node containing the GpioIo resource and add a GPIO + * mapping to it. + */ +static const struct acpi_gpio_params cs35l56_af01_first_gpio = { 0, 0, false }; +static const struct acpi_gpio_mapping cs35l56_af01_spkid_gpios_mapping[] = { + { "spk-id-gpios", &cs35l56_af01_first_gpio, 1 }, + { } +}; + +static void cs35l56_acpi_dev_release_driver_gpios(void *adev) +{ + acpi_dev_remove_driver_gpios(adev); +} + +static int cs35l56_try_get_broken_sdca_spkid_gpio(struct cs35l56_private *cs35l56) +{ + struct fwnode_handle *af01_fwnode; + const union acpi_object *obj; + struct gpio_desc *desc; + int ret; + + /* Find the SDCA node containing the GpioIo */ + af01_fwnode = device_get_named_child_node(cs35l56->base.dev, "AF01"); + if (!af01_fwnode) { + dev_dbg(cs35l56->base.dev, "No AF01 node\n"); + return -ENOENT; + } + + ret = acpi_dev_get_property(ACPI_COMPANION(cs35l56->base.dev), + "spk-id-gpios", ACPI_TYPE_PACKAGE, &obj); + if (ret) { + dev_dbg(cs35l56->base.dev, "Could not get spk-id-gpios package: %d\n", ret); + return -ENOENT; + } + + /* The broken properties we can handle are a 4-element package (one GPIO) */ + if (obj->package.count != 4) { + dev_warn(cs35l56->base.dev, "Unexpected spk-id element count %d\n", + obj->package.count); + return -ENOENT; + } + + /* Add a GPIO mapping if it doesn't already have one */ + if (!fwnode_property_present(af01_fwnode, "spk-id-gpios")) { + struct acpi_device *adev = to_acpi_device_node(af01_fwnode); + + /* + * Can't use devm_acpi_dev_add_driver_gpios() because the + * mapping isn't being added to the node pointed to by + * ACPI_COMPANION(). + */ + ret = acpi_dev_add_driver_gpios(adev, cs35l56_af01_spkid_gpios_mapping); + if (ret) { + return dev_err_probe(cs35l56->base.dev, ret, + "Failed to add gpio mapping to AF01\n"); + } + + ret = devm_add_action_or_reset(cs35l56->base.dev, + cs35l56_acpi_dev_release_driver_gpios, + adev); + if (ret) + return ret; + + dev_dbg(cs35l56->base.dev, "Added spk-id-gpios mapping to AF01\n"); + } + + desc = fwnode_gpiod_get_index(af01_fwnode, "spk-id", 0, GPIOD_IN, NULL); + if (IS_ERR(desc)) { + ret = PTR_ERR(desc); + return dev_err_probe(cs35l56->base.dev, ret, "Get GPIO from AF01 failed\n"); + } + + ret = gpiod_get_value_cansleep(desc); + gpiod_put(desc); + + if (ret < 0) { + dev_err_probe(cs35l56->base.dev, ret, "Error reading spk-id GPIO\n"); + return ret; + } + + dev_info(cs35l56->base.dev, "Got spk-id from AF01\n"); + + return ret; +} + int cs35l56_common_probe(struct cs35l56_private *cs35l56) { int ret; @@ -1304,6 +1394,9 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56) } ret = cs35l56_get_speaker_id(&cs35l56->base); + if (ACPI_COMPANION(cs35l56->base.dev) && cs35l56->sdw_peripheral && (ret == -ENOENT)) + ret = cs35l56_try_get_broken_sdca_spkid_gpio(cs35l56); + if ((ret < 0) && (ret != -ENOENT)) goto err; From 2127c604383666675789fd4a5fc2aead46c73aad Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri, 2 Feb 2024 17:32:20 +0100 Subject: [PATCH 162/304] xsk: Add truesize to skb_add_rx_frag(). xsk_build_skb() allocates a page and adds it to the skb via skb_add_rx_frag() and specifies 0 for truesize. This leads to a warning in skb_add_rx_frag() with CONFIG_DEBUG_NET enabled because size is larger than truesize. Increasing truesize requires to add the same amount to socket's sk_wmem_alloc counter in order not to underflow the counter during release in the destructor (sock_wfree()). Pass the size of the allocated page as truesize to skb_add_rx_frag(). Add this mount to socket's sk_wmem_alloc counter. Fixes: cf24f5a5feea ("xsk: add support for AF_XDP multi-buffer on Tx path") Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Link: https://lore.kernel.org/bpf/20240202163221.2488589-1-bigeasy@linutronix.de --- net/xdp/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 1eadfac03cc4..b78c0e095e22 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, memcpy(vaddr, buffer, len); kunmap_local(vaddr); - skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); + skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); + refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); } if (first_frag && desc->options & XDP_TX_METADATA) { From 8d30528a170905ede9ab6ab81f229e441808590b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni <kch@nvidia.com> Date: Mon, 12 Feb 2024 23:58:24 -0800 Subject: [PATCH 163/304] nvmet: remove superfluous initialization Remove superfluous initialization of status variable in nvmet_execute_admin_connect() and nvmet_execute_io_connect(), since it will get overwritten by nvmet_copy_from_sgl(). Signed-off-by: Chaitanya Kulkarni <kch@nvidia.com> Signed-off-by: Keith Busch <kbusch@kernel.org> --- drivers/nvme/target/fabrics-cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d8da840a1c0e..9964ffe347d2 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -209,7 +209,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmf_connect_command *c = &req->cmd->connect; struct nvmf_connect_data *d; struct nvmet_ctrl *ctrl = NULL; - u16 status = 0; + u16 status; int ret; if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) @@ -290,7 +290,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) struct nvmf_connect_data *d; struct nvmet_ctrl *ctrl; u16 qid = le16_to_cpu(c->qid); - u16 status = 0; + u16 status; if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; From 4e07447503f01766ae976207eb3b947437ed8dbc Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Sat, 10 Feb 2024 21:01:40 -0500 Subject: [PATCH 164/304] bcachefs: Clamp replicas_required to replicas This prevents going emergency read only when the user has specified replicas_required > replicas. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/bcachefs/bcachefs.h | 12 ++++++++++++ fs/bcachefs/btree_update_interior.c | 3 ++- fs/bcachefs/io_write.c | 1 + fs/bcachefs/journal_io.c | 4 +++- fs/bcachefs/journal_reclaim.c | 2 +- fs/bcachefs/super.c | 4 ++-- 6 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index b80c6c9efd8c..69d0d60d50e3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1249,6 +1249,18 @@ static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c) return stdio; } +static inline unsigned metadata_replicas_required(struct bch_fs *c) +{ + return min(c->opts.metadata_replicas, + c->opts.metadata_replicas_required); +} + +static inline unsigned data_replicas_required(struct bch_fs *c) +{ + return min(c->opts.data_replicas, + c->opts.data_replicas_required); +} + #define BKEY_PADDED_ONSTACK(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 17a5938aa71a..4530b14ff2c3 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -280,7 +280,8 @@ retry: writepoint_ptr(&c->btree_write_point), &devs_have, res->nr_replicas, - c->opts.metadata_replicas_required, + min(res->nr_replicas, + c->opts.metadata_replicas_required), watermark, 0, cl, &wp); if (unlikely(ret)) return ERR_PTR(ret); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index ef3a53f9045a..2c098ac017b3 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1564,6 +1564,7 @@ CLOSURE_CALLBACK(bch2_write) BUG_ON(!op->write_point.v); BUG_ON(bkey_eq(op->pos, POS_MAX)); + op->nr_replicas_required = min_t(unsigned, op->nr_replicas_required, op->nr_replicas); op->start_time = local_clock(); bch2_keylist_init(&op->insert_keys, op->inline_keys); wbio_init(bio)->put_bio = false; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index bfd6585e746d..47805193f18c 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1478,6 +1478,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w) c->opts.foreground_target; unsigned i, replicas = 0, replicas_want = READ_ONCE(c->opts.metadata_replicas); + unsigned replicas_need = min_t(unsigned, replicas_want, + READ_ONCE(c->opts.metadata_replicas_required)); rcu_read_lock(); retry: @@ -1526,7 +1528,7 @@ done: BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); - return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS; + return replicas >= replicas_need ? 0 : -EROFS; } static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 820d25e19e5f..2cf626315652 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -205,7 +205,7 @@ void bch2_journal_space_available(struct journal *j) j->can_discard = can_discard; - if (nr_online < c->opts.metadata_replicas_required) { + if (nr_online < metadata_replicas_required(c)) { ret = JOURNAL_ERR_insufficient_devices; goto out; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index b9911402b175..6b23e11825e6 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1428,10 +1428,10 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca, required = max(!(flags & BCH_FORCE_IF_METADATA_DEGRADED) ? c->opts.metadata_replicas - : c->opts.metadata_replicas_required, + : metadata_replicas_required(c), !(flags & BCH_FORCE_IF_DATA_DEGRADED) ? c->opts.data_replicas - : c->opts.data_replicas_required); + : data_replicas_required(c)); return nr_rw >= required; case BCH_MEMBER_STATE_failed: From 2eeccee86dc75a584a8c7e67a8b824d5168c978f Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Mon, 12 Feb 2024 20:05:48 -0500 Subject: [PATCH 165/304] bcachefs: Fix check_version_upgrade() When also downgrading, check_version_upgrade() could pick a new version greater than the latest supported version. Fixes: Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/bcachefs/recovery.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 9127d0e3ca2f..21e13bb4335b 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -577,8 +577,9 @@ u64 bch2_recovery_passes_from_stable(u64 v) static bool check_version_upgrade(struct bch_fs *c) { - unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; + unsigned latest_compatible = min(latest_version, + bch2_latest_compatible_version(c->sb.version)); unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; @@ -597,7 +598,7 @@ static bool check_version_upgrade(struct bch_fs *c) new_version = latest_version; break; case BCH_VERSION_UPGRADE_none: - new_version = old_version; + new_version = min(old_version, latest_version); break; } } @@ -774,7 +775,7 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (!(c->opts.nochanges && c->opts.norecovery)) { + if (!c->opts.nochanges) { mutex_lock(&c->sb_lock); bool write_sb = false; @@ -804,7 +805,7 @@ int bch2_fs_recovery(struct bch_fs *c) if (bch2_check_version_downgrade(c)) { struct printbuf buf = PRINTBUF; - prt_str(&buf, "Version downgrade required:\n"); + prt_str(&buf, "Version downgrade required:"); __le64 passes = ext->recovery_passes_required[0]; bch2_sb_set_downgrade(c, @@ -812,7 +813,7 @@ int bch2_fs_recovery(struct bch_fs *c) BCH_VERSION_MINOR(c->sb.version)); passes = ext->recovery_passes_required[0] & ~passes; if (passes) { - prt_str(&buf, " running recovery passes: "); + prt_str(&buf, "\n running recovery passes: "); prt_bitflags(&buf, bch2_recovery_passes, bch2_recovery_passes_from_stable(le64_to_cpu(passes))); } From 816054f40a5fdaaed05d9e7f603d2824eeee7e62 Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Tue, 13 Feb 2024 20:26:09 -0500 Subject: [PATCH 166/304] bcachefs: Fix missing va_end() Fixes: https://lore.kernel.org/linux-bcachefs/202402131603.E953E2CF@keescook/T/#u Reported-by: coverity scan Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> --- fs/bcachefs/printbuf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index accf246c3233..b27d22925929 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -56,6 +56,7 @@ void bch2_prt_vprintf(struct printbuf *out, const char *fmt, va_list args) va_copy(args2, args); len = vsnprintf(out->buf + out->pos, printbuf_remaining(out), fmt, args2); + va_end(args2); } while (len + 1 >= printbuf_remaining(out) && !bch2_printbuf_make_room(out, len + 1)); From 1fba2bf8e9d5a27b7394856181b6200de7260b79 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Wed, 14 Feb 2024 11:00:41 +1100 Subject: [PATCH 167/304] Revert "powerpc/pseries/iommu: Fix iommu initialisation during DLPAR add" This reverts commit ed8b94f6e0acd652ce69bd69d678a0c769172df8. Gaurav reported that there are still problems with the patch and it should be reverted pending a fuller fix. Link: https://lore.kernel.org/all/4f6fc1ac-7a76-4447-9d0e-f55c0be373f8@linux.ibm.com/ Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/include/asm/ppc-pci.h | 3 --- arch/powerpc/kernel/iommu.c | 21 +++++---------------- arch/powerpc/platforms/pseries/pci_dlpar.c | 4 ---- 3 files changed, 5 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index c3a3f3df36d1..ce2b1b5eebdd 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -29,9 +29,6 @@ void *pci_traverse_device_nodes(struct device_node *start, void *(*fn)(struct device_node *, void *), void *data); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); -extern void ppc_iommu_register_device(struct pci_controller *phb); -extern void ppc_iommu_unregister_device(struct pci_controller *phb); - /* From rtas_pci.h */ extern void init_pci_config_tokens (void); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c6f62e130d55..ebe259bdd462 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1388,21 +1388,6 @@ static const struct attribute_group *spapr_tce_iommu_groups[] = { NULL, }; -void ppc_iommu_register_device(struct pci_controller *phb) -{ - iommu_device_sysfs_add(&phb->iommu, phb->parent, - spapr_tce_iommu_groups, "iommu-phb%04x", - phb->global_number); - iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, - phb->parent); -} - -void ppc_iommu_unregister_device(struct pci_controller *phb) -{ - iommu_device_unregister(&phb->iommu); - iommu_device_sysfs_remove(&phb->iommu); -} - /* * This registers IOMMU devices of PHBs. This needs to happen * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and @@ -1413,7 +1398,11 @@ static int __init spapr_tce_setup_phb_iommus_initcall(void) struct pci_controller *hose; list_for_each_entry(hose, &hose_list, list_node) { - ppc_iommu_register_device(hose); + iommu_device_sysfs_add(&hose->iommu, hose->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + hose->global_number); + iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, + hose->parent); } return 0; } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 4448386268d9..4ba824568119 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -35,8 +35,6 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) pseries_msi_allocate_domains(phb); - ppc_iommu_register_device(phb); - /* Create EEH devices for the PHB */ eeh_phb_pe_create(phb); @@ -78,8 +76,6 @@ int remove_phb_dynamic(struct pci_controller *phb) } } - ppc_iommu_unregister_device(phb); - pseries_msi_free_domains(phb); /* Keep a reference so phb isn't freed yet */ From f1acb109505d983779bbb7e20a1ee6244d2b5736 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Mon, 12 Feb 2024 17:42:44 +1100 Subject: [PATCH 168/304] powerpc/kasan: Limit KASAN thread size increase to 32KB KASAN is seen to increase stack usage, to the point that it was reported to lead to stack overflow on some 32-bit machines (see link). To avoid overflows the stack size was doubled for KASAN builds in commit 3e8635fb2e07 ("powerpc/kasan: Force thread size increase with KASAN"). However with a 32KB stack size to begin with, the doubling leads to a 64KB stack, which causes build errors: arch/powerpc/kernel/switch.S:249: Error: operand out of range (0x000000000000fe50 is not between 0xffffffffffff8000 and 0x0000000000007fff) Although the asm could be reworked, in practice a 32KB stack seems sufficient even for KASAN builds - the additional usage seems to be in the 2-3KB range for a 64-bit KASAN build. So only increase the stack for KASAN if the stack size is < 32KB. Fixes: 18f14afe2816 ("powerpc/64s: Increase default stack size to 32KB") Reported-by: Spoorthy <spoorthy@linux.ibm.com> Reported-by: Benjamin Gray <bgray@linux.ibm.com> Reviewed-by: Benjamin Gray <bgray@linux.ibm.com> Link: https://lore.kernel.org/linuxppc-dev/bug-207129-206035@https.bugzilla.kernel.org%2F/ Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240212064244.3924505-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index bf5dde1a4114..15c5691dd218 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,7 +14,7 @@ #ifdef __KERNEL__ -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15 #define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) #else #define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT From eb6d871f4ba49ac8d0537e051fe983a3a4027f61 Mon Sep 17 00:00:00 2001 From: David Engraf <david.engraf@sysgo.com> Date: Wed, 7 Feb 2024 10:27:58 +0100 Subject: [PATCH 169/304] powerpc/cputable: Add missing PPC_FEATURE_BOOKE on PPC64 Book-E Commit e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") moved the cpu_specs to separate header files. Previously PPC_FEATURE_BOOKE was enabled by CONFIG_PPC_BOOK3E_64. The definition in cpu_specs_e500mc.h for PPC64 no longer enables PPC_FEATURE_BOOKE. This breaks user space reading the ELF hwcaps and expect PPC_FEATURE_BOOKE. Debugging an application with gdb is no longer working on e5500/e6500 because the 64-bit detection relies on PPC_FEATURE_BOOKE for Book-E. Fixes: e320a76db4b0 ("powerpc/cputable: Split cpu_specs[] out of cputable.h") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: David Engraf <david.engraf@sysgo.com> Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240207092758.1058893-1-david.engraf@sysgo.com --- arch/powerpc/kernel/cpu_specs_e500mc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h index ceb06b109f83..2ae8e9a7b461 100644 --- a/arch/powerpc/kernel/cpu_specs_e500mc.h +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -8,7 +8,8 @@ #ifdef CONFIG_PPC64 #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_HAS_FPU | PPC_FEATURE_64) + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) #else #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) From ea73179e64131bcd29ba6defd33732abdf8ca14b Mon Sep 17 00:00:00 2001 From: Naveen N Rao <naveen@kernel.org> Date: Tue, 13 Feb 2024 23:24:10 +0530 Subject: [PATCH 170/304] powerpc/ftrace: Ignore ftrace locations in exit text sections Michael reported that we are seeing an ftrace bug on bootup when KASAN is enabled and we are using -fpatchable-function-entry: ftrace: allocating 47780 entries in 18 pages ftrace-powerpc: 0xc0000000020b3d5c: No module provided for non-kernel address ------------[ ftrace bug ]------------ ftrace faulted on modifying [<c0000000020b3d5c>] 0xc0000000020b3d5c Initializing ftrace call sites ftrace record flags: 0 (0) expected tramp: c00000000008cef4 ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/trace/ftrace.c:2180 ftrace_bug+0x3c0/0x424 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.5.0-rc3-00120-g0f71dcfb4aef #860 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c0000000003aa81c LR: c0000000003aa818 CTR: 0000000000000000 REGS: c0000000033cfab0 TRAP: 0700 Not tainted (6.5.0-rc3-00120-g0f71dcfb4aef) MSR: 8000000002021033 <SF,VEC,ME,IR,DR,RI,LE> CR: 28028240 XER: 00000000 CFAR: c0000000002781a8 IRQMASK: 3 ... NIP [c0000000003aa81c] ftrace_bug+0x3c0/0x424 LR [c0000000003aa818] ftrace_bug+0x3bc/0x424 Call Trace: ftrace_bug+0x3bc/0x424 (unreliable) ftrace_process_locs+0x5f4/0x8a0 ftrace_init+0xc0/0x1d0 start_kernel+0x1d8/0x484 With CONFIG_FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY=y and CONFIG_KASAN=y, compiler emits nops in functions that it generates for registering and unregistering global variables (unlike with -pg and -mprofile-kernel where calls to _mcount() are not generated in those functions). Those functions then end up in INIT_TEXT and EXIT_TEXT respectively. We don't expect to see any profiled functions in EXIT_TEXT, so ftrace_init_nop() assumes that all addresses that aren't in the core kernel text belongs to a module. Since these functions do not match that criteria, we see the above bug. Address this by having ftrace ignore all locations in the text exit sections of vmlinux. Fixes: 0f71dcfb4aef ("powerpc/ftrace: Add support for -fpatchable-function-entry") Cc: stable@vger.kernel.org # v6.6+ Reported-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Naveen N Rao <naveen@kernel.org> Reviewed-by: Benjamin Gray <bgray@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240213175410.1091313-1-naveen@kernel.org --- arch/powerpc/include/asm/ftrace.h | 10 ++-------- arch/powerpc/include/asm/sections.h | 1 + arch/powerpc/kernel/trace/ftrace.c | 12 ++++++++++++ arch/powerpc/kernel/trace/ftrace_64_pg.c | 5 +++++ arch/powerpc/kernel/vmlinux.lds.S | 2 ++ 5 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 1ebd2ca97f12..107fc5a48456 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -20,14 +20,6 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); -static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) - addr += MCOUNT_INSN_SIZE; - - return addr; -} - unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp); @@ -142,8 +134,10 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } #ifdef CONFIG_FUNCTION_TRACER extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; void ftrace_free_init_tramp(void); +unsigned long ftrace_call_adjust(unsigned long addr); #else static inline void ftrace_free_init_tramp(void) { } +static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index ea26665f82cf..f43f3a6b0051 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -14,6 +14,7 @@ typedef struct func_desc func_desc_t; extern char __head_end[]; extern char __srwx_boundary[]; +extern char __exittext_begin[], __exittext_end[]; /* Patch sites */ extern s32 patch__call_flush_branch_caches1; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 82010629cf88..d8d6b4fd9a14 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -27,10 +27,22 @@ #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> +#include <asm/sections.h> #define NUM_FTRACE_TRAMPS 2 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; +unsigned long ftrace_call_adjust(unsigned long addr) +{ + if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) + return 0; + + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) + addr += MCOUNT_INSN_SIZE; + + return addr; +} + static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link) { ppc_inst_t op; diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 7b85c3b460a3..12fab1803bcf 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -37,6 +37,11 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + static ppc_inst_t ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 1c5970df3233..f420df7888a7 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -281,7 +281,9 @@ SECTIONS * to deal with references from __bug_table */ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + __exittext_begin = .; EXIT_TEXT + __exittext_end = .; } . = ALIGN(PAGE_SIZE); From cbecc9fcbbec60136b0180ba0609c829afed5c81 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde <sshegde@linux.ibm.com> Date: Tue, 13 Feb 2024 10:56:35 +0530 Subject: [PATCH 171/304] powerpc/pseries: fix accuracy of stolen time powerVM hypervisor updates the VPA fields with stolen time data. It currently reports enqueue_dispatch_tb and ready_enqueue_tb for this purpose. In linux these two fields are used to report the stolen time. The VPA fields are updated at the TB frequency. On powerPC its mostly set at 512Mhz. Hence this needs a conversion to ns when reporting it back as rest of the kernel timings are in ns. This conversion is already handled in tb_to_ns function. So use that function to report accurate stolen time. Observed this issue and used an Capped Shared Processor LPAR(SPLPAR) to simplify the experiments. In all these cases, 100% VP Load is run using stress-ng workload. Values of stolen time is in percentages as reported by mpstat. With the patch values are close to expected. 6.8.rc1 +Patch 12EC/12VP 0.0 0.0 12EC/24VP 25.7 50.2 12EC/36VP 37.3 69.2 12EC/48VP 38.5 78.3 Fixes: 0e8a63132800 ("powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Shrikanth Hegde <sshegde@linux.ibm.com> Reviewed-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240213052635.231597-1-sshegde@linux.ibm.com --- arch/powerpc/platforms/pseries/lpar.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4561667832ed..4e9916bb03d7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -662,8 +662,12 @@ u64 pseries_paravirt_steal_clock(int cpu) { struct lppaca *lppaca = &lppaca_of(cpu); - return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + - be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); + /* + * VPA steal time counters are reported at TB frequency. Hence do a + * conversion to ns before returning + */ + return tb_to_ns(be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb))); } #endif From 706c1fa1ab09f11a131fc4d699ce4c0224b1cb2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Lo=C3=AFc=20Charroud?= <lagiraudiere+linux@free.fr> Date: Wed, 14 Feb 2024 00:38:31 +0100 Subject: [PATCH 172/304] ALSA: hda/realtek: cs35l41: Add internal speaker support for ASUS UM3402 with missing DSD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the values for the missing DSD properties to the cs35l41 config table. Signed-off-by: Jean-Loïc Charroud <lagiraudiere+linux@free.fr> Link: https://lore.kernel.org/r/1435594585.650325975.1707867511062.JavaMail.zimbra@free.fr Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/cs35l41_hda_property.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/cs35l41_hda_property.c b/sound/pci/hda/cs35l41_hda_property.c index 57b21285ab6a..87dcb367e239 100644 --- a/sound/pci/hda/cs35l41_hda_property.c +++ b/sound/pci/hda/cs35l41_hda_property.c @@ -91,6 +91,7 @@ static const struct cs35l41_config cs35l41_config_table[] = { { "10431D1F", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431DA2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, { "10431E02", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 }, + { "10431E12", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 }, { "10431EE2", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 }, { "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 }, { "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 }, @@ -428,6 +429,7 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = { { "CSC3551", "10431D1F", generic_dsd_config }, { "CSC3551", "10431DA2", generic_dsd_config }, { "CSC3551", "10431E02", generic_dsd_config }, + { "CSC3551", "10431E12", generic_dsd_config }, { "CSC3551", "10431EE2", generic_dsd_config }, { "CSC3551", "10431F12", generic_dsd_config }, { "CSC3551", "10431F1F", generic_dsd_config }, From b91050448897663b60b6d15525c8c3ecae28a368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Lo=C3=AFc=20Charroud?= <lagiraudiere+linux@free.fr> Date: Wed, 14 Feb 2024 00:42:12 +0100 Subject: [PATCH 173/304] ALSA: hda/realtek: cs35l41: Fix device ID / model name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch 51d976079976c800ef19ed1b542602fcf63f0edb ("ALSA: hda/realtek: Add quirks for ASUS Zenbook 2022 Models") modified the entry 1043:1e2e from "ASUS UM3402" to "ASUS UM6702RA/RC" and added another entry for "ASUS UM3402" with 104e:1ee2. The first entry was correct, while the new one corresponds to model "ASUS UM6702RA/RC" Fix the model names for both devices. Fixes: 51d976079976 ("ALSA: hda/realtek: Add quirks for ASUS Zenbook 2022 Models") Signed-off-by: Jean-Loïc Charroud <lagiraudiere+linux@free.fr> Link: https://lore.kernel.org/r/1656546983.650349575.1707867732866.JavaMail.zimbra@free.fr Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 98886b803bc5..c8adef01e9ec 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10051,11 +10051,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), - SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), - SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2), From 852d432a14dbcd34e15a3a3910c5c6869a6d1929 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Lo=C3=AFc=20Charroud?= <lagiraudiere+linux@free.fr> Date: Wed, 14 Feb 2024 00:44:24 +0100 Subject: [PATCH 174/304] ALSA: hda/realtek: cs35l41: Fix order and duplicates in quirks table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move entry {0x1043, 0x16a3, "ASUS UX3402VA"} following device ID order. Remove duplicate entry for device {0x1043, 0x1f62, "ASUS UX7602ZM"}. Fixes: 51d976079976 ("ALSA: hda/realtek: Add quirks for ASUS Zenbook 2022 Models") Signed-off-by: Jean-Loïc Charroud <lagiraudiere+linux@free.fr> Link: https://lore.kernel.org/r/1969151851.650354669.1707867864074.JavaMail.zimbra@free.fr Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c8adef01e9ec..5d0517465f08 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10005,6 +10005,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1663, "ASUS GU603ZI/ZJ/ZQ/ZU/ZV", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x16d3, "ASUS UX5304VA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), @@ -10048,8 +10049,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x16a3, "ASUS UX3402VA", ALC245_FIXUP_CS35L41_SPI_2), - SND_PCI_QUIRK(0x1043, 0x1f62, "ASUS UX7602ZM", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), From eb5c7465c3240151cd42a55c7ace9da0026308a1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Tue, 13 Feb 2024 11:07:13 +0100 Subject: [PATCH 175/304] RDMA/srpt: fix function pointer cast warnings clang-16 notices that srpt_qp_event() gets called through an incompatible pointer here: drivers/infiniband/ulp/srpt/ib_srpt.c:1815:5: error: cast from 'void (*)(struct ib_event *, struct srpt_rdma_ch *)' to 'void (*)(struct ib_event *, void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 1815 | = (void(*)(struct ib_event *, void*))srpt_qp_event; Change srpt_qp_event() to use the correct prototype and adjust the argument inside of it. Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Link: https://lore.kernel.org/r/20240213100728.458348-1-arnd@kernel.org Reviewed-by: Bart Van Assche <bvanassche@acm.org> Signed-off-by: Leon Romanovsky <leon@kernel.org> --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index d2dce6ce30a9..040234c01be4 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -214,10 +214,12 @@ static const char *get_ch_state_name(enum rdma_ch_state s) /** * srpt_qp_event - QP event callback function * @event: Description of the event that occurred. - * @ch: SRPT RDMA channel. + * @ptr: SRPT RDMA channel. */ -static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +static void srpt_qp_event(struct ib_event *event, void *ptr) { + struct srpt_rdma_ch *ch = ptr; + pr_debug("QP event %d on ch=%p sess_name=%s-%d state=%s\n", event->event, ch, ch->sess_name, ch->qp->qp_num, get_ch_state_name(ch->state)); @@ -1811,8 +1813,7 @@ retry: ch->cq_size = ch->rq_size + sq_size; qp_init->qp_context = (void *)ch; - qp_init->event_handler - = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->event_handler = srpt_qp_event; qp_init->send_cq = ch->cq; qp_init->recv_cq = ch->cq; qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; From 0846dd77c8349ec92ca0079c9c71d130f34cb192 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat <sbhat@linux.ibm.com> Date: Tue, 13 Feb 2024 10:05:22 -0600 Subject: [PATCH 176/304] powerpc/iommu: Fix the missing iommu_group_put() during platform domain attach The function spapr_tce_platform_iommu_attach_dev() is missing to call iommu_group_put() when the domain is already set. This refcount leak shows up with BUG_ON() during DLPAR remove operation as: KernelBug: Kernel bug in state 'None': kernel BUG at arch/powerpc/platforms/pseries/iommu.c:100! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=8192 NUMA pSeries <snip> Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_016) hv:phyp pSeries NIP: c0000000000ff4d4 LR: c0000000000ff4cc CTR: 0000000000000000 REGS: c0000013aed5f840 TRAP: 0700 Tainted: G I (6.8.0-rc3-autotest-g99bd3cb0d12e) MSR: 8000000000029033 <SF,EE,ME,IR,DR,RI,LE> CR: 44002402 XER: 20040000 CFAR: c000000000a0d170 IRQMASK: 0 ... NIP iommu_reconfig_notifier+0x94/0x200 LR iommu_reconfig_notifier+0x8c/0x200 Call Trace: iommu_reconfig_notifier+0x8c/0x200 (unreliable) notifier_call_chain+0xb8/0x19c blocking_notifier_call_chain+0x64/0x98 of_reconfig_notify+0x44/0xdc of_detach_node+0x78/0xb0 ofdt_write.part.0+0x86c/0xbb8 proc_reg_write+0xf4/0x150 vfs_write+0xf8/0x488 ksys_write+0x84/0x140 system_call_exception+0x138/0x330 system_call_vectored_common+0x15c/0x2ec The patch adds the missing iommu_group_put() call. Fixes: a8ca9fc9134c ("powerpc/iommu: Do not do platform domain attach atctions after probe") Reported-by: Venkat Rao Bagalkote <venkat88@linux.vnet.ibm.com> Closes: https://lore.kernel.org/all/274e0d2b-b5cc-475e-94e6-8427e88e271d@linux.vnet.ibm.com/ Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com> Tested-by: Venkat Rao Bagalkote <venkat88@linux.vnet.ibm.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/170784021983.6249.10039296655906636112.stgit@linux.ibm.com --- arch/powerpc/kernel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ebe259bdd462..df17b33b89d1 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1290,8 +1290,10 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, int ret = -EINVAL; /* At first attach the ownership is already set */ - if (!domain) + if (!domain) { + iommu_group_put(grp); return 0; + } if (!grp) return -ENODEV; From 8746c6c9dfa31d269c65dd52ab42fde0720b7d91 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> Date: Wed, 14 Feb 2024 18:48:52 +0530 Subject: [PATCH 177/304] drm/buddy: Fix alloc_range() error handling code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Few users have observed display corruption when they boot the machine to KDE Plasma or playing games. We have root caused the problem that whenever alloc_range() couldn't find the required memory blocks the function was returning SUCCESS in some of the corner cases. The right approach would be if the total allocated size is less than the required size, the function should return -ENOSPC. Cc: <stable@vger.kernel.org> # 6.7+ Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097 Tested-by: Mario Limonciello <mario.limonciello@amd.com> Link: https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-1-Arunpravin.PaneerSelvam@amd.com/ Acked-by: Christian König <christian.koenig@amd.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240214131853.5934-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/drm_buddy.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index f57e6d74fb0e..c1a99bf4dffd 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm, } while (1); list_splice_tail(&allocated, blocks); + + if (total_allocated < size) { + err = -ENOSPC; + goto err_free; + } + return 0; err_undo: From a64056bb5a3215bd31c8ce17d609ba0f4d5c55ea Mon Sep 17 00:00:00 2001 From: Matthew Auld <matthew.auld@intel.com> Date: Wed, 14 Feb 2024 18:48:53 +0530 Subject: [PATCH 178/304] drm/tests/drm_buddy: add alloc_contiguous test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sanity check DRM_BUDDY_CONTIGUOUS_ALLOCATION. v2: Fix checkpatch warnings. Signed-off-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240214131853.5934-2-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/tests/drm_buddy_test.c | 89 ++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index ea2af6bd9abe..fee6bec757d1 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -8,6 +8,7 @@ #include <linux/prime_numbers.h> #include <linux/sched/signal.h> +#include <linux/sizes.h> #include <drm/drm_buddy.h> @@ -18,6 +19,93 @@ static inline u64 get_size(int order, u64 chunk_size) return (1 << order) * chunk_size; } +static void drm_test_buddy_alloc_contiguous(struct kunit *test) +{ + u64 mm_size, ps = SZ_4K, i, n_pages, total; + struct drm_buddy_block *block; + struct drm_buddy mm; + LIST_HEAD(left); + LIST_HEAD(middle); + LIST_HEAD(right); + LIST_HEAD(allocated); + + mm_size = 16 * 3 * SZ_4K; + + KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); + + /* + * Idea is to fragment the address space by alternating block + * allocations between three different lists; one for left, middle and + * right. We can then free a list to simulate fragmentation. In + * particular we want to exercise the DRM_BUDDY_CONTIGUOUS_ALLOCATION, + * including the try_harder path. + */ + + i = 0; + n_pages = mm_size / ps; + do { + struct list_head *list; + int slot = i % 3; + + if (slot == 0) + list = &left; + else if (slot == 1) + list = &middle; + else + list = &right; + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, 0, mm_size, + ps, ps, list, 0), + "buddy_alloc hit an error size=%d\n", + ps); + } while (++i < n_pages); + + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%d\n", 3 * ps); + + drm_buddy_free_list(&mm, &middle); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%llu\n", 3 * ps); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%llu\n", 2 * ps); + + drm_buddy_free_list(&mm, &right); + KUNIT_ASSERT_TRUE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc didn't error size=%llu\n", 3 * ps); + /* + * At this point we should have enough contiguous space for 2 blocks, + * however they are never buddies (since we freed middle and right) so + * will require the try_harder logic to find them. + */ + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 2 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%d\n", 2 * ps); + + drm_buddy_free_list(&mm, &left); + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_alloc_blocks(&mm, 0, mm_size, + 3 * ps, ps, &allocated, + DRM_BUDDY_CONTIGUOUS_ALLOCATION), + "buddy_alloc hit an error size=%d\n", 3 * ps); + + total = 0; + list_for_each_entry(block, &allocated, link) + total += drm_buddy_block_size(&mm, block); + + KUNIT_ASSERT_EQ(test, total, ps * 2 + ps * 3); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + static void drm_test_buddy_alloc_pathological(struct kunit *test) { u64 mm_size, size, start = 0; @@ -280,6 +368,7 @@ static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), KUNIT_CASE(drm_test_buddy_alloc_pathological), + KUNIT_CASE(drm_test_buddy_alloc_contiguous), {} }; From e20f378d993b1034eebe3ae78e67f3ed10e75356 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 9 Feb 2024 16:34:54 +0000 Subject: [PATCH 179/304] nvmem: include bit index in cell sysfs file name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating sysfs files for all Cells caused a boot failure for linux-6.8-rc1 on Apple M1, which (in downstream dts files) has multiple nvmem cells that use the same byte address. This causes the device probe to fail with [ 0.605336] sysfs: cannot create duplicate filename '/devices/platform/soc@200000000/2922bc000.efuse/apple_efuses_nvmem0/cells/efuse@a10' [ 0.605347] CPU: 7 PID: 1 Comm: swapper/0 Tainted: G S 6.8.0-rc1-arnd-5+ #133 [ 0.605355] Hardware name: Apple Mac Studio (M1 Ultra, 2022) (DT) [ 0.605362] Call trace: [ 0.605365] show_stack+0x18/0x2c [ 0.605374] dump_stack_lvl+0x60/0x80 [ 0.605383] dump_stack+0x18/0x24 [ 0.605388] sysfs_warn_dup+0x64/0x80 [ 0.605395] sysfs_add_bin_file_mode_ns+0xb0/0xd4 [ 0.605402] internal_create_group+0x268/0x404 [ 0.605409] sysfs_create_groups+0x38/0x94 [ 0.605415] devm_device_add_groups+0x50/0x94 [ 0.605572] nvmem_populate_sysfs_cells+0x180/0x1b0 [ 0.605682] nvmem_register+0x38c/0x470 [ 0.605789] devm_nvmem_register+0x1c/0x6c [ 0.605895] apple_efuses_probe+0xe4/0x120 [ 0.606000] platform_probe+0xa8/0xd0 As far as I can tell, this is a problem for any device with multiple cells on different bits of the same address. Avoid the issue by changing the file name to include the first bit number. Fixes: 0331c611949f ("nvmem: core: Expose cells through sysfs") Link: https://github.com/AsahiLinux/linux/blob/bd0a1a7d4/arch/arm64/boot/dts/apple/t600x-dieX.dtsi#L156 Cc: <regressions@lists.linux.dev> Cc: Miquel Raynal <miquel.raynal@bootlin.com> Cc: Rafał Miłecki <rafal@milecki.pl> Cc: Chen-Yu Tsai <wenst@chromium.org> Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: <asahi@lists.linux.dev> Cc: Sven Peter <sven@svenpeter.dev> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org> Reviewed-by: Eric Curtin <ecurtin@redhat.com> Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com> Link: https://lore.kernel.org/r/20240209163454.98051-1-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- Documentation/ABI/testing/sysfs-nvmem-cells | 16 ++++++++-------- drivers/nvmem/core.c | 5 +++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-nvmem-cells b/Documentation/ABI/testing/sysfs-nvmem-cells index 7af70adf3690..c7c9444f92a8 100644 --- a/Documentation/ABI/testing/sysfs-nvmem-cells +++ b/Documentation/ABI/testing/sysfs-nvmem-cells @@ -4,18 +4,18 @@ KernelVersion: 6.5 Contact: Miquel Raynal <miquel.raynal@bootlin.com> Description: The "cells" folder contains one file per cell exposed by the - NVMEM device. The name of the file is: <name>@<where>, with - <name> being the cell name and <where> its location in the NVMEM - device, in hexadecimal (without the '0x' prefix, to mimic device - tree node names). The length of the file is the size of the cell - (when known). The content of the file is the binary content of - the cell (may sometimes be ASCII, likely without trailing - character). + NVMEM device. The name of the file is: "<name>@<byte>,<bit>", + with <name> being the cell name and <where> its location in + the NVMEM device, in hexadecimal bytes and bits (without the + '0x' prefix, to mimic device tree node names). The length of + the file is the size of the cell (when known). The content of + the file is the binary content of the cell (may sometimes be + ASCII, likely without trailing character). Note: This file is only present if CONFIG_NVMEM_SYSFS is enabled. Example:: - hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d + hexdump -C /sys/bus/nvmem/devices/1-00563/cells/product-name@d,0 00000000 54 4e 34 38 4d 2d 50 2d 44 4e |TN48M-P-DN| 0000000a diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 980123fb4dde..eb357ac2e54a 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -460,8 +460,9 @@ static int nvmem_populate_sysfs_cells(struct nvmem_device *nvmem) list_for_each_entry(entry, &nvmem->cells, node) { sysfs_bin_attr_init(&attrs[i]); attrs[i].attr.name = devm_kasprintf(&nvmem->dev, GFP_KERNEL, - "%s@%x", entry->name, - entry->offset); + "%s@%x,%x", entry->name, + entry->offset, + entry->bit_offset); attrs[i].attr.mode = 0444; attrs[i].size = entry->bytes; attrs[i].read = &nvmem_cell_attr_read; From e37243b65d528a8a9f8b9a57a43885f8e8dfc15c Mon Sep 17 00:00:00 2001 From: Gianmarco Lusvardi <glusvardi@posteo.net> Date: Tue, 13 Feb 2024 23:05:46 +0000 Subject: [PATCH 180/304] bpf, scripts: Correct GPL license name The bpf_doc script refers to the GPL as the "GNU Privacy License". I strongly suspect that the author wanted to refer to the GNU General Public License, under which the Linux kernel is released, as, to the best of my knowledge, there is no license named "GNU Privacy License". This patch corrects the license name in the script accordingly. Fixes: 56a092c89505 ("bpf: add script and prepare bpf.h for new helpers documentation") Signed-off-by: Gianmarco Lusvardi <glusvardi@posteo.net> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Quentin Monnet <quentin@isovalent.com> Link: https://lore.kernel.org/bpf/20240213230544.930018-3-glusvardi@posteo.net --- scripts/bpf_doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 61b7dddedc46..0669bac5e900 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode instructions to the kernel when the programs are loaded. The format for that string is identical to the one in use for kernel modules (Dual licenses, such as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). +programs that are compatible with the GNU General Public License (GNU GPL). In order to use such helpers, the eBPF program must be loaded with the correct license string passed (via **attr**) to the **bpf**\\ () system call, and this From 2394ac4145ea91b92271e675a09af2a9ea6840b7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" <rostedt@goodmis.org> Date: Wed, 14 Feb 2024 11:20:46 -0500 Subject: [PATCH 181/304] tracing: Inform kmemleak of saved_cmdlines allocation The allocation of the struct saved_cmdlines_buffer structure changed from: s = kmalloc(sizeof(*s), GFP_KERNEL); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); to: orig_size = sizeof(*s) + val * TASK_COMM_LEN; order = get_order(orig_size); size = 1 << (order + PAGE_SHIFT); page = alloc_pages(GFP_KERNEL, order); if (!page) return NULL; s = page_address(page); memset(s, 0, sizeof(*s)); s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); Where that s->saved_cmdlines allocation looks to be a dangling allocation to kmemleak. That's because kmemleak only keeps track of kmalloc() allocations. For allocations that use page_alloc() directly, the kmemleak needs to be explicitly informed about it. Add kmemleak_alloc() and kmemleak_free() around the page allocation so that it doesn't give the following false positive: unreferenced object 0xffff8881010c8000 (size 32760): comm "swapper", pid 0, jiffies 4294667296 hex dump (first 32 bytes): ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ backtrace (crc ae6ec1b9): [<ffffffff86722405>] kmemleak_alloc+0x45/0x80 [<ffffffff8414028d>] __kmalloc_large_node+0x10d/0x190 [<ffffffff84146ab1>] __kmalloc+0x3b1/0x4c0 [<ffffffff83ed7103>] allocate_cmdlines_buffer+0x113/0x230 [<ffffffff88649c34>] tracer_alloc_buffers.isra.0+0x124/0x460 [<ffffffff8864a174>] early_trace_init+0x14/0xa0 [<ffffffff885dd5ae>] start_kernel+0x12e/0x3c0 [<ffffffff885f5758>] x86_64_start_reservations+0x18/0x30 [<ffffffff885f582b>] x86_64_start_kernel+0x7b/0x80 [<ffffffff83a001c3>] secondary_startup_64_no_verify+0x15e/0x16b Link: https://lore.kernel.org/linux-trace-kernel/87r0hfnr9r.fsf@kernel.org/ Link: https://lore.kernel.org/linux-trace-kernel/20240214112046.09a322d6@gandalf.local.home Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Fixes: 44dc5c41b5b1 ("tracing: Fix wasted memory in saved_cmdlines logic") Reported-by: Kalle Valo <kvalo@kernel.org> Tested-by: Kalle Valo <kvalo@kernel.org> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aa54810e8b56..8198bfc54b58 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/panic_notifier.h> +#include <linux/kmemleak.h> #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> @@ -2339,6 +2340,7 @@ static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s) int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN); kfree(s->map_cmdline_to_pid); + kmemleak_free(s); free_pages((unsigned long)s, order); } @@ -2358,6 +2360,7 @@ static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val) return NULL; s = page_address(page); + kmemleak_alloc(s, size, 1, GFP_KERNEL); memset(s, 0, sizeof(*s)); /* Round up to actual allocation */ From d8bdd795d383a23e38ac48a40d3d223caf47b290 Mon Sep 17 00:00:00 2001 From: Jann Horn <jannh@google.com> Date: Wed, 14 Feb 2024 17:05:38 +0100 Subject: [PATCH 182/304] lsm: fix integer overflow in lsm_set_self_attr() syscall security_setselfattr() has an integer overflow bug that leads to out-of-bounds access when userspace provides bogus input: `lctx->ctx_len + sizeof(*lctx)` is checked against `lctx->len` (and, redundantly, also against `size`), but there are no checks on `lctx->ctx_len`. Therefore, userspace can provide an `lsm_ctx` with `->ctx_len` set to a value between `-sizeof(struct lsm_ctx)` and -1, and this bogus `->ctx_len` will then be passed to an LSM module as a buffer length, causing LSM modules to perform out-of-bounds accesses. The following reproducer will demonstrate this under ASAN (if AppArmor is loaded as an LSM): ``` struct lsm_ctx { uint64_t id; uint64_t flags; uint64_t len; uint64_t ctx_len; char ctx[]; }; int main(void) { size_t size = sizeof(struct lsm_ctx); struct lsm_ctx *ctx = malloc(size); ctx->id = 104/*LSM_ID_APPARMOR*/; ctx->flags = 0; ctx->len = size; ctx->ctx_len = -sizeof(struct lsm_ctx); syscall( 460/*__NR_lsm_set_self_attr*/, /*attr=*/ 100/*LSM_ATTR_CURRENT*/, /*ctx=*/ ctx, /*size=*/ size, /*flags=*/ 0 ); } ``` Fixes: a04a1198088a ("LSM: syscalls for current process attributes") Signed-off-by: Jann Horn <jannh@google.com> Acked-by: Casey Schaufler <casey@schaufler-ca.com> [PM: subj tweak, removed ref to ASAN splat that isn't included] Signed-off-by: Paul Moore <paul@paul-moore.com> --- security/security.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/security.c b/security/security.c index 3aaad75c9ce8..7035ee35a393 100644 --- a/security/security.c +++ b/security/security.c @@ -29,6 +29,7 @@ #include <linux/backing-dev.h> #include <linux/string.h> #include <linux/msg.h> +#include <linux/overflow.h> #include <net/flow.h> /* How many LSMs were built into the kernel? */ @@ -4015,6 +4016,7 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx, struct security_hook_list *hp; struct lsm_ctx *lctx; int rc = LSM_RET_DEFAULT(setselfattr); + u64 required_len; if (flags) return -EINVAL; @@ -4027,8 +4029,9 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *uctx, if (IS_ERR(lctx)) return PTR_ERR(lctx); - if (size < lctx->len || size < lctx->ctx_len + sizeof(*lctx) || - lctx->len < lctx->ctx_len + sizeof(*lctx)) { + if (size < lctx->len || + check_add_overflow(sizeof(*lctx), lctx->ctx_len, &required_len) || + lctx->len < required_len) { rc = -EINVAL; goto free_out; } From 3b9ab248bc45abf8c2365ed3eec86cdefd4d626a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <masahiroy@kernel.org> Date: Fri, 2 Feb 2024 10:31:42 +0900 Subject: [PATCH 183/304] kbuild: use 4-space indentation when followed by conditionals GNU Make manual [1] clearly forbids a tab at the beginning of the conditional directive line: "Extra spaces are allowed and ignored at the beginning of the conditional directive line, but a tab is not allowed." This will not work for the next release of GNU Make, hence commit 82175d1f9430 ("kbuild: Replace tabs with spaces when followed by conditionals") replaced the inappropriate tabs with 8 spaces. However, the 8-space indentation cannot be visually distinguished. Linus suggested 2-4 spaces for those nested if-statements. [2] This commit redoes the replacement with 4 spaces. [1]: https://www.gnu.org/software/make/manual/make.html#Conditional-Syntax [2]: https://lore.kernel.org/all/CAHk-=whJKZNZWsa-VNDKafS_VfY4a5dAjG-r8BZgWk_a-xSepw@mail.gmail.com/ Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> --- Makefile | 12 ++++++------ arch/m68k/Makefile | 4 ++-- arch/parisc/Makefile | 4 ++-- arch/x86/Makefile | 8 ++++---- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 7e0b2ad98905..ed80c7d98a7e 100644 --- a/Makefile +++ b/Makefile @@ -294,15 +294,15 @@ may-sync-config := 1 single-build := ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) need-config := - endif + endif endif ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) may-sync-config := - endif + endif endif need-compiler := $(may-sync-config) @@ -323,9 +323,9 @@ endif # We cannot build single targets and the others at the same time ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),) single-build := 1 - ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) + ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) mixed-build := 1 - endif + endif endif # For "make -j clean all", "make -j mrproper defconfig all", etc. diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 76ef1a67c361..0abcf994ce55 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -15,10 +15,10 @@ KBUILD_DEFCONFIG := multi_defconfig ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := $(call cc-cross-prefix, \ m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-) - endif + endif endif # diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 7486b3b30594..316f84f1d15c 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -50,12 +50,12 @@ export CROSS32CC # Set default cross compiler for kernel build ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux CROSS_COMPILE := $(call cc-cross-prefix, \ $(foreach a,$(CC_ARCHES), \ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) - endif + endif endif ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2264db14a25d..da8f3caf2781 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y) # temporary until string.h is fixed KBUILD_CFLAGS += -ffreestanding - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) + ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - else + else KBUILD_CFLAGS += -mstack-protector-guard=global - endif endif + endif else BITS := 64 UTS_MACHINE := x86_64 From f44bff19268517ee98e80e944cad0f04f1db72e3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Mon, 12 Feb 2024 12:19:04 +0100 Subject: [PATCH 184/304] i2c: pasemi: split driver into two separate modules On powerpc, it is possible to compile test both the new apple (arm) and old pasemi (powerpc) drivers for the i2c hardware at the same time, which leads to a warning about linking the same object file twice: scripts/Makefile.build:244: drivers/i2c/busses/Makefile: i2c-pasemi-core.o is added to multiple modules: i2c-apple i2c-pasemi Rework the driver to have an explicit helper module, letting Kbuild take care of whether this should be built-in or a loadable driver. Fixes: 9bc5f4f660ff ("i2c: pasemi: Split pci driver to its own file") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Sven Peter <sven@svenpeter.dev> Signed-off-by: Andi Shyti <andi.shyti@kernel.org> --- drivers/i2c/busses/Makefile | 6 ++---- drivers/i2c/busses/i2c-pasemi-core.c | 6 ++++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 3757b9391e60..aa0ee8ecd6f2 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -90,10 +90,8 @@ obj-$(CONFIG_I2C_NPCM) += i2c-npcm7xx.o obj-$(CONFIG_I2C_OCORES) += i2c-ocores.o obj-$(CONFIG_I2C_OMAP) += i2c-omap.o obj-$(CONFIG_I2C_OWL) += i2c-owl.o -i2c-pasemi-objs := i2c-pasemi-core.o i2c-pasemi-pci.o -obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi.o -i2c-apple-objs := i2c-pasemi-core.o i2c-pasemi-platform.o -obj-$(CONFIG_I2C_APPLE) += i2c-apple.o +obj-$(CONFIG_I2C_PASEMI) += i2c-pasemi-core.o i2c-pasemi-pci.o +obj-$(CONFIG_I2C_APPLE) += i2c-pasemi-core.o i2c-pasemi-platform.o obj-$(CONFIG_I2C_PCA_PLATFORM) += i2c-pca-platform.o obj-$(CONFIG_I2C_PNX) += i2c-pnx.o obj-$(CONFIG_I2C_PXA) += i2c-pxa.o diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 7d54a9f34c74..bd8becbdeeb2 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -369,6 +369,7 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } +EXPORT_SYMBOL_GPL(pasemi_i2c_common_probe); irqreturn_t pasemi_irq_handler(int irq, void *dev_id) { @@ -378,3 +379,8 @@ irqreturn_t pasemi_irq_handler(int irq, void *dev_id) complete(&smbus->irq_completion); return IRQ_HANDLED; } +EXPORT_SYMBOL_GPL(pasemi_irq_handler); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson <olof@lixom.net>"); +MODULE_DESCRIPTION("PA Semi PWRficient SMBus driver"); From c1c9d0f6f7f1dbf29db996bd8e166242843a5f21 Mon Sep 17 00:00:00 2001 From: Jean Delvare <jdelvare@suse.de> Date: Wed, 14 Feb 2024 15:59:39 +0100 Subject: [PATCH 185/304] i2c: i801: Fix block process call transactions According to the Intel datasheets, software must reset the block buffer index twice for block process call transactions: once before writing the outgoing data to the buffer, and once again before reading the incoming data from the buffer. The driver is currently missing the second reset, causing the wrong portion of the block buffer to be read. Signed-off-by: Jean Delvare <jdelvare@suse.de> Reported-by: Piotr Zakowski <piotr.zakowski@intel.com> Closes: https://lore.kernel.org/linux-i2c/20240213120553.7b0ab120@endymion.delvare/ Fixes: 315cd67c9453 ("i2c: i801: Add Block Write-Block Read Process Call support") Reviewed-by: Alexander Sverdlin <alexander.sverdlin@gmail.com> Signed-off-by: Andi Shyti <andi.shyti@kernel.org> --- drivers/i2c/busses/i2c-i801.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 3932e8d96a17..2c36b36d7d51 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -498,11 +498,10 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, /* Set block buffer mode */ outb_p(inb_p(SMBAUXCTL(priv)) | SMBAUXCTL_E32B, SMBAUXCTL(priv)); - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; outb_p(len, SMBHSTDAT0(priv)); + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) outb_p(data->block[i+1], SMBBLKDAT(priv)); } @@ -520,6 +519,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, } data->block[0] = len; + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) data->block[i + 1] = inb_p(SMBBLKDAT(priv)); } From 6388cfd0e69b56ca640610f1bf29334619d18142 Mon Sep 17 00:00:00 2001 From: Thorsten Blum <thorsten.blum@toblux.com> Date: Sat, 10 Feb 2024 16:20:04 +0100 Subject: [PATCH 186/304] docs: kconfig: Fix grammar and formatting - Remove unnecessary spaces - Fix grammar s/to solution/solution/ Signed-off-by: Thorsten Blum <thorsten.blum@toblux.com> Reviewed-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> --- Documentation/kbuild/Kconfig.recursion-issue-01 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/kbuild/Kconfig.recursion-issue-01 b/Documentation/kbuild/Kconfig.recursion-issue-01 index e8877db0461f..ac49836d8ecf 100644 --- a/Documentation/kbuild/Kconfig.recursion-issue-01 +++ b/Documentation/kbuild/Kconfig.recursion-issue-01 @@ -16,13 +16,13 @@ # that are possible for CORE. So for example if CORE_BELL_A_ADVANCED is 'y', # CORE must be 'y' too. # -# * What influences CORE_BELL_A_ADVANCED ? +# * What influences CORE_BELL_A_ADVANCED? # # As the name implies CORE_BELL_A_ADVANCED is an advanced feature of # CORE_BELL_A so naturally it depends on CORE_BELL_A. So if CORE_BELL_A is 'y' # we know CORE_BELL_A_ADVANCED can be 'y' too. # -# * What influences CORE_BELL_A ? +# * What influences CORE_BELL_A? # # CORE_BELL_A depends on CORE, so CORE influences CORE_BELL_A. # @@ -34,7 +34,7 @@ # the "recursive dependency detected" error. # # Reading the Documentation/kbuild/Kconfig.recursion-issue-01 file it may be -# obvious that an easy to solution to this problem should just be the removal +# obvious that an easy solution to this problem should just be the removal # of the "select CORE" from CORE_BELL_A_ADVANCED as that is implicit already # since CORE_BELL_A depends on CORE. Recursive dependency issues are not always # so trivial to resolve, we provide another example below of practical From e3a9ee963ad8ba677ca925149812c5932b49af69 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor <nathan@kernel.org> Date: Mon, 12 Feb 2024 19:05:10 -0700 Subject: [PATCH 187/304] kbuild: Fix changing ELF file type for output of gen_btf for big endian Commit 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") changed the ELF type of .btf.vmlinux.bin.o to ET_REL via dd, which works fine for little endian platforms: 00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 03 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| +00000010 01 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................| However, for big endian platforms, it changes the wrong byte, resulting in an invalid ELF file type, which ld.lld rejects: 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 01 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: <unknown>: 103 ld.lld: error: .btf.vmlinux.bin.o: unknown file type Fix this by updating the entire 16-bit e_type field rather than just a single byte, so that everything works correctly for all platforms and linkers. 00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............| -00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| +00000010 00 01 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................| Type: REL (Relocatable file) While in the area, update the comment to mention that binutils 2.35+ matches LLD's behavior of rejecting an ET_EXEC input, which occurred after the comment was added. Cc: stable@vger.kernel.org Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") Link: https://github.com/llvm/llvm-project/pull/75643 Suggested-by: Masahiro Yamada <masahiroy@kernel.org> Signed-off-by: Nathan Chancellor <nathan@kernel.org> Reviewed-by: Fangrui Song <maskray@google.com> Reviewed-by: Nicolas Schier <nicolas@fjasle.eu> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Justin Stitt <justinstitt@google.com> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> --- scripts/link-vmlinux.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index a432b171be82..7862a8101747 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -135,8 +135,13 @@ gen_btf() ${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \ --strip-all ${1} ${2} 2>/dev/null # Change e_type to ET_REL so that it can be used to link final vmlinux. - # Unlike GNU ld, lld does not allow an ET_EXEC input. - printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none + # GNU ld 2.35+ and lld do not allow an ET_EXEC input. + if is_enabled CONFIG_CPU_BIG_ENDIAN; then + et_rel='\0\1' + else + et_rel='\1\0' + fi + printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none } # Create ${2} .S file with all symbols from the ${1} object file From dae4a0171e25884787da32823b3081b4c2acebb2 Mon Sep 17 00:00:00 2001 From: Andrew Ballance <andrewjballance@gmail.com> Date: Tue, 13 Feb 2024 19:23:05 -0600 Subject: [PATCH 188/304] gen_compile_commands: fix invalid escape sequence warning With python 3.12, '\#' results in this warning SyntaxWarning: invalid escape sequence '\#' Signed-off-by: Andrew Ballance <andrewjballance@gmail.com> Reviewed-by: Justin Stitt <justinstitt@google.com> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> --- scripts/clang-tools/gen_compile_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 5dea4479240b..e4fb686dfaa9 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -170,7 +170,7 @@ def process_line(root_directory, command_prefix, file_path): # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the # kernel version). The compile_commands.json file is not interepreted # by Make, so this code replaces the escaped version with '#'. - prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#') + prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#') # Return the canonical path, eliminating any symbolic links encountered in the path. abs_path = os.path.realpath(os.path.join(root_directory, file_path)) From 5d9a16b2a4d9e8fa028892ded43f6501bc2969e5 Mon Sep 17 00:00:00 2001 From: Radek Krejci <radek.krejci@oracle.com> Date: Wed, 14 Feb 2024 10:14:07 +0100 Subject: [PATCH 189/304] modpost: trim leading spaces when processing source files list get_line() does not trim the leading spaces, but the parse_source_files() expects to get lines with source files paths where the first space occurs after the file path. Fixes: 70f30cfe5b89 ("modpost: use read_text_file() and get_line() for reading text files") Signed-off-by: Radek Krejci <radek.krejci@oracle.com> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> --- scripts/mod/sumversion.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index 31066bfdba04..dc4878502276 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -326,7 +326,12 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md) /* Sum all files in the same dir or subdirs. */ while ((line = get_line(&pos))) { - char* p = line; + char* p; + + /* trim the leading spaces away */ + while (isspace(*line)) + line++; + p = line; if (strncmp(line, "source_", sizeof("source_")-1) == 0) { p = strrchr(line, ' '); From a37ee9e117ef73bbc2f5c0b31911afd52d229861 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@kernel.dk> Date: Wed, 14 Feb 2024 08:23:05 -0700 Subject: [PATCH 190/304] io_uring/net: fix multishot accept overflow handling If we hit CQ ring overflow when attempting to post a multishot accept completion, we don't properly save the result or return code. This results in losing the accepted fd value. Instead, we return the result from the poll operation that triggered the accept retry. This is generally POLLIN|POLLPRI|POLLRDNORM|POLLRDBAND which is 0xc3, or 195, which looks like a valid file descriptor, but it really has no connection to that. Handle this like we do for other multishot completions - assign the result, and return IOU_STOP_MULTISHOT to cancel any further completions from this request when overflow is hit. This preserves the result, as we should, and tells the application that the request needs to be re-armed. Cc: stable@vger.kernel.org Fixes: 515e26961295 ("io_uring: revert "io_uring fix multishot accept ordering"") Link: https://github.com/axboe/liburing/issues/1062 Signed-off-by: Jens Axboe <axboe@kernel.dk> --- io_uring/net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 43bc9a5f96f9..161622029147 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1372,7 +1372,7 @@ retry: * has already been done */ if (issue_flags & IO_URING_F_MULTISHOT) - ret = IOU_ISSUE_SKIP_COMPLETE; + return IOU_ISSUE_SKIP_COMPLETE; return ret; } if (ret == -ERESTARTSYS) @@ -1397,7 +1397,8 @@ retry: ret, IORING_CQE_F_MORE)) goto retry; - return -ECANCELED; + io_req_set_res(req, ret, 0); + return IOU_STOP_MULTISHOT; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) From 9377de4cb3e8fb6c494fa2f5ae2c3780d3e73822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com> Date: Thu, 8 Feb 2024 14:21:15 +0100 Subject: [PATCH 191/304] drm/xe/vm: Avoid reserving zero fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function xe_vm_prepare_vma was blindly accepting zero as the number of fences and forwarded that to drm_exec_prepare_obj. However, that leads to an out-of-bounds shift in the dma_resv_reserve_fences() and while one could argue that the dma_resv code should be robust against that, avoid attempting to reserve zero fences. Relevant stack trace: [773.183188] ------------[ cut here ]------------ [773.183199] UBSAN: shift-out-of-bounds in ../include/linux/log2.h:57:13 [773.183241] shift exponent 64 is too large for 64-bit type 'long unsigned int' [773.183254] CPU: 2 PID: 1816 Comm: xe_evict Tainted: G U 6.8.0-rc3-xe #1 [773.183256] Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 2014 10/14/2022 [773.183257] Call Trace: [773.183258] <TASK> [773.183260] dump_stack_lvl+0xaf/0xd0 [773.183266] dump_stack+0x10/0x20 [773.183283] ubsan_epilogue+0x9/0x40 [773.183286] __ubsan_handle_shift_out_of_bounds+0x10f/0x170 [773.183293] dma_resv_reserve_fences.cold+0x2b/0x48 [773.183295] ? ww_mutex_lock+0x3c/0x110 [773.183301] drm_exec_prepare_obj+0x45/0x60 [drm_exec] [773.183313] xe_vm_prepare_vma+0x33/0x70 [xe] [773.183375] xe_vma_destroy_unlocked+0x55/0xa0 [xe] [773.183427] xe_vm_close_and_put+0x526/0x940 [xe] Fixes: 2714d5093620 ("drm/xe: Convert pagefaulting code to use drm_exec") Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Matthew Brost <matthew.brost@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240208132115.3132-1-thomas.hellstrom@linux.intel.com (cherry picked from commit eb538b5574251a449f40b1ee35efc631228c8992) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> --- drivers/gpu/drm/xe/xe_vm.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 865e10d0a06a..7b00faa67287 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -995,9 +995,16 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, int err; XE_WARN_ON(!vm); - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - if (!err && bo && !bo->vm) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + if (num_shared) + err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); + else + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) { + if (num_shared) + err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); + else + err = drm_exec_lock_obj(exec, &bo->ttm.base); + } return err; } From c2626b7387210cff741be9fb91d317f02a70347c Mon Sep 17 00:00:00 2001 From: Matthew Auld <matthew.auld@intel.com> Date: Mon, 5 Feb 2024 15:31:11 +0000 Subject: [PATCH 192/304] drm/xe/display: fix i915_gem_object_is_shmem() wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shmem ensures the memory is cleared on allocation, however here we are using TTM, which doesn't natively support shmem (other than for swap), but instead just allocates normal system memory. And we only zero such memory for userspace allocations. In the case of intel_fbdev we are missing the memset_io() since display path incorrectly thinks object is shmem based. Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Signed-off-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Suraj Kandpal <suraj.kandpal@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240205153110.38340-2-matthew.auld@intel.com (cherry picked from commit 63fb531fbfda81bda652546a39333b565aea324d) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> --- drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h index 68d9f6116bdf..777c20ceabab 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h @@ -10,7 +10,7 @@ #include "xe_bo.h" -#define i915_gem_object_is_shmem(obj) ((obj)->flags & XE_BO_CREATE_SYSTEM_BIT) +#define i915_gem_object_is_shmem(obj) (0) /* We don't use shmem */ static inline dma_addr_t i915_gem_object_get_dma_address(const struct xe_bo *bo, pgoff_t n) { From 8cb92dc730d8ae5f803dae1a6eb91fb9603f4237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com> Date: Fri, 9 Feb 2024 12:26:55 +0100 Subject: [PATCH 193/304] drm/xe/pt: Allow for stricter type- and range checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Distinguish between xe_pt and the xe_pt_dir subclass when allocating and freeing. Also use a fixed-size array for the xe_pt_dir page entries to make life easier for dynamic range- checkers. Finally rename the page-directory child pointer array to "children". While no functional change, this fixes ubsan splats similar to: [ 51.463021] ------------[ cut here ]------------ [ 51.463022] UBSAN: array-index-out-of-bounds in drivers/gpu/drm/xe/xe_pt.c:47:9 [ 51.463023] index 0 is out of range for type 'xe_ptw *[*]' [ 51.463024] CPU: 5 PID: 2778 Comm: xe_vm Tainted: G U 6.8.0-rc1+ #218 [ 51.463026] Hardware name: ASUS System Product Name/PRIME B560M-A AC, BIOS 2001 02/01/2023 [ 51.463027] Call Trace: [ 51.463028] <TASK> [ 51.463029] dump_stack_lvl+0x47/0x60 [ 51.463030] __ubsan_handle_out_of_bounds+0x95/0xd0 [ 51.463032] xe_pt_destroy+0xa5/0x150 [xe] [ 51.463088] __xe_pt_unbind_vma+0x36c/0x9b0 [xe] [ 51.463144] xe_vm_unbind+0xd8/0x580 [xe] [ 51.463204] ? drm_exec_prepare_obj+0x3f/0x60 [drm_exec] [ 51.463208] __xe_vma_op_execute+0x5da/0x910 [xe] [ 51.463268] ? __drm_gpuvm_sm_unmap+0x1cb/0x220 [drm_gpuvm] [ 51.463272] ? radix_tree_node_alloc.constprop.0+0x89/0xc0 [ 51.463275] ? drm_gpuva_it_remove+0x1f3/0x2a0 [drm_gpuvm] [ 51.463279] ? drm_gpuva_remove+0x2f/0xc0 [drm_gpuvm] [ 51.463283] xe_vm_bind_ioctl+0x1a55/0x20b0 [xe] [ 51.463344] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 51.463414] drm_ioctl_kernel+0xb6/0x120 [ 51.463416] drm_ioctl+0x287/0x4e0 [ 51.463418] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 51.463481] __x64_sys_ioctl+0x94/0xd0 [ 51.463484] do_syscall_64+0x86/0x170 [ 51.463486] ? syscall_exit_to_user_mode+0x7d/0x200 [ 51.463488] ? do_syscall_64+0x96/0x170 [ 51.463490] ? do_syscall_64+0x96/0x170 [ 51.463492] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 51.463494] RIP: 0033:0x7f246bfe817d [ 51.463498] Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1a 48 8b 45 c8 64 48 2b 04 25 28 00 00 00 [ 51.463501] RSP: 002b:00007ffc1bd19ad0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 51.463502] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f246bfe817d [ 51.463504] RDX: 00007ffc1bd19b60 RSI: 0000000040886445 RDI: 0000000000000003 [ 51.463505] RBP: 00007ffc1bd19b20 R08: 0000000000000000 R09: 0000000000000000 [ 51.463506] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc1bd19b60 [ 51.463508] R13: 0000000040886445 R14: 0000000000000003 R15: 0000000000010000 [ 51.463510] </TASK> [ 51.463517] ---[ end trace ]--- v2 - Fix kerneldoc warning (Matthew Brost) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Matthew Brost <matthew.brost@intel.com> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Matthew Brost <matthew.brost@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240209112655.4872-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 157261c58b283f5c83e3f9087eca63be8d591ab8) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> --- drivers/gpu/drm/xe/xe_pt.c | 39 +++++++++++++++++++++------------ drivers/gpu/drm/xe/xe_pt_walk.c | 2 +- drivers/gpu/drm/xe/xe_pt_walk.h | 19 +++------------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index e45b37c3f0c2..ac19bfa3f798 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -20,8 +20,8 @@ struct xe_pt_dir { struct xe_pt pt; - /** @dir: Directory structure for the xe_pt_walk functionality */ - struct xe_ptw_dir dir; + /** @children: Array of page-table child nodes */ + struct xe_ptw *children[XE_PDES]; }; #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) @@ -44,7 +44,7 @@ static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) { - return container_of(pt_dir->dir.entries[index], struct xe_pt, base); + return container_of(pt_dir->children[index], struct xe_pt, base); } static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, @@ -65,6 +65,14 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, XE_PTE_NULL; } +static void xe_pt_free(struct xe_pt *pt) +{ + if (pt->level) + kfree(as_xe_pt_dir(pt)); + else + kfree(pt); +} + /** * xe_pt_create() - Create a page-table. * @vm: The vm to create for. @@ -85,15 +93,19 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, { struct xe_pt *pt; struct xe_bo *bo; - size_t size; int err; - size = !level ? sizeof(struct xe_pt) : sizeof(struct xe_pt_dir) + - XE_PDES * sizeof(struct xe_ptw *); - pt = kzalloc(size, GFP_KERNEL); + if (level) { + struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); + + pt = (dir) ? &dir->pt : NULL; + } else { + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + } if (!pt) return ERR_PTR(-ENOMEM); + pt->level = level; bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, XE_BO_CREATE_VRAM_IF_DGFX(tile) | @@ -106,8 +118,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, goto err_kfree; } pt->bo = bo; - pt->level = level; - pt->base.dir = level ? &as_xe_pt_dir(pt)->dir : NULL; + pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; if (vm->xef) xe_drm_client_add_bo(vm->xef->client, pt->bo); @@ -116,7 +127,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, return pt; err_kfree: - kfree(pt); + xe_pt_free(pt); return ERR_PTR(err); } @@ -193,7 +204,7 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) deferred); } } - kfree(pt); + xe_pt_free(pt); } /** @@ -358,7 +369,7 @@ xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, struct iosys_map *map = &parent->bo->vmap; if (unlikely(xe_child)) - parent->base.dir->entries[offset] = &xe_child->base; + parent->base.children[offset] = &xe_child->base; xe_pt_write(xe_walk->vm->xe, map, offset, pte); parent->num_live++; @@ -853,7 +864,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma, xe_pt_destroy(xe_pt_entry(pt_dir, j_), xe_vma_vm(vma)->flags, deferred); - pt_dir->dir.entries[j_] = &newpte->base; + pt_dir->children[j_] = &newpte->base; } kfree(entries[i].pt_entries); } @@ -1507,7 +1518,7 @@ xe_pt_commit_unbind(struct xe_vma *vma, xe_pt_destroy(xe_pt_entry(pt_dir, i), xe_vma_vm(vma)->flags, deferred); - pt_dir->dir.entries[i] = NULL; + pt_dir->children[i] = NULL; } } } diff --git a/drivers/gpu/drm/xe/xe_pt_walk.c b/drivers/gpu/drm/xe/xe_pt_walk.c index 8f6c8d063f39..b8b3d2aea492 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.c +++ b/drivers/gpu/drm/xe/xe_pt_walk.c @@ -74,7 +74,7 @@ int xe_pt_walk_range(struct xe_ptw *parent, unsigned int level, u64 addr, u64 end, struct xe_pt_walk *walk) { pgoff_t offset = xe_pt_offset(addr, level, walk); - struct xe_ptw **entries = parent->dir ? parent->dir->entries : NULL; + struct xe_ptw **entries = parent->children ? parent->children : NULL; const struct xe_pt_walk_ops *ops = walk->ops; enum page_walk_action action; struct xe_ptw *child; diff --git a/drivers/gpu/drm/xe/xe_pt_walk.h b/drivers/gpu/drm/xe/xe_pt_walk.h index ec3d1e9efa6d..5ecc4d2f0f65 100644 --- a/drivers/gpu/drm/xe/xe_pt_walk.h +++ b/drivers/gpu/drm/xe/xe_pt_walk.h @@ -8,28 +8,15 @@ #include <linux/pagewalk.h> #include <linux/types.h> -struct xe_ptw_dir; - /** * struct xe_ptw - base class for driver pagetable subclassing. - * @dir: Pointer to an array of children if any. + * @children: Pointer to an array of children if any. * * Drivers could subclass this, and if it's a page-directory, typically - * embed the xe_ptw_dir::entries array in the same allocation. + * embed an array of xe_ptw pointers. */ struct xe_ptw { - struct xe_ptw_dir *dir; -}; - -/** - * struct xe_ptw_dir - page directory structure - * @entries: Array holding page directory children. - * - * It is the responsibility of the user to ensure @entries is - * correctly sized. - */ -struct xe_ptw_dir { - struct xe_ptw *entries[0]; + struct xe_ptw **children; }; /** From 455dae7549aed709707feda5d6b3e085b37d33f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Tue, 13 Feb 2024 10:56:48 +0100 Subject: [PATCH 194/304] drm/xe: avoid function cast warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clang-16 warns about a cast between incompatible function types: drivers/gpu/drm/xe/xe_range_fence.c:155:10: error: cast from 'void (*)(const void *)' to 'void (*)(struct xe_range_fence *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] 155 | .free = (void (*)(struct xe_range_fence *rfence)) kfree, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Avoid this with a trivial helper function that calls kfree() here. v2: - s/* rfence/*rfence/ (Thomas) Fixes: 845f64bdbfc9 ("drm/xe: Introduce a range-fence utility") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240213095719.454865-1-arnd@kernel.org (cherry picked from commit f2c9364db57992b1496db4ae5e67ab14926be3ec) Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> --- drivers/gpu/drm/xe/xe_range_fence.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_range_fence.c b/drivers/gpu/drm/xe/xe_range_fence.c index d35d9ec58e86..372378e89e98 100644 --- a/drivers/gpu/drm/xe/xe_range_fence.c +++ b/drivers/gpu/drm/xe/xe_range_fence.c @@ -151,6 +151,11 @@ xe_range_fence_tree_next(struct xe_range_fence *rfence, u64 start, u64 last) return xe_range_fence_tree_iter_next(rfence, start, last); } +static void xe_range_fence_free(struct xe_range_fence *rfence) +{ + kfree(rfence); +} + const struct xe_range_fence_ops xe_range_fence_kfree_ops = { - .free = (void (*)(struct xe_range_fence *rfence)) kfree, + .free = xe_range_fence_free, }; From fb091ff394792c018527b3211bbdfae93ea4ac02 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan <eahariha@linux.microsoft.com> Date: Wed, 14 Feb 2024 17:55:18 +0000 Subject: [PATCH 195/304] arm64: Subscribe Microsoft Azure Cobalt 100 to ARM Neoverse N2 errata Add the MIDR value of Microsoft Azure Cobalt 100, which is a Microsoft implemented CPU based on r0p0 of the ARM Neoverse N2 CPU, and therefore suffers from all the same errata. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com> Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com> Acked-by: Mark Rutland <mark.rutland@arm.com> Acked-by: Marc Zyngier <maz@kernel.org> Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20240214175522.2457857-1-eahariha@linux.microsoft.com Signed-off-by: Will Deacon <will@kernel.org> --- Documentation/arch/arm64/silicon-errata.rst | 7 +++++++ arch/arm64/include/asm/cputype.h | 4 ++++ arch/arm64/kernel/cpu_errata.c | 3 +++ 3 files changed, 14 insertions(+) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index e8c2ce1f9df6..45a7f4932fe0 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -243,3 +243,10 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ASR | ASR8601 | #8601001 | N/A | +----------------+-----------------+-----------------+-----------------------------+ ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ +| Microsoft | Azure Cobalt 100| #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7c7493cb571f..52f076afeb96 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -61,6 +61,7 @@ #define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_IMP_AMPERE 0xC0 +#define ARM_CPU_IMP_MICROSOFT 0x6D #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -135,6 +136,8 @@ #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -193,6 +196,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 967c7c7a4e7d..76b8dd37092a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -374,6 +374,7 @@ static const struct midr_range erratum_1463225[] = { static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2139208 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -387,6 +388,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { static const struct midr_range tsb_flush_fail_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2067961 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2054223 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), @@ -399,6 +401,7 @@ static const struct midr_range tsb_flush_fail_cpus[] = { static struct midr_range trbe_write_out_of_range_cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_2253138 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_MICROSOFT_AZURE_COBALT_100), #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), From 2813926261e436d33bc74486b51cce60b76edf78 Mon Sep 17 00:00:00 2001 From: Mark Brown <broonie@kernel.org> Date: Tue, 13 Feb 2024 18:24:38 +0000 Subject: [PATCH 196/304] arm64/sve: Lower the maximum allocation for the SVE ptrace regset Doug Anderson observed that ChromeOS crashes are being reported which include failing allocations of order 7 during core dumps due to ptrace allocating storage for regsets: chrome: page allocation failure: order:7, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=urgent,mems_allowed=0 ... regset_get_alloc+0x1c/0x28 elf_core_dump+0x3d8/0xd8c do_coredump+0xeb8/0x1378 with further investigation showing that this is: [ 66.957385] DOUG: Allocating 279584 bytes which is the maximum size of the SVE regset. As Doug observes it is not entirely surprising that such a large allocation of contiguous memory might fail on a long running system. The SVE regset is currently sized to hold SVE registers with a VQ of SVE_VQ_MAX which is 512, substantially more than the architectural maximum of 16 which we might see even in a system emulating the limits of the architecture. Since we don't expose the size we tell the regset core externally let's define ARCH_SVE_VQ_MAX with the actual architectural maximum and use that for the regset, we'll still overallocate most of the time but much less so which will be helpful even if the core is fixed to not require contiguous allocations. Specify ARCH_SVE_VQ_MAX in terms of the maximum value that can be written into ZCR_ELx.LEN (where this is set in the hardware). For consistency update the maximum SME vector length to be specified in the same style while we are at it. We could also teach the ptrace core about runtime discoverable regset sizes but that would be a more invasive change and this is being observed in practical systems. Reported-by: Doug Anderson <dianders@chromium.org> Signed-off-by: Mark Brown <broonie@kernel.org> Tested-by: Douglas Anderson <dianders@chromium.org> Link: https://lore.kernel.org/r/20240213-arm64-sve-ptrace-regset-size-v2-1-c7600ca74b9b@kernel.org Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/fpsimd.h | 12 ++++++------ arch/arm64/kernel/ptrace.c | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 50e5f25d3024..481d94416d69 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -62,13 +62,13 @@ static inline void cpacr_restore(unsigned long cpacr) * When we defined the maximum SVE vector length we defined the ABI so * that the maximum vector length included all the reserved for future * expansion bits in ZCR rather than those just currently defined by - * the architecture. While SME follows a similar pattern the fact that - * it includes a square matrix means that any allocations that attempt - * to cover the maximum potential vector length (such as happen with - * the regset used for ptrace) end up being extremely large. Define - * the much lower actual limit for use in such situations. + * the architecture. Using this length to allocate worst size buffers + * results in excessively large allocations, and this effect is even + * more pronounced for SME due to ZA. Define more suitable VLs for + * these situations. */ -#define SME_VQ_MAX 16 +#define ARCH_SVE_VQ_MAX ((ZCR_ELx_LEN_MASK >> ZCR_ELx_LEN_SHIFT) + 1) +#define SME_VQ_MAX ((SMCR_ELx_LEN_MASK >> SMCR_ELx_LEN_SHIFT) + 1) struct task_struct; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index dc6cf0e37194..e3bef38fc2e2 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1500,7 +1500,8 @@ static const struct user_regset aarch64_regsets[] = { #ifdef CONFIG_ARM64_SVE [REGSET_SVE] = { /* Scalable Vector Extension */ .core_note_type = NT_ARM_SVE, - .n = DIV_ROUND_UP(SVE_PT_SIZE(SVE_VQ_MAX, SVE_PT_REGS_SVE), + .n = DIV_ROUND_UP(SVE_PT_SIZE(ARCH_SVE_VQ_MAX, + SVE_PT_REGS_SVE), SVE_VQ_BYTES), .size = SVE_VQ_BYTES, .align = SVE_VQ_BYTES, From a951884d82886d8453d489f84f20ac168d062b38 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Thu, 15 Feb 2024 09:32:08 +0100 Subject: [PATCH 197/304] kallsyms: ignore ARMv4 thunks along with others lld is now able to build ARMv4 and ARMv4T kernels, which means it can generate thunks for those (__ARMv4PILongThunk_*, __ARMv4PILongBXThunk_*) that can interfere with kallsyms table generation since they do not get ignore like the corresponding ARMv5+ ones are: Inconsistent kallsyms data Try "make KALLSYMS_EXTRA_PASS=1" as a workaround Replace the hardcoded list of thunk symbols with a more general regex that covers this one along with future symbols that follow the same pattern. Fixes: 5eb6e280432d ("ARM: 9289/1: Allow pre-ARMv5 builds with ld.lld 16.0.0 and newer") Fixes: efe6e3068067 ("kallsyms: fix nonconverging kallsyms table with lld") Suggested-by: Masahiro Yamada <masahiroy@kernel.org> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Reviewed-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> --- scripts/mksysmap | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/scripts/mksysmap b/scripts/mksysmap index 9ba1c9da0a40..57ff5656d566 100755 --- a/scripts/mksysmap +++ b/scripts/mksysmap @@ -48,17 +48,8 @@ ${NM} -n ${1} | sed >${2} -e " / __kvm_nvhe_\\$/d / __kvm_nvhe_\.L/d -# arm64 lld -/ __AArch64ADRPThunk_/d - -# arm lld -/ __ARMV5PILongThunk_/d -/ __ARMV7PILongThunk_/d -/ __ThumbV7PILongThunk_/d - -# mips lld -/ __LA25Thunk_/d -/ __microLA25Thunk_/d +# lld arm/aarch64/mips thunks +/ __[[:alnum:]]*Thunk_/d # CFI type identifiers / __kcfi_typeid_/d From 32f03f4002c5df837fb920eb23fcd2f4af9b0b23 Mon Sep 17 00:00:00 2001 From: Eniac Zhang <eniac-xw.zhang@hp.com> Date: Thu, 15 Feb 2024 15:49:22 +0000 Subject: [PATCH 198/304] ALSA: hda/realtek: fix mute/micmute LED For HP mt645 The HP mt645 G7 Thin Client uses an ALC236 codec and needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make the mute and micmute LEDs work. There are two variants of the USB-C PD chip on this device. Each uses a different BIOS and board ID, hence the two entries. Signed-off-by: Eniac Zhang <eniac-xw.zhang@hp.com> Signed-off-by: Alexandru Gagniuc <alexandru.gagniuc@hp.com> Cc: <stable@vger.kernel.org> Link: https://lore.kernel.org/r/20240215154922.778394-1-alexandru.gagniuc@hp.com Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5d0517465f08..0ec1312bffd5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9930,6 +9930,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), @@ -9937,6 +9938,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b59, "HP Elite mt645 G7 Mobile Thin Client U89", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b63, "HP Elite Dragonfly 13.5 inch G4", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), From 41c25e193b2befc22462aa41591d397fab174ca1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai <tiwai@suse.de> Date: Thu, 15 Feb 2024 16:31:44 +0100 Subject: [PATCH 199/304] ALSA: usb-audio: More relaxed check of MIDI jack names The USB audio driver tries to retrieve MIDI jack name strings that can be used for rawmidi substream names and sequencer port names, but its checking is too strict: often the firmware provides the jack info for unexpected directions, and then we miss the info although it's present. In this patch, the code to extract the jack info is changed to allow both in and out directions in a single loop. That is, the former two functions to obtain the descriptor pointers for jack in and out are changed to a single function that returns iJack of the corresponding jack ID, no matter which direction is used. It's a code simplification at the same time as well as the fix. Fixes: eb596e0fd13c ("ALSA: usb-audio: generate midi streaming substream names from jack names") Link: https://lore.kernel.org/r/20240215153144.26047-1-tiwai@suse.de Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/usb/midi.c | 77 +++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 50 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 6b0993258e03..c1f2e5a03de9 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1742,50 +1742,44 @@ static void snd_usbmidi_get_port_info(struct snd_rawmidi *rmidi, int number, } } -static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor( - struct usb_host_interface *hostif, uint8_t jack_id) +/* return iJack for the corresponding jackID */ +static int find_usb_ijack(struct usb_host_interface *hostif, uint8_t jack_id) { unsigned char *extra = hostif->extra; int extralen = hostif->extralen; + struct usb_descriptor_header *h; + struct usb_midi_out_jack_descriptor *outjd; + struct usb_midi_in_jack_descriptor *injd; + size_t sz; while (extralen > 4) { - struct usb_midi_in_jack_descriptor *injd = - (struct usb_midi_in_jack_descriptor *)extra; + h = (struct usb_descriptor_header *)extra; + if (h->bDescriptorType != USB_DT_CS_INTERFACE) + goto next; - if (injd->bLength >= sizeof(*injd) && - injd->bDescriptorType == USB_DT_CS_INTERFACE && - injd->bDescriptorSubtype == UAC_MIDI_IN_JACK && - injd->bJackID == jack_id) - return injd; - if (!extra[0]) - break; - extralen -= extra[0]; - extra += extra[0]; - } - return NULL; -} - -static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor( - struct usb_host_interface *hostif, uint8_t jack_id) -{ - unsigned char *extra = hostif->extra; - int extralen = hostif->extralen; - - while (extralen > 4) { - struct usb_midi_out_jack_descriptor *outjd = - (struct usb_midi_out_jack_descriptor *)extra; - - if (outjd->bLength >= sizeof(*outjd) && - outjd->bDescriptorType == USB_DT_CS_INTERFACE && + outjd = (struct usb_midi_out_jack_descriptor *)h; + if (h->bLength >= sizeof(*outjd) && outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK && - outjd->bJackID == jack_id) - return outjd; + outjd->bJackID == jack_id) { + sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); + if (outjd->bLength < sz) + goto next; + return *(extra + sz - 1); + } + + injd = (struct usb_midi_in_jack_descriptor *)h; + if (injd->bLength >= sizeof(*injd) && + injd->bDescriptorSubtype == UAC_MIDI_IN_JACK && + injd->bJackID == jack_id) + return injd->iJack; + +next: if (!extra[0]) break; extralen -= extra[0]; extra += extra[0]; } - return NULL; + return 0; } static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, @@ -1796,13 +1790,10 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, const char *name_format; struct usb_interface *intf; struct usb_host_interface *hostif; - struct usb_midi_in_jack_descriptor *injd; - struct usb_midi_out_jack_descriptor *outjd; uint8_t jack_name_buf[32]; uint8_t *default_jack_name = "MIDI"; uint8_t *jack_name = default_jack_name; uint8_t iJack; - size_t sz; int res; struct snd_rawmidi_substream *substream = @@ -1816,21 +1807,7 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, intf = umidi->iface; if (intf && jack_id >= 0) { hostif = intf->cur_altsetting; - iJack = 0; - if (stream != SNDRV_RAWMIDI_STREAM_OUTPUT) { - /* in jacks connect to outs */ - outjd = find_usb_out_jack_descriptor(hostif, jack_id); - if (outjd) { - sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); - if (outjd->bLength >= sz) - iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t)); - } - } else { - /* and out jacks connect to ins */ - injd = find_usb_in_jack_descriptor(hostif, jack_id); - if (injd) - iJack = injd->iJack; - } + iJack = find_usb_ijack(hostif, jack_id); if (iJack != 0) { res = usb_string(umidi->dev, iJack, jack_name_buf, ARRAY_SIZE(jack_name_buf)); From 9b6326354cf9a41521b79287da3bfab022ae0b6d Mon Sep 17 00:00:00 2001 From: Thorsten Blum <thorsten.blum@toblux.com> Date: Wed, 14 Feb 2024 23:05:56 +0100 Subject: [PATCH 200/304] tracing/synthetic: Fix trace_string() return value Fix trace_string() by assigning the string length to the return variable which got lost in commit ddeea494a16f ("tracing/synthetic: Use union instead of casts") and caused trace_string() to always return 0. Link: https://lore.kernel.org/linux-trace-kernel/20240214220555.711598-1-thorsten.blum@toblux.com Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Fixes: ddeea494a16f ("tracing/synthetic: Use union instead of casts") Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Signed-off-by: Thorsten Blum <thorsten.blum@toblux.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> --- kernel/trace/trace_events_synth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index e7af286af4f1..c82b401a294d 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -441,8 +441,9 @@ static unsigned int trace_string(struct synth_trace_event *entry, if (is_dynamic) { union trace_synth_field *data = &entry->fields[*n_u64]; + len = fetch_store_strlen((unsigned long)str_val); data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size; - data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val); + data->as_dynamic.len = len; ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry); From 8c7bfd8262319fd3f127a5380f593ea76f1b88a2 Mon Sep 17 00:00:00 2001 From: Rob Clark <robdclark@chromium.org> Date: Tue, 13 Feb 2024 09:23:40 -0800 Subject: [PATCH 201/304] drm/msm: Wire up tlb ops The brute force iommu_flush_iotlb_all() was good enough for unmap, but in some cases a map operation could require removing a table pte entry to replace with a block entry. This also requires tlb invalidation. Missing this was resulting an obscure iova fault on what should be a valid buffer address. Thanks to Robin Murphy for helping me understand the cause of the fault. Cc: Robin Murphy <robin.murphy@arm.com> Cc: stable@vger.kernel.org Fixes: b145c6e65eb0 ("drm/msm: Add support to create a local pagetable") Signed-off-by: Rob Clark <robdclark@chromium.org> Patchwork: https://patchwork.freedesktop.org/patch/578117/ --- drivers/gpu/drm/msm/msm_iommu.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 5cc8d358cc97..d5512037c38b 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -21,6 +21,8 @@ struct msm_iommu_pagetable { struct msm_mmu base; struct msm_mmu *parent; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ phys_addr_t ttbr; u32 asid; @@ -201,11 +203,33 @@ static const struct msm_mmu_funcs pagetable_funcs = { static void msm_iommu_tlb_flush_all(void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_all((void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { + struct msm_iommu_pagetable *pagetable = cookie; + struct adreno_smmu_priv *adreno_smmu; + + if (!pm_runtime_get_if_in_use(pagetable->iommu_dev)) + return; + + adreno_smmu = dev_get_drvdata(pagetable->parent->dev); + + pagetable->tlb->tlb_flush_walk(iova, size, granule, (void *)adreno_smmu->cookie); + + pm_runtime_put_autosuspend(pagetable->iommu_dev); } static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, @@ -213,7 +237,7 @@ static void msm_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, { } -static const struct iommu_flush_ops null_tlb_ops = { +static const struct iommu_flush_ops tlb_ops = { .tlb_flush_all = msm_iommu_tlb_flush_all, .tlb_flush_walk = msm_iommu_tlb_flush_walk, .tlb_add_page = msm_iommu_tlb_add_page, @@ -254,10 +278,10 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* The incoming cfg will have the TTBR1 quirk enabled */ ttbr0_cfg.quirks &= ~IO_PGTABLE_QUIRK_ARM_TTBR1; - ttbr0_cfg.tlb = &null_tlb_ops; + ttbr0_cfg.tlb = &tlb_ops; pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, - &ttbr0_cfg, iommu->domain); + &ttbr0_cfg, pagetable); if (!pagetable->pgtbl_ops) { kfree(pagetable); @@ -279,6 +303,8 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) /* Needed later for TLB flush */ pagetable->parent = parent; + pagetable->tlb = ttbr1_cfg->tlb; + pagetable->iommu_dev = ttbr1_cfg->iommu_dev; pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap; pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr; From 8a566f94104df87a067458351675129bb4e1ece2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Date: Thu, 15 Feb 2024 16:22:55 +0200 Subject: [PATCH 202/304] seq_buf: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Link: https://lkml.kernel.org/r/20240215142255.400264-1-andriy.shevchenko@linux.intel.com Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> --- include/linux/seq_buf.h | 5 ++++- lib/seq_buf.c | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index c44f4b47b945..07b26e751060 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -2,7 +2,10 @@ #ifndef _LINUX_SEQ_BUF_H #define _LINUX_SEQ_BUF_H -#include <linux/fs.h> +#include <linux/bug.h> +#include <linux/minmax.h> +#include <linux/seq_file.h> +#include <linux/types.h> /* * Trace sequences are used to allow a function to call several other functions diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 010c730ca7fc..dfbfdc497d85 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -13,9 +13,19 @@ * seq_buf_init() more than once to reset the seq_buf to start * from scratch. */ -#include <linux/uaccess.h> -#include <linux/seq_file.h> + +#include <linux/bug.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/hex.h> +#include <linux/minmax.h> +#include <linux/printk.h> #include <linux/seq_buf.h> +#include <linux/seq_file.h> +#include <linux/sprintf.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/uaccess.h> /** * seq_buf_can_fit - can the new data fit in the current buffer? From 6efe4d18796934b8ada66c1c446510e7f2d9b972 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Date: Thu, 15 Feb 2024 17:25:06 +0200 Subject: [PATCH 203/304] seq_buf: Fix kernel documentation There are plenty of issues with the kernel documentation here: - misspelled word "sequence" - different style of returned value descriptions - missed Return sections - unaligned style of ASCII / NUL-terminated / etc - wrong function references Fix all these. Link: https://lkml.kernel.org/r/20240215152506.598340-1-andriy.shevchenko@linux.intel.com Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Reviewed-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> --- include/linux/seq_buf.h | 12 ++++++------ lib/seq_buf.c | 35 ++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 07b26e751060..fe41da005970 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -13,7 +13,7 @@ */ /** - * seq_buf - seq buffer structure + * struct seq_buf - seq buffer structure * @buffer: pointer to the buffer * @size: size of the buffer * @len: the amount of data inside the buffer @@ -80,10 +80,10 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) } /** - * seq_buf_str - get %NUL-terminated C string from seq_buf + * seq_buf_str - get NUL-terminated C string from seq_buf * @s: the seq_buf handle * - * This makes sure that the buffer in @s is nul terminated and + * This makes sure that the buffer in @s is NUL-terminated and * safe to read as a string. * * Note, if this is called when the buffer has overflowed, then @@ -93,7 +93,7 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) * After this function is called, s->buffer is safe to use * in string operations. * - * Returns @s->buf after making sure it is terminated. + * Returns: @s->buf after making sure it is terminated. */ static inline const char *seq_buf_str(struct seq_buf *s) { @@ -113,7 +113,7 @@ static inline const char *seq_buf_str(struct seq_buf *s) * @s: the seq_buf handle * @bufp: the beginning of the buffer is stored here * - * Return the number of bytes available in the buffer, or zero if + * Returns: the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) @@ -135,7 +135,7 @@ static inline size_t seq_buf_get_buf(struct seq_buf *s, char **bufp) * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired - * by seq_buf_get. To signal an error condition, or that the data + * by seq_buf_get_buf(). To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_buf_commit(struct seq_buf *s, int num) diff --git a/lib/seq_buf.c b/lib/seq_buf.c index dfbfdc497d85..f3f3436d60a9 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -32,7 +32,7 @@ * @s: the seq_buf descriptor * @len: The length to see if it can fit in the current buffer * - * Returns true if there's enough unused space in the seq_buf buffer + * Returns: true if there's enough unused space in the seq_buf buffer * to fit the amount of new data according to @len. */ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) @@ -45,7 +45,7 @@ static bool seq_buf_can_fit(struct seq_buf *s, size_t len) * @m: the seq_file descriptor that is the destination * @s: the seq_buf descriptor that is the source. * - * Returns zero on success, non zero otherwise + * Returns: zero on success, non-zero otherwise. */ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) { @@ -60,9 +60,9 @@ int seq_buf_print_seq(struct seq_file *m, struct seq_buf *s) * @fmt: printf format string * @args: va_list of arguments from a printf() type function * - * Writes a vnprintf() format into the sequencce buffer. + * Writes a vnprintf() format into the sequence buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) { @@ -88,7 +88,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) * * Writes a printf() format into the sequence buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) { @@ -104,12 +104,12 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) EXPORT_SYMBOL_GPL(seq_buf_printf); /** - * seq_buf_do_printk - printk seq_buf line by line + * seq_buf_do_printk - printk() seq_buf line by line * @s: seq_buf descriptor * @lvl: printk level * * printk()-s a multi-line sequential buffer line by line. The function - * makes sure that the buffer in @s is nul terminated and safe to read + * makes sure that the buffer in @s is NUL-terminated and safe to read * as a string. */ void seq_buf_do_printk(struct seq_buf *s, const char *lvl) @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(seq_buf_do_printk); * This function will take the format and the binary array and finish * the conversion into the ASCII string within the buffer. * - * Returns zero on success, -1 on overflow. + * Returns: zero on success, -1 on overflow. */ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) { @@ -177,7 +177,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) * * Copy a simple string into the sequence buffer. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_puts(struct seq_buf *s, const char *str) { @@ -206,7 +206,7 @@ EXPORT_SYMBOL_GPL(seq_buf_puts); * * Copy a single character into the sequence buffer. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putc(struct seq_buf *s, unsigned char c) { @@ -222,7 +222,7 @@ int seq_buf_putc(struct seq_buf *s, unsigned char c) EXPORT_SYMBOL_GPL(seq_buf_putc); /** - * seq_buf_putmem - write raw data into the sequenc buffer + * seq_buf_putmem - write raw data into the sequence buffer * @s: seq_buf descriptor * @mem: The raw memory to copy into the buffer * @len: The length of the raw memory to copy (in bytes) @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(seq_buf_putc); * buffer and a strcpy() would not work. Using this function allows * for such cases. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) { @@ -259,7 +259,7 @@ int seq_buf_putmem(struct seq_buf *s, const void *mem, unsigned int len) * raw memory into the buffer it writes its ASCII representation of it * in hex characters. * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len) @@ -307,7 +307,7 @@ int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, * * Write a path name into the sequence buffer. * - * Returns the number of written bytes on success, -1 on overflow + * Returns: the number of written bytes on success, -1 on overflow. */ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) { @@ -342,6 +342,7 @@ int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc) * or until it reaches the end of the content in the buffer (@s->len), * whichever comes first. * + * Returns: * On success, it returns a positive number of the number of bytes * it copied. * @@ -392,11 +393,11 @@ int seq_buf_to_user(struct seq_buf *s, char __user *ubuf, size_t start, int cnt) * linebuf size is maximal length for one line. * 32 * 3 - maximum bytes per line, each printed into 2 chars + 1 for * separating space - * 2 - spaces separating hex dump and ascii representation - * 32 - ascii representation + * 2 - spaces separating hex dump and ASCII representation + * 32 - ASCII representation * 1 - terminating '\0' * - * Returns zero on success, -1 on overflow + * Returns: zero on success, -1 on overflow. */ int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, From d16df040c8dad25c962b4404d2d534bfea327c6a Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz <hamza.mahfooz@amd.com> Date: Thu, 8 Feb 2024 16:23:29 -0500 Subject: [PATCH 204/304] drm/amdgpu: make damage clips support configurable We have observed that there are quite a number of PSR-SU panels on the market that are unable to keep up with what user space throws at them, resulting in hangs and random black screens. So, make damage clips support configurable and disable it by default for PSR-SU displays. Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 13 +++++++++++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6dce81a061ab..517117a0796f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -200,6 +200,7 @@ extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; +extern int amdgpu_damage_clips; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 211501ea9169..586f4d03039d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -211,6 +211,7 @@ int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; int amdgpu_agp = -1; /* auto */ int amdgpu_wbrf = -1; +int amdgpu_damage_clips = -1; /* auto */ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -859,6 +860,18 @@ int amdgpu_backlight = -1; MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); module_param_named(backlight, amdgpu_backlight, bint, 0444); +/** + * DOC: damageclips (int) + * Enable or disable damage clips support. If damage clips support is disabled, + * we will force full frame updates, irrespective of what user space sends to + * us. + * + * Defaults to -1 (where it is enabled unless a PSR-SU display is detected). + */ +MODULE_PARM_DESC(damageclips, + "Damage clips support (0 = disable, 1 = enable, -1 auto (default))"); +module_param_named(damageclips, amdgpu_damage_clips, int, 0444); + /** * DOC: tmz (int) * Trusted Memory Zone (TMZ) is a method to protect data being written diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 59d2eee72a32..d5ef07af9906 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5219,6 +5219,7 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, struct drm_plane_state *new_plane_state, struct drm_crtc_state *crtc_state, struct dc_flip_addrs *flip_addrs, + bool is_psr_su, bool *dirty_regions_changed) { struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); @@ -5243,6 +5244,10 @@ static void fill_dc_dirty_rects(struct drm_plane *plane, num_clips = drm_plane_get_damage_clips_count(new_plane_state); clips = drm_plane_get_damage_clips(new_plane_state); + if (num_clips && (!amdgpu_damage_clips || (amdgpu_damage_clips < 0 && + is_psr_su))) + goto ffu; + if (!dm_crtc_state->mpo_requested) { if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS) goto ffu; @@ -8298,6 +8303,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, fill_dc_dirty_rects(plane, old_plane_state, new_plane_state, new_crtc_state, &bundle->flip_addrs[planes_count], + acrtc_state->stream->link->psr_settings.psr_version == + DC_PSR_VERSION_SU_1, &dirty_rects_changed); /* From a0c9956a8d5a808c173028f1e388377a890a2fdb Mon Sep 17 00:00:00 2001 From: Kent Russell <kent.russell@amd.com> Date: Tue, 6 Feb 2024 12:45:44 -0500 Subject: [PATCH 205/304] drm/amdkfd: Fix L2 cache size reporting in GFX9.4.3 Its currently incorrectly multiplied by number of XCCs in the partition Fixes: be457b2252b6 ("drm/amdkfd: Update cache info for GFX 9.4.3") Signed-off-by: Kent Russell <kent.russell@amd.com> Reviewed-by: Mukul Joshi <mukul.joshi@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e5f7c92eebcb..6ed2ec381aaa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1638,12 +1638,10 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, else mode = UNKNOWN_MEMORY_PARTITION_MODE; - if (pcache->cache_level == 2) - pcache->cache_size = pcache_info[cache_type].cache_size * num_xcc; - else if (mode) - pcache->cache_size = pcache_info[cache_type].cache_size / mode; - else - pcache->cache_size = pcache_info[cache_type].cache_size; + pcache->cache_size = pcache_info[cache_type].cache_size; + /* Partition mode only affects L3 cache size */ + if (mode && pcache->cache_level == 3) + pcache->cache_size /= mode; if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE) pcache->cache_type |= HSA_CACHE_TYPE_DATA; From 17ba9cde11c2bfebbd70867b0a2ac4a22e573379 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@linaro.org> Date: Fri, 9 Feb 2024 16:02:42 +0300 Subject: [PATCH 206/304] drm/amd/display: Fix && vs || typos These ANDs should be ORs or it will lead to a NULL dereference. Fixes: fb5a3d037082 ("drm/amd/display: Add NULL test for 'timing generator' in 'dcn21_set_pipe()'") Fixes: 886571d217d7 ("drm/amd/display: Fix 'panel_cntl' could be null in 'dcn21_set_backlight_level()'") Reviewed-by: Anthony Koo <anthony.koo@amd.com> Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org> Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 5c7f380a84f9..7252f5f781f0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -211,7 +211,7 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu; uint32_t otg_inst; - if (!abm && !tg && !panel_cntl) + if (!abm || !tg || !panel_cntl) return; otg_inst = tg->inst; @@ -245,7 +245,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; uint32_t otg_inst; - if (!abm && !tg && !panel_cntl) + if (!abm || !tg || !panel_cntl) return false; otg_inst = tg->inst; From 7edb5830ecb0033184ee2fa01ae8af17d56450ec Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Mon, 5 Feb 2024 16:07:36 +0530 Subject: [PATCH 207/304] drm/amd/display: Initialize 'wait_time_microsec' variable in link_dp_training_dpia.c wait_time_microsec = max(wait_time_microsec, (uint32_t) DPIA_CLK_SYNC_DELAY); Above line is trying to assign the maximum value between 'wait_time_microsec' and 'DPIA_CLK_SYNC_DELAY' to wait_time_microsec. However, 'wait_time_microsec' has not been assigned a value before this line, initialize 'wait_time_microsec' at the point of declaration. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_training_dpia.c:697 dpia_training_eq_non_transparent() error: uninitialized symbol 'wait_time_microsec'. Fixes: 630168a97314 ("drm/amd/display: move dp link training logic to link_dp_training") Cc: Wenjing Liu <wenjing.liu@amd.com> Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> Cc: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Reviewed-by: Roman Li <roman.li@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../drm/amd/display/dc/link/protocols/link_dp_training_dpia.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c index e8dda44b23cb..5d36bab0029c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training_dpia.c @@ -619,7 +619,7 @@ static enum link_training_result dpia_training_eq_non_transparent( uint32_t retries_eq = 0; enum dc_status status; enum dc_dp_training_pattern tr_pattern; - uint32_t wait_time_microsec; + uint32_t wait_time_microsec = 0; enum dc_lane_count lane_count = lt_settings->link_settings.lane_count; union lane_align_status_updated dpcd_lane_status_updated = {0}; union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; From 88c6d84dd8f70e498f89972449e6ebb7aa1309c0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Mon, 5 Feb 2024 15:07:02 +0530 Subject: [PATCH 208/304] drm/amd/display: Fix possible use of uninitialized 'max_chunks_fbc_mode' in 'calculate_bandwidth()' 'max_chunks_fbc_mode' is only declared and assigned a value under a specific condition in the following lines: if (data->fbc_en[i] == 1) { max_chunks_fbc_mode = 128 - dmif_chunk_buff_margin; } If 'data->fbc_en[i]' is not equal to 1 for any i, max_chunks_fbc_mode will not be initialized if it's used outside of this for loop. Ensure that 'max_chunks_fbc_mode' is properly initialized before it's used. Initialize it to a default value right after its declaration to ensure that it gets a value assigned under all possible control flow paths. Thus fixing the below: drivers/gpu/drm/amd/amdgpu/../display/dc/basics/dce_calcs.c:914 calculate_bandwidth() error: uninitialized symbol 'max_chunks_fbc_mode'. drivers/gpu/drm/amd/amdgpu/../display/dc/basics/dce_calcs.c:917 calculate_bandwidth() error: uninitialized symbol 'max_chunks_fbc_mode'. Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)") Cc: Harry Wentland <harry.wentland@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> Cc: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Reviewed-by: Roman Li <roman.li@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c index f2dfa96f9ef5..39530b2ea495 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/basics/dce_calcs.c @@ -94,7 +94,7 @@ static void calculate_bandwidth( const uint32_t s_high = 7; const uint32_t dmif_chunk_buff_margin = 1; - uint32_t max_chunks_fbc_mode; + uint32_t max_chunks_fbc_mode = 0; int32_t num_cursor_lines; int32_t i, j, k; From ccc514b7e7acbd301219cbaec0fc0bfe5741acee Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Mon, 5 Feb 2024 16:54:10 +0530 Subject: [PATCH 209/304] drm/amd/display: Fix possible buffer overflow in 'find_dcfclk_for_voltage()' when 'find_dcfclk_for_voltage()' function is looping over VG_NUM_SOC_VOLTAGE_LEVELS (which is 8), but the size of the DcfClocks array is VG_NUM_DCFCLK_DPM_LEVELS (which is 7). When the loop variable i reaches 7, the function tries to access clock_table->DcfClocks[7]. However, since the size of the DcfClocks array is 7, the valid indices are 0 to 6. Index 7 is beyond the size of the array, leading to a buffer overflow. Reported by smatch & thus fixing the below: drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn301/vg_clk_mgr.c:550 find_dcfclk_for_voltage() error: buffer overflow 'clock_table->DcfClocks' 7 <= 7 Fixes: 3a83e4e64bb1 ("drm/amd/display: Add dcn3.01 support to DC (v2)") Cc: Roman Li <Roman.Li@amd.com> Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> Cc: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Reviewed-by: Roman Li <roman.li@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index a5489fe6875f..aa9fd1dc550a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -546,6 +546,8 @@ static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_ta int i; for (i = 0; i < VG_NUM_SOC_VOLTAGE_LEVELS; i++) { + if (i >= VG_NUM_DCFCLK_DPM_LEVELS) + break; if (clock_table->SocVoltage[i] == voltage) return clock_table->DcfClocks[i]; } From 3a9626c816db901def438dc2513622e281186d39 Mon Sep 17 00:00:00 2001 From: Mario Limonciello <mario.limonciello@amd.com> Date: Wed, 7 Feb 2024 23:52:55 -0600 Subject: [PATCH 210/304] drm/amd: Stop evicting resources on APUs in suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") intentionally moved the eviction of resources to earlier in the suspend process, but this introduced a subtle change that it occurs before adev->in_s0ix or adev->in_s3 are set. This meant that APUs actually started to evict resources at suspend time as well. Explicitly set s0ix or s3 in the prepare() stage, and unset them if the prepare() stage failed. v2: squash in warning fix from Stephen Rothwell Reported-by: Jürg Billeter <j@bitron.ch> Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3132#note_2271038 Fixes: 5095d5418193 ("drm/amd: Evict resources during PM ops prepare() callback") Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 517117a0796f..79827a6dcd7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1550,9 +1550,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); +void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } +static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 2deebece810e..cc21ed67a330 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1519,4 +1519,19 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } +/** + * amdgpu_choose_low_power_state + * + * @adev: amdgpu_device_pointer + * + * Choose the target low power state for the GPU + */ +void amdgpu_choose_low_power_state(struct amdgpu_device *adev) +{ + if (amdgpu_acpi_is_s0ix_active(adev)) + adev->in_s0ix = true; + else if (amdgpu_acpi_is_s3_active(adev)) + adev->in_s3 = true; +} + #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fdde7488d0ed..d4b3d044935c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4514,13 +4514,15 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; + amdgpu_choose_low_power_state(adev); + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - return r; + goto unprepare; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) @@ -4529,10 +4531,15 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); if (r) - return r; + goto unprepare; } return 0; + +unprepare: + adev->in_s0ix = adev->in_s3 = false; + + return r; } /** From 916361685319098f696b798ef1560f69ed96e934 Mon Sep 17 00:00:00 2001 From: Mario Limonciello <mario.limonciello@amd.com> Date: Wed, 7 Feb 2024 23:52:54 -0600 Subject: [PATCH 211/304] Revert "drm/amd: flush any delayed gfxoff on suspend entry" commit ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") caused GFXOFF control to be used more heavily and the codepath that was removed from commit 0dee72639533 ("drm/amd: flush any delayed gfxoff on suspend entry") now can be exercised at suspend again. Users report that by using GNOME to suspend the lockscreen trigger will cause SDMA traffic and the system can deadlock. This reverts commit 0dee726395333fea833eaaf838bc80962df886c8. Acked-by: Alex Deucher <alexander.deucher@amd.com> Fixes: ab4750332dbe ("drm/amdgpu/sdma5.2: add begin/end_use ring callbacks") Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 ++++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d4b3d044935c..94bdb5fa6ebc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4576,7 +4576,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b9674c57c436..6ddc8e3360e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -723,8 +723,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { From 94b38b895dec8c0ef093140a141e191b60ff614c Mon Sep 17 00:00:00 2001 From: Zhikai Zhai <zhikai.zhai@amd.com> Date: Mon, 29 Jan 2024 17:02:18 +0800 Subject: [PATCH 212/304] drm/amd/display: Add align done check [WHY] We Double-check link status if training successful, but miss the lane align status. [HOW] Add the lane align status check Cc: Mario Limonciello <mario.limonciello@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org Reviewed-by: Wenjing Liu <wenjing.liu@amd.com> Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Zhikai Zhai <zhikai.zhai@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_training.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 5a0b04518956..16a62e018712 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status( { enum link_training_result status = LINK_TRAINING_SUCCESS; union lane_status lane_status; + union lane_align_status_updated dpcd_lane_status_updated; uint8_t dpcd_buf[6] = {0}; uint32_t lane; @@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status( * check lanes status */ lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane); + dpcd_lane_status_updated.raw = dpcd_buf[4]; if (!lane_status.bits.CHANNEL_EQ_DONE_0 || !lane_status.bits.CR_DONE_0 || - !lane_status.bits.SYMBOL_LOCKED_0) { + !lane_status.bits.SYMBOL_LOCKED_0 || + !dp_is_interlane_aligned(dpcd_lane_status_updated)) { /* if one of the channel equalization, clock * recovery or symbol lock is dropped * consider it as (link has been From a538dabf772c169641e151834e161e241802ab33 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem <sohaib.nadeem@amd.com> Date: Mon, 29 Jan 2024 17:33:40 -0500 Subject: [PATCH 213/304] Revert "drm/amd/display: increased min_dcfclk_mhz and min_fclk_mhz" [why]: This reverts commit 2ff33c759a4247c84ec0b7815f1f223e155ba82a. The commit caused corruption when running some applications in fullscreen Cc: Mario Limonciello <mario.limonciello@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org Reviewed-by: Alvin Lee <alvin.lee2@amd.com> Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Sohaib Nadeem <sohaib.nadeem@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index ba76dd4a2ce2..a0a65e099104 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2760,7 +2760,7 @@ static int build_synthetic_soc_states(bool disable_dc_mode_overwrite, struct clk struct _vcs_dpi_voltage_scaling_st entry = {0}; struct clk_limit_table_entry max_clk_data = {0}; - unsigned int min_dcfclk_mhz = 399, min_fclk_mhz = 599; + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; static const unsigned int num_dcfclk_stas = 5; unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; From a589fa17cc4456df75f16fa3b49e8da0112e5100 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Tue, 6 Feb 2024 09:34:25 +0530 Subject: [PATCH 214/304] drm/amd/display: Fix possible NULL dereference on device remove/driver unload As part of a cleanup amdgpu_dm_fini() function, which is typically called when a device is being shut down or a driver is being unloaded The below error message suggests that there is a potential null pointer dereference issue with adev->dm.dc. In the below, line of code where adev->dm.dc is used without a preceding null check: for (i = 0; i < adev->dm.dc->caps.max_links; i++) { To fix this issue, add a null check for adev->dm.dc before this line. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:1959 amdgpu_dm_fini() error: we previously assumed 'adev->dm.dc' could be null (see line 1943) Fixes: 006c26a0f1c8 ("drm/amd/display: Fix crash on device remove/driver unload") Cc: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Cc: Harry Wentland <harry.wentland@amd.com> Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> Cc: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Reviewed-by: Roman Li <roman.li@amd.com> Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d5ef07af9906..e0f121b221f5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1956,7 +1956,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) &adev->dm.dmub_bo_gpu_addr, &adev->dm.dmub_bo_cpu_addr); - if (adev->dm.hpd_rx_offload_wq) { + if (adev->dm.hpd_rx_offload_wq && adev->dm.dc) { for (i = 0; i < adev->dm.dc->caps.max_links; i++) { if (adev->dm.hpd_rx_offload_wq[i].wq) { destroy_workqueue(adev->dm.hpd_rx_offload_wq[i].wq); From deb110292180cd501f6fde2a0178d65fcbcabb0c Mon Sep 17 00:00:00 2001 From: Tom Chung <chiahsuan.chung@amd.com> Date: Tue, 30 Jan 2024 15:34:08 +0800 Subject: [PATCH 215/304] drm/amd/display: Preserve original aspect ratio in create stream [Why] The original picture aspect ratio in mode struct may have chance be overwritten with wrong aspect ratio data in create_stream_for_sink(). It will create a different VIC output and cause HDMI compliance test failed. [How] Preserve the original picture aspect ratio data during create the stream. Cc: Mario Limonciello <mario.limonciello@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Tom Chung <chiahsuan.chung@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e0f121b221f5..cf875751971f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6199,7 +6199,9 @@ create_stream_for_sink(struct drm_connector *connector, if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); + saved_mode.picture_aspect_ratio = mode.picture_aspect_ratio; drm_mode_copy(&mode, freesync_mode); + mode.picture_aspect_ratio = saved_mode.picture_aspect_ratio; } else { decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, scale); From 46806e59a87790760870d216f54951a5b4d545bc Mon Sep 17 00:00:00 2001 From: Roman Li <roman.li@amd.com> Date: Tue, 30 Jan 2024 18:07:24 -0500 Subject: [PATCH 216/304] drm/amd/display: Fix array-index-out-of-bounds in dcn35_clkmgr [Why] There is a potential memory access violation while iterating through array of dcn35 clks. [How] Limit iteration per array size. Cc: Mario Limonciello <mario.limonciello@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com> Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Roman Li <roman.li@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 14cec1c7b718..e64890259235 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -655,10 +655,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; uint32_t max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; uint32_t max_pstate = 0, max_dram_speed_mts = 0, min_dram_speed_mts = 0; + uint32_t num_memps, num_fclk, num_dcfclk; int i; /* Determine min/max p-state values. */ - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + num_memps = (clock_table->NumMemPstatesEnabled > NUM_MEM_PSTATE_LEVELS) ? NUM_MEM_PSTATE_LEVELS : + clock_table->NumMemPstatesEnabled; + for (i = 0; i < num_memps; i++) { uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts > max_dram_speed_mts) { @@ -670,7 +673,7 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk min_dram_speed_mts = max_dram_speed_mts; min_pstate = max_pstate; - for (i = 0; i < clock_table->NumMemPstatesEnabled; i++) { + for (i = 0; i < num_memps; i++) { uint32_t dram_speed_mts = calc_dram_speed_mts(&clock_table->MemPstateTable[i]); if (is_valid_clock_value(dram_speed_mts) && dram_speed_mts < min_dram_speed_mts) { @@ -699,9 +702,13 @@ static void dcn35_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ ASSERT(clock_table->NumDcfClkLevelsEnabled > 0); - max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, clock_table->NumFclkLevelsEnabled); + num_fclk = (clock_table->NumFclkLevelsEnabled > NUM_FCLK_DPM_LEVELS) ? NUM_FCLK_DPM_LEVELS : + clock_table->NumFclkLevelsEnabled; + max_fclk = find_max_clk_value(clock_table->FclkClocks_Freq, num_fclk); - for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + num_dcfclk = (clock_table->NumFclkLevelsEnabled > NUM_DCFCLK_DPM_LEVELS) ? NUM_DCFCLK_DPM_LEVELS : + clock_table->NumDcfClkLevelsEnabled; + for (i = 0; i < num_dcfclk; i++) { int j; /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ From 0484e05d048b66d01d1f3c1d2306010bb57d8738 Mon Sep 17 00:00:00 2001 From: Sohaib Nadeem <sohaib.nadeem@amd.com> Date: Wed, 31 Jan 2024 16:40:37 -0500 Subject: [PATCH 217/304] drm/amd/display: fixed integer types and null check locations [why]: issues fixed: - comparison with wider integer type in loop condition which can cause infinite loops - pointer dereference before null check Cc: Mario Limonciello <mario.limonciello@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org Reviewed-by: Josip Pavic <josip.pavic@amd.com> Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Sohaib Nadeem <sohaib.nadeem@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- .../gpu/drm/amd/display/dc/bios/bios_parser2.c | 16 ++++++++++------ .../drm/amd/display/dc/link/link_validation.c | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 960c4b4f6ddf..05f392501c0a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1850,19 +1850,21 @@ static enum bp_result get_firmware_info_v3_2( /* Vega12 */ smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); if (!smu_info_v3_2) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_2->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10; } else if (revision.minor == 3) { /* Vega20 */ smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3, DATA_TABLES(smu_info)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); if (!smu_info_v3_3) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", smu_info_v3_3->gpuclk_ss_percentage); + info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10; } @@ -2422,10 +2424,11 @@ static enum bp_result get_integrated_info_v11( info_v11 = GET_IMAGE(struct atom_integrated_system_info_v1_11, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); if (info_v11 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v11->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v11->gpucapinfo); /* @@ -2637,11 +2640,12 @@ static enum bp_result get_integrated_info_v2_1( info_v2_1 = GET_IMAGE(struct atom_integrated_system_info_v2_1, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); if (info_v2_1 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_1->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_1->gpucapinfo); /* @@ -2799,11 +2803,11 @@ static enum bp_result get_integrated_info_v2_2( info_v2_2 = GET_IMAGE(struct atom_integrated_system_info_v2_2, DATA_TABLES(integratedsysteminfo)); - DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); - if (info_v2_2 == NULL) return BP_RESULT_BADBIOSTABLE; + DC_LOG_BIOS("gpuclk_ss_percentage (unit of 0.001 percent): %d\n", info_v2_2->gpuclk_ss_percentage); + info->gpu_cap_info = le32_to_cpu(info_v2_2->gpucapinfo); /* diff --git a/drivers/gpu/drm/amd/display/dc/link/link_validation.c b/drivers/gpu/drm/amd/display/dc/link/link_validation.c index 8fe66c367850..5b0bc7f6a188 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_validation.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_validation.c @@ -361,7 +361,7 @@ bool link_validate_dpia_bandwidth(const struct dc_stream_state *stream, const un struct dc_link *dpia_link[MAX_DPIA_NUM] = {0}; int num_dpias = 0; - for (uint8_t i = 0; i < num_streams; ++i) { + for (unsigned int i = 0; i < num_streams; ++i) { if (stream[i].signal == SIGNAL_TYPE_DISPLAY_PORT) { /* new dpia sst stream, check whether it exceeds max dpia */ if (num_dpias >= MAX_DPIA_NUM) From 2f542421a47e8246e9b7d2c6508fe3a6e6c63078 Mon Sep 17 00:00:00 2001 From: Thong <thong.thai@amd.com> Date: Tue, 6 Feb 2024 18:05:16 -0500 Subject: [PATCH 218/304] drm/amdgpu/soc21: update VCN 4 max HEVC encoding resolution Update the maximum resolution reported for HEVC encoding on VCN 4 devices to reflect its 8K encoding capability. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3159 Signed-off-by: Thong <thong.thai@amd.com> Reviewed-by: Ruijing Dong <ruijing.dong@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc21.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 48c6efcdeac9..4d7188912edf 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs; /* SOC21 */ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = { From a82197e3a5f45450cbaf92095d8a51249dc44c79 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Date: Sun, 11 Feb 2024 09:04:19 +0530 Subject: [PATCH 219/304] drm/amdgpu/display: Initialize gamma correction mode variable in dcn30_get_gamcor_current() The dcn30_get_gamcor_current() function is responsible for determining the current gamma correction mode used by the display controller. However, the 'mode' variable, which stores the gamma correction mode, was not initialized before its first usage, leading to an uninitialized symbol error. Thus initializes the 'mode' variable with a default value of LUT_BYPASS before the conditional statements in the function, improves code clarity and stability, ensuring correct behavior of the dcn30_get_gamcor_current() function in determining the gamma correction mode. Fixes the below: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn30/dcn30_dpp_cm.c:77 dpp30_get_gamcor_current() error: uninitialized symbol 'mode'. Fixes: 03f54d7d3448 ("drm/amd/display: Add DCN3 DPP") Cc: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com> Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> Cc: Aurabindo Pillai <aurabindo.pillai@amd.com> Cc: Tom Chung <chiahsuan.chung@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Suggested-by: Roman Li <roman.li@amd.com> Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c index e43f77c11c00..5f97a868ada3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp_cm.c @@ -56,16 +56,13 @@ static void dpp3_enable_cm_block( static enum dc_lut_mode dpp30_get_gamcor_current(struct dpp *dpp_base) { - enum dc_lut_mode mode; + enum dc_lut_mode mode = LUT_BYPASS; uint32_t state_mode; uint32_t lut_mode; struct dcn3_dpp *dpp = TO_DCN30_DPP(dpp_base); REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_MODE_CURRENT, &state_mode); - if (state_mode == 0) - mode = LUT_BYPASS; - if (state_mode == 2) {//Programmable RAM LUT REG_GET(CM_GAMCOR_CONTROL, CM_GAMCOR_SELECT_CURRENT, &lut_mode); if (lut_mode == 0) From 0d555e481c1333c8ae170198ca111947c22fc9c9 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com> Date: Tue, 23 Jan 2024 12:20:06 -0500 Subject: [PATCH 220/304] drm/amd/display: Increase ips2_eval delay for DCN35 [Why] New worst-case measurement observed at 1897us. [How] Increase to 2000us to cover the new worst case + margin. Reviewed-by: Ovidiu Bunea <ovidiu.bunea@amd.com> Acked-by: Aurabindo Pillai <aurabindo.pillai@amd.com> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com> Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 1c3d89264ef7..5fdcda8f8602 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -780,7 +780,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = false, .ignore_pg = true, .psp_disabled_wa = true, - .ips2_eval_delay_us = 1650, + .ips2_eval_delay_us = 2000, .ips2_entry_delay_us = 800, .static_screen_wait_frames = 2, }; From e3de58f8fd5bda8685bb87bf7457bbc10479765b Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Date: Wed, 31 Jan 2024 19:33:49 -0500 Subject: [PATCH 221/304] drm/amdkfd: update SIMD distribution algo for GFXIP 9.4.2 onwards In certain cooperative group dispatch scenarios the default SPI resource allocation may cause reduced per-CU workgroup occupancy. Set COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang scenarions. Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Suggested-by: Joseph Greathouse <Joseph.Greathouse@amd.com> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 9 +++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 42d881809dc7..697b6d530d12 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -303,6 +303,15 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); + if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) { + if (minfo->update_flag & UPDATE_FLAG_IS_GWS) + m->compute_resource_limits |= + COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK; + else + m->compute_resource_limits &= + ~COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK; + } + q->is_active = QUEUE_IS_ACTIVE(*q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 677281c0793e..80320b8603fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -532,6 +532,7 @@ struct queue_properties { enum mqd_update_flag { UPDATE_FLAG_DBG_WA_ENABLE = 1, UPDATE_FLAG_DBG_WA_DISABLE = 2, + UPDATE_FLAG_IS_GWS = 4, /* quirk for gfx9 IP */ }; struct mqd_update_info { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 43eff221eae5..4858112f9a53 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, void *gws) { + struct mqd_update_info minfo = {0}; struct kfd_node *dev = NULL; struct process_queue_node *pqn; struct kfd_process_device *pdd; @@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, } pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; + minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, - pqn->q, NULL); + pqn->q, &minfo); } void kfd_process_dequeue_from_all_devices(struct kfd_process *p) From a8ac4bcaeb660c5eeb273507e8dbf713ba56de44 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Date: Fri, 9 Feb 2024 20:23:19 -0500 Subject: [PATCH 222/304] drm/amdgpu: Fix implicit assumtion in gfx11 debug flags Gfx11 debug flags mask is currently set with an implicit assumption that no other mqd update flags exist. This needs to be fixed with newly introduced flag UPDATE_FLAG_IS_GWS by the previous patch. Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index d722cbd31783..826bc4f6c8a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -55,8 +55,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); if (has_wa_flag) { - uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ? - 0xffff : 0xffffffff; + uint32_t wa_mask = + (minfo->update_flag & UPDATE_FLAG_DBG_WA_ENABLE) ? 0xffff : 0xffffffff; m->compute_static_thread_mgmt_se0 = wa_mask; m->compute_static_thread_mgmt_se1 = wa_mask; From 14db5f64a971fce3d8ea35de4dfc7f443a3efb92 Mon Sep 17 00:00:00 2001 From: Damien Le Moal <dlemoal@kernel.org> Date: Thu, 8 Feb 2024 17:26:59 +0900 Subject: [PATCH 223/304] zonefs: Improve error handling Write error handling is racy and can sometime lead to the error recovery path wrongly changing the inode size of a sequential zone file to an incorrect value which results in garbage data being readable at the end of a file. There are 2 problems: 1) zonefs_file_dio_write() updates a zone file write pointer offset after issuing a direct IO with iomap_dio_rw(). This update is done only if the IO succeed for synchronous direct writes. However, for asynchronous direct writes, the update is done without waiting for the IO completion so that the next asynchronous IO can be immediately issued. However, if an asynchronous IO completes with a failure right before the i_truncate_mutex lock protecting the update, the update may change the value of the inode write pointer offset that was corrected by the error path (zonefs_io_error() function). 2) zonefs_io_error() is called when a read or write error occurs. This function executes a report zone operation using the callback function zonefs_io_error_cb(), which does all the error recovery handling based on the current zone condition, write pointer position and according to the mount options being used. However, depending on the zoned device being used, a report zone callback may be executed in a context that is different from the context of __zonefs_io_error(). As a result, zonefs_io_error_cb() may be executed without the inode truncate mutex lock held, which can lead to invalid error processing. Fix both problems as follows: - Problem 1: Perform the inode write pointer offset update before a direct write is issued with iomap_dio_rw(). This is safe to do as partial direct writes are not supported (IOMAP_DIO_PARTIAL is not set) and any failed IO will trigger the execution of zonefs_io_error() which will correct the inode write pointer offset to reflect the current state of the one on the device. - Problem 2: Change zonefs_io_error_cb() into zonefs_handle_io_error() and call this function directly from __zonefs_io_error() after obtaining the zone information using blkdev_report_zones() with a simple callback function that copies to a local stack variable the struct blk_zone obtained from the device. This ensures that error handling is performed holding the inode truncate mutex. This change also simplifies error handling for conventional zone files by bypassing the execution of report zones entirely. This is safe to do because the condition of conventional zones cannot be read-only or offline and conventional zone files are always fully mapped with a constant file size. Reported-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal <dlemoal@kernel.org> Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com> --- fs/zonefs/file.c | 42 +++++++++++++++++++----------- fs/zonefs/super.c | 66 +++++++++++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 43 deletions(-) diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index 6ab2318a9c8e..dba5dcb62bef 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -348,7 +348,12 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, struct zonefs_inode_info *zi = ZONEFS_I(inode); if (error) { - zonefs_io_error(inode, true); + /* + * For Sync IOs, error recovery is called from + * zonefs_file_dio_write(). + */ + if (!is_sync_kiocb(iocb)) + zonefs_io_error(inode, true); return error; } @@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) ret = -EINVAL; goto inode_unlock; } + /* + * Advance the zone write pointer offset. This assumes that the + * IO will succeed, which is OK to do because we do not allow + * partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO + * fails, the error path will correct the write pointer offset. + */ + z->z_wpoffset += count; + zonefs_inode_account_active(inode); mutex_unlock(&zi->i_truncate_mutex); } @@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) if (ret == -ENOTBLK) ret = -EBUSY; - if (zonefs_zone_is_seq(z) && - (ret > 0 || ret == -EIOCBQUEUED)) { - if (ret > 0) - count = ret; - - /* - * Update the zone write pointer offset assuming the write - * operation succeeded. If it did not, the error recovery path - * will correct it. Also do active seq file accounting. - */ - mutex_lock(&zi->i_truncate_mutex); - z->z_wpoffset += count; - zonefs_inode_account_active(inode); - mutex_unlock(&zi->i_truncate_mutex); + /* + * For a failed IO or partial completion, trigger error recovery + * to update the zone write pointer offset to a correct value. + * For asynchronous IOs, zonefs_file_write_dio_end_io() may already + * have executed error recovery if the IO already completed when we + * reach here. However, we cannot know that and execute error recovery + * again (that will not change anything). + */ + if (zonefs_zone_is_seq(z)) { + if (ret > 0 && ret != count) + ret = -EIO; + if (ret < 0 && ret != -EIOCBQUEUED) + zonefs_io_error(inode, true); } inode_unlock: diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 93971742613a..b6e8e7c96251 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode) z->z_mode = inode->i_mode; } -struct zonefs_ioerr_data { - struct inode *inode; - bool write; -}; - static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, void *data) { - struct zonefs_ioerr_data *err = data; - struct inode *inode = err->inode; + struct blk_zone *z = data; + + *z = *zone; + return 0; +} + +static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone, + bool write) +{ struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); @@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(sb, z, zone); isize = i_size_read(inode); if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) && - !err->write && isize == data_size) - return 0; + !write && isize == data_size) + return; /* * At this point, we detected either a bad zone or an inconsistency @@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * In all cases, warn about inode size inconsistency and handle the * IO error according to the zone condition and to the mount options. */ - if (zonefs_zone_is_seq(z) && isize != data_size) + if (isize != data_size) zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", inode->i_ino, isize, data_size); @@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, zonefs_i_size_write(inode, data_size); z->z_wpoffset = data_size; zonefs_inode_account_active(inode); - - return 0; } /* @@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write) { struct zonefs_zone *z = zonefs_inode_zone(inode); struct super_block *sb = inode->i_sb; - struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = 1; - struct zonefs_ioerr_data err = { - .inode = inode, - .write = write, - }; + struct blk_zone zone; int ret; /* - * The only files that have more than one zone are conventional zone - * files with aggregated conventional zones, for which the inode zone - * size is always larger than the device zone size. + * Conventional zone have no write pointer and cannot become read-only + * or offline. So simply fake a report for a single or aggregated zone + * and let zonefs_handle_io_error() correct the zone inode information + * according to the mount options. */ - if (z->z_size > bdev_zone_sectors(sb->s_bdev)) - nr_zones = z->z_size >> - (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + if (!zonefs_zone_is_seq(z)) { + zone.start = z->z_sector; + zone.len = z->z_size >> SECTOR_SHIFT; + zone.wp = zone.start + zone.len; + zone.type = BLK_ZONE_TYPE_CONVENTIONAL; + zone.cond = BLK_ZONE_COND_NOT_WP; + zone.capacity = zone.len; + goto handle_io_error; + } /* * Memory allocations in blkdev_report_zones() can trigger a memory @@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write) * the GFP_NOIO context avoids both problems. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones, - zonefs_io_error_cb, &err); - if (ret != nr_zones) + ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1, + zonefs_io_error_cb, &zone); + memalloc_noio_restore(noio_flag); + + if (ret != 1) { zonefs_err(sb, "Get inode %lu zone information failed %d\n", inode->i_ino, ret); - memalloc_noio_restore(noio_flag); + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + return; + } + +handle_io_error: + zonefs_handle_io_error(inode, &zone, write); } static struct kmem_cache *zonefs_inode_cachep; From ee0e39a63b78849f8abbef268b13e4838569f646 Mon Sep 17 00:00:00 2001 From: Hou Tao <houtao1@huawei.com> Date: Fri, 2 Feb 2024 18:39:33 +0800 Subject: [PATCH 224/304] x86/mm: Move is_vsyscall_vaddr() into asm/vsyscall.h Move is_vsyscall_vaddr() into asm/vsyscall.h to make it available for copy_from_kernel_nofault_allowed() in arch/x86/mm/maccess.c. Reviewed-by: Sohil Mehta <sohil.mehta@intel.com> Signed-off-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/r/20240202103935.3154011-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov <ast@kernel.org> --- arch/x86/include/asm/vsyscall.h | 10 ++++++++++ arch/x86/mm/fault.c | 9 --------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index ab60a71a8dcb..472f0263dbc6 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -4,6 +4,7 @@ #include <linux/seqlock.h> #include <uapi/asm/vsyscall.h> +#include <asm/page_types.h> #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); @@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code, } #endif +/* + * The (legacy) vsyscall page is the long page in the kernel portion + * of the address space that has user-accessible permissions. + */ +static inline bool is_vsyscall_vaddr(unsigned long vaddr) +{ + return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); +} + #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 679b09cfe241..d6375b3c633b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, show_opcodes(regs, loglvl); } -/* - * The (legacy) vsyscall page is the long page in the kernel portion - * of the address space that has user-accessible permissions. - */ -static bool is_vsyscall_vaddr(unsigned long vaddr) -{ - return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR); -} - static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, u32 pkey, int si_code) From 32019c659ecfe1d92e3bf9fcdfbb11a7c70acd58 Mon Sep 17 00:00:00 2001 From: Hou Tao <houtao1@huawei.com> Date: Fri, 2 Feb 2024 18:39:34 +0800 Subject: [PATCH 225/304] x86/mm: Disallow vsyscall page read for copy_from_kernel_nofault() When trying to use copy_from_kernel_nofault() to read vsyscall page through a bpf program, the following oops was reported: BUG: unable to handle page fault for address: ffffffffff600000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 3231067 P4D 3231067 PUD 3233067 PMD 3235067 PTE 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 20390 Comm: test_progs ...... 6.7.0+ #58 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:copy_from_kernel_nofault+0x6f/0x110 ...... Call Trace: <TASK> ? copy_from_kernel_nofault+0x6f/0x110 bpf_probe_read_kernel+0x1d/0x50 bpf_prog_2061065e56845f08_do_probe_read+0x51/0x8d trace_call_bpf+0xc5/0x1c0 perf_call_bpf_enter.isra.0+0x69/0xb0 perf_syscall_enter+0x13e/0x200 syscall_trace_enter+0x188/0x1c0 do_syscall_64+0xb5/0xe0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 </TASK> ...... ---[ end trace 0000000000000000 ]--- The oops is triggered when: 1) A bpf program uses bpf_probe_read_kernel() to read from the vsyscall page and invokes copy_from_kernel_nofault() which in turn calls __get_user_asm(). 2) Because the vsyscall page address is not readable from kernel space, a page fault exception is triggered accordingly. 3) handle_page_fault() considers the vsyscall page address as a user space address instead of a kernel space address. This results in the fix-up setup by bpf not being applied and a page_fault_oops() is invoked due to SMAP. Considering handle_page_fault() has already considered the vsyscall page address as a userspace address, fix the problem by disallowing vsyscall page read for copy_from_kernel_nofault(). Originally-by: Thomas Gleixner <tglx@linutronix.de> Reported-by: syzbot+72aa0161922eba61b50e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/bpf/CAG48ez06TZft=ATH1qh2c5mpS5BT8UakwNkzi6nvK5_djC-4Nw@mail.gmail.com Reported-by: xingwei lee <xrivendell7@gmail.com> Closes: https://lore.kernel.org/bpf/CABOYnLynjBoFZOf3Z4BhaZkc5hx_kHfsjiW+UWLoB=w33LvScw@mail.gmail.com Signed-off-by: Hou Tao <houtao1@huawei.com> Reviewed-by: Sohil Mehta <sohil.mehta@intel.com> Acked-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20240202103935.3154011-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov <ast@kernel.org> --- arch/x86/mm/maccess.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 6993f026adec..42115ac079cf 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -3,6 +3,8 @@ #include <linux/uaccess.h> #include <linux/kernel.h> +#include <asm/vsyscall.h> + #ifdef CONFIG_X86_64 bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { @@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) return false; + /* + * Reading from the vsyscall page may cause an unhandled fault in + * certain cases. Though it is at an address above TASK_SIZE_MAX, it is + * usually considered as a user space address. + */ + if (is_vsyscall_vaddr(vaddr)) + return false; + /* * Allow everything during early boot before 'x86_virt_bits' * is initialized. Needed for instruction decoding in early From be66d79189ec8a1006ec6ec302bb27b160b3e6ce Mon Sep 17 00:00:00 2001 From: Hou Tao <houtao1@huawei.com> Date: Fri, 2 Feb 2024 18:39:35 +0800 Subject: [PATCH 226/304] selftest/bpf: Test the read of vsyscall page under x86-64 Under x86-64, when using bpf_probe_read_kernel{_str}() or bpf_probe_read{_str}() to read vsyscall page, the read may trigger oops, so add one test case to ensure that the problem is fixed. Beside those four bpf helpers mentioned above, testing the read of vsyscall page by using bpf_probe_read_user{_str} and bpf_copy_from_user{_task}() as well. The test case passes the address of vsyscall page to these six helpers and checks whether the returned values are expected: 1) For bpf_probe_read_kernel{_str}()/bpf_probe_read{_str}(), the expected return value is -ERANGE as shown below: bpf_probe_read_kernel_common copy_from_kernel_nofault // false, return -ERANGE copy_from_kernel_nofault_allowed 2) For bpf_probe_read_user{_str}(), the expected return value is -EFAULT as show below: bpf_probe_read_user_common copy_from_user_nofault // false, return -EFAULT __access_ok 3) For bpf_copy_from_user(), the expected return value is -EFAULT: // return -EFAULT bpf_copy_from_user copy_from_user _copy_from_user // return false access_ok 4) For bpf_copy_from_user_task(), the expected return value is -EFAULT: // return -EFAULT bpf_copy_from_user_task access_process_vm // return 0 vma_lookup() // return 0 expand_stack() The occurrence of oops depends on the availability of CPU SMAP [1] feature and there are three possible configurations of vsyscall page in the boot cmd-line: vsyscall={xonly|none|emulate}, so there are a total of six possible combinations. Under all these combinations, the test case runs successfully. [1]: https://en.wikipedia.org/wiki/Supervisor_Mode_Access_Prevention Acked-by: Yonghong Song <yonghong.song@linux.dev> Signed-off-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/r/20240202103935.3154011-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov <ast@kernel.org> --- .../selftests/bpf/prog_tests/read_vsyscall.c | 57 +++++++++++++++++++ .../selftests/bpf/progs/read_vsyscall.c | 45 +++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/read_vsyscall.c create mode 100644 tools/testing/selftests/bpf/progs/read_vsyscall.c diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c new file mode 100644 index 000000000000..3405923fe4e6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include "test_progs.h" +#include "read_vsyscall.skel.h" + +#if defined(__x86_64__) +/* For VSYSCALL_ADDR */ +#include <asm/vsyscall.h> +#else +/* To prevent build failure on non-x86 arch */ +#define VSYSCALL_ADDR 0UL +#endif + +struct read_ret_desc { + const char *name; + int ret; +} all_read[] = { + { .name = "probe_read_kernel", .ret = -ERANGE }, + { .name = "probe_read_kernel_str", .ret = -ERANGE }, + { .name = "probe_read", .ret = -ERANGE }, + { .name = "probe_read_str", .ret = -ERANGE }, + { .name = "probe_read_user", .ret = -EFAULT }, + { .name = "probe_read_user_str", .ret = -EFAULT }, + { .name = "copy_from_user", .ret = -EFAULT }, + { .name = "copy_from_user_task", .ret = -EFAULT }, +}; + +void test_read_vsyscall(void) +{ + struct read_vsyscall *skel; + unsigned int i; + int err; + +#if !defined(__x86_64__) + test__skip(); + return; +#endif + skel = read_vsyscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load")) + return; + + skel->bss->target_pid = getpid(); + err = read_vsyscall__attach(skel); + if (!ASSERT_EQ(err, 0, "read_vsyscall attach")) + goto out; + + /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE, + * but it doesn't affect the returned error codes. + */ + skel->bss->user_ptr = (void *)VSYSCALL_ADDR; + usleep(1); + + for (i = 0; i < ARRAY_SIZE(all_read); i++) + ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name); +out: + read_vsyscall__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c new file mode 100644 index 000000000000..986f96687ae1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include <linux/types.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +int target_pid = 0; +void *user_ptr = 0; +int read_ret[8]; + +char _license[] SEC("license") = "GPL"; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int do_probe_read(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr); + read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr); + read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr); + read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr); + read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr); + read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int do_copy_from_user(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr); + read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr, + bpf_get_current_task_btf(), 0); + + return 0; +} From 4860abb91f3d7fbaf8147d54782149bb1fc45892 Mon Sep 17 00:00:00 2001 From: Steve French <stfrench@microsoft.com> Date: Tue, 6 Feb 2024 16:34:22 -0600 Subject: [PATCH 227/304] smb: Fix regression in writes when non-standard maximum write size negotiated The conversion to netfs in the 6.3 kernel caused a regression when maximum write size is set by the server to an unexpected value which is not a multiple of 4096 (similarly if the user overrides the maximum write size by setting mount parm "wsize", but sets it to a value that is not a multiple of 4096). When negotiated write size is not a multiple of 4096 the netfs code can skip the end of the final page when doing large sequential writes, causing data corruption. This section of code is being rewritten/removed due to a large netfs change, but until that point (ie for the 6.3 kernel until now) we can not support non-standard maximum write sizes. Add a warning if a user specifies a wsize on mount that is not a multiple of 4096 (and round down), also add a change where we round down the maximum write size if the server negotiates a value that is not a multiple of 4096 (we also have to check to make sure that we do not round it down to zero). Reported-by: R. Diez" <rdiez-2006@rd10.de> Fixes: d08089f649a0 ("cifs: Change the I/O paths to use an iterator rather than a page list") Suggested-by: Ronnie Sahlberg <ronniesahlberg@gmail.com> Acked-by: Ronnie Sahlberg <ronniesahlberg@gmail.com> Tested-by: Matthew Ruffell <matthew.ruffell@canonical.com> Reviewed-by: Shyam Prasad N <sprasad@microsoft.com> Cc: stable@vger.kernel.org # v6.3+ Cc: David Howells <dhowells@redhat.com> Signed-off-by: Steve French <stfrench@microsoft.com> --- fs/smb/client/connect.c | 14 ++++++++++++-- fs/smb/client/fs_context.c | 11 +++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d03253f8f145..ac9595504f4b 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3444,8 +3444,18 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) * the user on mount */ if ((cifs_sb->ctx->wsize == 0) || - (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) - cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx); + (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx))) { + cifs_sb->ctx->wsize = + round_down(server->ops->negotiate_wsize(tcon, ctx), PAGE_SIZE); + /* + * in the very unlikely event that the server sent a max write size under PAGE_SIZE, + * (which would get rounded down to 0) then reset wsize to absolute minimum eg 4096 + */ + if (cifs_sb->ctx->wsize == 0) { + cifs_sb->ctx->wsize = PAGE_SIZE; + cifs_dbg(VFS, "wsize too small, reset to minimum ie PAGE_SIZE, usually 4096\n"); + } + } if ((cifs_sb->ctx->rsize == 0) || (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx))) cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index aec8dbd1f9db..4b2f5aa2ea0e 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1111,6 +1111,17 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, case Opt_wsize: ctx->wsize = result.uint_32; ctx->got_wsize = true; + if (ctx->wsize % PAGE_SIZE != 0) { + ctx->wsize = round_down(ctx->wsize, PAGE_SIZE); + if (ctx->wsize == 0) { + ctx->wsize = PAGE_SIZE; + cifs_dbg(VFS, "wsize too small, reset to minimum %ld\n", PAGE_SIZE); + } else { + cifs_dbg(VFS, + "wsize rounded down to %d to multiple of PAGE_SIZE %ld\n", + ctx->wsize, PAGE_SIZE); + } + } break; case Opt_acregmax: ctx->acregmax = HZ * result.uint_32; From b4ea9b6a18ebf7f9f3a7a60f82e925186978cfcf Mon Sep 17 00:00:00 2001 From: Alexander Gordeev <agordeev@linux.ibm.com> Date: Wed, 14 Feb 2024 17:32:40 +0100 Subject: [PATCH 228/304] net/iucv: fix the allocation size of iucv_path_table array iucv_path_table is a dynamically allocated array of pointers to struct iucv_path items. Yet, its size is calculated as if it was an array of struct iucv_path items. Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com> Reviewed-by: Alexandra Winter <wintera@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/iucv/iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 6334f64f04d5..b0b3e9c5af44 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID"; static LIST_HEAD(iucv_handler_list); /* - * iucv_path_table: an array of iucv_path structures. + * iucv_path_table: array of pointers to iucv_path structures. */ static struct iucv_path **iucv_path_table; static unsigned long iucv_max_pathid; @@ -544,7 +544,7 @@ static int iucv_enable(void) cpus_read_lock(); rc = -ENOMEM; - alloc_size = iucv_max_pathid * sizeof(struct iucv_path); + alloc_size = iucv_max_pathid * sizeof(*iucv_path_table); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); if (!iucv_path_table) goto out; From dc489f86257cab5056e747344f17a164f63bff4b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz <tobias@waldekranz.com> Date: Wed, 14 Feb 2024 22:40:03 +0100 Subject: [PATCH 229/304] net: bridge: switchdev: Skip MDB replays of deferred events on offload Before this change, generation of the list of MDB events to replay would race against the creation of new group memberships, either from the IGMP/MLD snooping logic or from user configuration. While new memberships are immediately visible to walkers of br->mdb_list, the notification of their existence to switchdev event subscribers is deferred until a later point in time. So if a replay list was generated during a time that overlapped with such a window, it would also contain a replay of the not-yet-delivered event. The driver would thus receive two copies of what the bridge internally considered to be one single event. On destruction of the bridge, only a single membership deletion event was therefore sent. As a consequence of this, drivers which reference count memberships (at least DSA), would be left with orphan groups in their hardware database when the bridge was destroyed. This is only an issue when replaying additions. While deletion events may still be pending on the deferred queue, they will already have been removed from br->mdb_list, so no duplicates can be generated in that scenario. To a user this meant that old group memberships, from a bridge in which a port was previously attached, could be reanimated (in hardware) when the port joined a new bridge, without the new bridge's knowledge. For example, on an mv88e6xxx system, create a snooping bridge and immediately add a port to it: root@infix-06-0b-00:~$ ip link add dev br0 up type bridge mcast_snooping 1 && \ > ip link set dev x3 up master br0 And then destroy the bridge: root@infix-06-0b-00:~$ ip link del dev br0 root@infix-06-0b-00:~$ mvls atu ADDRESS FID STATE Q F 0 1 2 3 4 5 6 7 8 9 a DEV:0 Marvell 88E6393X 33:33:00:00:00:6a 1 static - - 0 . . . . . . . . . . 33:33:ff:87:e4:3f 1 static - - 0 . . . . . . . . . . ff:ff:ff:ff:ff:ff 1 static - - 0 1 2 3 4 5 6 7 8 9 a root@infix-06-0b-00:~$ The two IPv6 groups remain in the hardware database because the port (x3) is notified of the host's membership twice: once via the original event and once via a replay. Since only a single delete notification is sent, the count remains at 1 when the bridge is destroyed. Then add the same port (or another port belonging to the same hardware domain) to a new bridge, this time with snooping disabled: root@infix-06-0b-00:~$ ip link add dev br1 up type bridge mcast_snooping 0 && \ > ip link set dev x3 up master br1 All multicast, including the two IPv6 groups from br0, should now be flooded, according to the policy of br1. But instead the old memberships are still active in the hardware database, causing the switch to only forward traffic to those groups towards the CPU (port 0). Eliminate the race in two steps: 1. Grab the write-side lock of the MDB while generating the replay list. This prevents new memberships from showing up while we are generating the replay list. But it leaves the scenario in which a deferred event was already generated, but not delivered, before we grabbed the lock. Therefore: 2. Make sure that no deferred version of a replay event is already enqueued to the switchdev deferred queue, before adding it to the replay list, when replaying additions. Fixes: 4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined mdb entries") Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/switchdev.h | 3 ++ net/bridge/br_switchdev.c | 76 ++++++++++++++++++++++++--------------- net/switchdev/switchdev.c | 73 +++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 29 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index a43062d4c734..8346b0d29542 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -308,6 +308,9 @@ void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct netlink_ext_ack *extack); +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj); int switchdev_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index ee84e783e1df..6a7cb01f121c 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev, } static int br_switchdev_mdb_queue_one(struct list_head *mdb_list, + struct net_device *dev, + unsigned long action, enum switchdev_obj_id id, const struct net_bridge_mdb_entry *mp, struct net_device *orig_dev) { - struct switchdev_obj_port_mdb *mdb; + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = id, + .orig_dev = orig_dev, + }, + }; + struct switchdev_obj_port_mdb *pmdb; - mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC); - if (!mdb) + br_switchdev_mdb_populate(&mdb, mp); + + if (action == SWITCHDEV_PORT_OBJ_ADD && + switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) { + /* This event is already in the deferred queue of + * events, so this replay must be elided, lest the + * driver receives duplicate events for it. This can + * only happen when replaying additions, since + * modifications are always immediately visible in + * br->mdb_list, whereas actual event delivery may be + * delayed. + */ + return 0; + } + + pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC); + if (!pmdb) return -ENOMEM; - mdb->obj.id = id; - mdb->obj.orig_dev = orig_dev; - br_switchdev_mdb_populate(mdb, mp); - list_add_tail(&mdb->obj.list, mdb_list); - + list_add_tail(&pmdb->obj.list, mdb_list); return 0; } @@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev, if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) return 0; - /* We cannot walk over br->mdb_list protected just by the rtnl_mutex, - * because the write-side protection is br->multicast_lock. But we - * need to emulate the [ blocking ] calling context of a regular - * switchdev event, so since both br->multicast_lock and RCU read side - * critical sections are atomic, we have no choice but to pick the RCU - * read side lock, queue up all our events, leave the critical section - * and notify switchdev from blocking context. - */ - rcu_read_lock(); + if (adding) + action = SWITCHDEV_PORT_OBJ_ADD; + else + action = SWITCHDEV_PORT_OBJ_DEL; - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + /* br_switchdev_mdb_queue_one() will take care to not queue a + * replay of an event that is already pending in the switchdev + * deferred queue. In order to safely determine that, there + * must be no new deferred MDB notifications enqueued for the + * duration of the MDB scan. Therefore, grab the write-side + * lock to avoid racing with any concurrent IGMP/MLD snooping. + */ + spin_lock_bh(&br->multicast_lock); + + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5b045284849e..c9189a970eec 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include <linux/rtnetlink.h> #include <net/switchdev.h> +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. + */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); From f7a70d650b0b6b0134ccba763d672c8439d9f09b Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz <tobias@waldekranz.com> Date: Wed, 14 Feb 2024 22:40:04 +0100 Subject: [PATCH 230/304] net: bridge: switchdev: Ensure deferred event delivery on unoffload When unoffloading a device, it is important to ensure that all relevant deferred events are delivered to it before it disassociates itself from the bridge. Before this change, this was true for the normal case when a device maps 1:1 to a net_bridge_port, i.e. br0 / swp0 When swp0 leaves br0, the call to switchdev_deferred_process() in del_nbp() makes sure to process any outstanding events while the device is still associated with the bridge. In the case when the association is indirect though, i.e. when the device is attached to the bridge via an intermediate device, like a LAG... br0 / lag0 / swp0 ...then detaching swp0 from lag0 does not cause any net_bridge_port to be deleted, so there was no guarantee that all events had been processed before the device disassociated itself from the bridge. Fix this by always synchronously processing all deferred events before signaling completion of unoffloading back to the driver. Fixes: 4e51bf44a03a ("net: bridge: move the switchdev object replay helpers to "push" mode") Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com> Reviewed-by: Vladimir Oltean <olteanv@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/bridge/br_switchdev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 6a7cb01f121c..7b41ee8740cb 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -804,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. + */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a From 66b60b0c8c4a163b022a9f0ad6769b0fd3dc662f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima <kuniyu@amazon.com> Date: Wed, 14 Feb 2024 11:13:08 -0800 Subject: [PATCH 231/304] dccp/tcp: Unhash sk from ehash for tb2 alloc failure after check_estalblished(). syzkaller reported a warning [0] in inet_csk_destroy_sock() with no repro. WARN_ON(inet_sk(sk)->inet_num && !inet_csk(sk)->icsk_bind_hash); However, the syzkaller's log hinted that connect() failed just before the warning due to FAULT_INJECTION. [1] When connect() is called for an unbound socket, we search for an available ephemeral port. If a bhash bucket exists for the port, we call __inet_check_established() or __inet6_check_established() to check if the bucket is reusable. If reusable, we add the socket into ehash and set inet_sk(sk)->inet_num. Later, we look up the corresponding bhash2 bucket and try to allocate it if it does not exist. Although it rarely occurs in real use, if the allocation fails, we must revert the changes by check_established(). Otherwise, an unconnected socket could illegally occupy an ehash entry. Note that we do not put tw back into ehash because sk might have already responded to a packet for tw and it would be better to free tw earlier under such memory presure. [0]: WARNING: CPU: 0 PID: 350830 at net/ipv4/inet_connection_sock.c:1193 inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Modules linked in: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) Code: 41 5c 41 5d 41 5e e9 2d 4a 3d fd e8 28 4a 3d fd 48 89 ef e8 f0 cd 7d ff 5b 5d 41 5c 41 5d 41 5e e9 13 4a 3d fd e8 0e 4a 3d fd <0f> 0b e9 61 fe ff ff e8 02 4a 3d fd 4c 89 e7 be 03 00 00 00 e8 05 RSP: 0018:ffffc9000b21fd38 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000009e78 RCX: ffffffff840bae40 RDX: ffff88806e46c600 RSI: ffffffff840bb012 RDI: ffff88811755cca8 RBP: ffff88811755c880 R08: 0000000000000003 R09: 0000000000000000 R10: 0000000000009e78 R11: 0000000000000000 R12: ffff88811755c8e0 R13: ffff88811755c892 R14: ffff88811755c918 R15: 0000000000000000 FS: 00007f03e5243800(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32f21000 CR3: 0000000112ffe001 CR4: 0000000000770ef0 PKRU: 55555554 Call Trace: <TASK> ? inet_csk_destroy_sock (net/ipv4/inet_connection_sock.c:1193) dccp_close (net/dccp/proto.c:1078) inet_release (net/ipv4/af_inet.c:434) __sock_release (net/socket.c:660) sock_close (net/socket.c:1423) __fput (fs/file_table.c:377) __fput_sync (fs/file_table.c:462) __x64_sys_close (fs/open.c:1557 fs/open.c:1539 fs/open.c:1539) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e53852bb Code: 03 00 00 00 0f 05 48 3d 00 f0 ff ff 77 41 c3 48 83 ec 18 89 7c 24 0c e8 43 c9 f5 ff 8b 7c 24 0c 41 89 c0 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 35 44 89 c7 89 44 24 0c e8 a1 c9 f5 ff 8b 44 RSP: 002b:00000000005dfba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f03e53852bb RDX: 0000000000000002 RSI: 0000000000000002 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000167c R10: 0000000008a79680 R11: 0000000000000293 R12: 00007f03e4e43000 R13: 00007f03e4e43170 R14: 00007f03e4e43178 R15: 00007f03e4e43170 </TASK> [1]: FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 PID: 350833 Comm: syz-executor.1 Not tainted 6.7.0-12272-g2121c43f88f5 #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: <TASK> dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) should_fail_ex (lib/fault-inject.c:52 lib/fault-inject.c:153) should_failslab (mm/slub.c:3748) kmem_cache_alloc (mm/slub.c:3763 mm/slub.c:3842 mm/slub.c:3867) inet_bind2_bucket_create (net/ipv4/inet_hashtables.c:135) __inet_hash_connect (net/ipv4/inet_hashtables.c:1100) dccp_v4_connect (net/dccp/ipv4.c:116) __inet_stream_connect (net/ipv4/af_inet.c:676) inet_stream_connect (net/ipv4/af_inet.c:747) __sys_connect_file (net/socket.c:2048 (discriminator 2)) __sys_connect (net/socket.c:2065) __x64_sys_connect (net/socket.c:2072) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) RIP: 0033:0x7f03e5284e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f03e4641cc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f03e5284e5d RDX: 0000000000000010 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000b R14: 00007f03e52e5530 R15: 0000000000000000 </TASK> Reported-by: syzkaller <syzkaller@googlegroups.com> Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/inet_hashtables.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 93e9193df544..308ff34002ea 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1130,10 +1130,33 @@ ok: return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } From a9f80df4f51440303d063b55bb98720857693821 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Wed, 14 Feb 2024 23:00:50 -0800 Subject: [PATCH 232/304] net: ethernet: adi: requires PHYLIB support This driver uses functions that are supplied by the Kconfig symbol PHYLIB, so select it to ensure that they are built as needed. When CONFIG_ADIN1110=y and CONFIG_PHYLIB=m, there are multiple build (linker) errors that are resolved by this Kconfig change: ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_net_open': drivers/net/ethernet/adi/adin1110.c:933: undefined reference to `phy_start' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_probe_netdevs': drivers/net/ethernet/adi/adin1110.c:1603: undefined reference to `get_phy_device' ld: drivers/net/ethernet/adi/adin1110.c:1609: undefined reference to `phy_connect' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_disconnect_phy': drivers/net/ethernet/adi/adin1110.c:1226: undefined reference to `phy_disconnect' ld: drivers/net/ethernet/adi/adin1110.o: in function `devm_mdiobus_alloc': include/linux/phy.h:455: undefined reference to `devm_mdiobus_alloc_size' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_register_mdiobus': drivers/net/ethernet/adi/adin1110.c:529: undefined reference to `__devm_mdiobus_register' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_net_stop': drivers/net/ethernet/adi/adin1110.c:958: undefined reference to `phy_stop' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_disconnect_phy': drivers/net/ethernet/adi/adin1110.c:1226: undefined reference to `phy_disconnect' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_adjust_link': drivers/net/ethernet/adi/adin1110.c:1077: undefined reference to `phy_print_status' ld: drivers/net/ethernet/adi/adin1110.o: in function `adin1110_ioctl': drivers/net/ethernet/adi/adin1110.c:790: undefined reference to `phy_do_ioctl' ld: drivers/net/ethernet/adi/adin1110.o:(.rodata+0xf60): undefined reference to `phy_ethtool_get_link_ksettings' ld: drivers/net/ethernet/adi/adin1110.o:(.rodata+0xf68): undefined reference to `phy_ethtool_set_link_ksettings' Fixes: bc93e19d088b ("net: ethernet: adi: Add ADIN1110 support") Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Reported-by: kernel test robot <lkp@intel.com> Closes: https://lore.kernel.org/oe-kbuild-all/202402070626.eZsfVHG5-lkp@intel.com/ Cc: Lennart Franzen <lennart@lfdomain.com> Cc: Alexandru Tachici <alexandru.tachici@analog.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Eric Dumazet <edumazet@google.com> Cc: Jakub Kicinski <kuba@kernel.org> Cc: Paolo Abeni <pabeni@redhat.com> Cc: netdev@vger.kernel.org Reviewed-by: Nuno Sa <nuno.sa@analog.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/adi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig index da3bdd302502..c91b4dcef4ec 100644 --- a/drivers/net/ethernet/adi/Kconfig +++ b/drivers/net/ethernet/adi/Kconfig @@ -7,6 +7,7 @@ config NET_VENDOR_ADI bool "Analog Devices devices" default y depends on SPI + select PHYLIB help If you have a network (Ethernet) card belonging to this class, say Y. From 52f671db18823089a02f07efc04efdb2272ddc17 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <kuba@kernel.org> Date: Thu, 15 Feb 2024 06:33:45 -0800 Subject: [PATCH 233/304] net/sched: act_mirred: use the backlog for mirred ingress The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog for nested calls to mirred ingress") hangs our testing VMs every 10 or so runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by lockdep. The problem as previously described by Davide (see Link) is that if we reverse flow of traffic with the redirect (egress -> ingress) we may reach the same socket which generated the packet. And we may still be holding its socket lock. The common solution to such deadlocks is to put the packet in the Rx backlog, rather than run the Rx path inline. Do that for all egress -> ingress reversals, not just once we started to nest mirred calls. In the past there was a concern that the backlog indirection will lead to loss of error reporting / less accurate stats. But the current workaround does not seem to address the issue. Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions") Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Suggested-by: Davide Caratti <dcaratti@redhat.com> Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/ Signed-off-by: Jakub Kicinski <kuba@kernel.org> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/act_mirred.c | 14 +++++--------- .../testing/selftests/net/forwarding/tc_actions.sh | 3 --- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a1a9e40f237..291d47c9eb69 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -232,18 +232,14 @@ release_idr: return err; } -static bool is_mirred_nested(void) -{ - return unlikely(__this_cpu_read(mirred_nest_level) > 1); -} - -static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) +static int +tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (is_mirred_nested()) + else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); @@ -319,9 +315,9 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); - err = tcf_mirred_forward(want_ingress, skb_to_send); + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } else { - err = tcf_mirred_forward(want_ingress, skb_to_send); + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } if (err) { diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index b0f5e55d2d0b..589629636502 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -235,9 +235,6 @@ mirred_egress_to_ingress_tcp_test() check_err $? "didn't mirred redirect ICMP" tc_check_packets "dev $h1 ingress" 102 10 check_err $? "didn't drop mirred ICMP" - local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits) - test ${overlimits} = 10 - check_err $? "wrong overlimits, expected 10 got ${overlimits}" tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower From 166c2c8a6a4dc2e4ceba9e10cfe81c3e469e3210 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <kuba@kernel.org> Date: Thu, 15 Feb 2024 06:33:46 -0800 Subject: [PATCH 234/304] net/sched: act_mirred: don't override retval if we already lost the skb If we're redirecting the skb, and haven't called tcf_mirred_forward(), yet, we need to tell the core to drop the skb by setting the retcode to SHOT. If we have called tcf_mirred_forward(), however, the skb is out of our hands and returning SHOT will lead to UaF. Move the retval override to the error path which actually need it. Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com> Fixes: e5cf1baf92cb ("act_mirred: use TC_ACT_REINSERT when possible") Signed-off-by: Jakub Kicinski <kuba@kernel.org> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/act_mirred.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 291d47c9eb69..6faa7d00da09 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -266,8 +266,7 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); - err = -ENODEV; - goto out; + goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; @@ -279,10 +278,8 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, tcf_mirred_can_reinsert(retval); if (!dont_clone) { skb_to_send = skb_clone(skb, GFP_ATOMIC); - if (!skb_to_send) { - err = -ENOMEM; - goto out; - } + if (!skb_to_send) + goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); @@ -319,15 +316,16 @@ static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, } else { err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } - - if (err) { -out: + if (err) tcf_action_inc_overlimit_qstats(&m->common); - if (is_redirect) - retval = TC_ACT_SHOT; - } return retval; + +err_cant_do: + if (is_redirect) + retval = TC_ACT_SHOT; + tcf_action_inc_overlimit_qstats(&m->common); + return retval; } static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m, From 172c0cf519fb52860157c57067f1a58cfc0aa861 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli <s-vadapalli@ti.com> Date: Fri, 16 Feb 2024 12:29:26 +0530 Subject: [PATCH 235/304] MAINTAINERS: Add Siddharth Vadapalli as PCI TI DRA7XX/J721E reviewer Since I have been contributing to the driver for a while and wish to help with the review process, add myself as a reviewer. Link: https://lore.kernel.org/r/20240216065926.473805-1-s-vadapalli@ti.com Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..a80c590d0035 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16832,6 +16832,7 @@ F: drivers/pci/controller/dwc/*designware* PCI DRIVER FOR TI DRA7XX/J721E M: Vignesh Raghavendra <vigneshr@ti.com> +R: Siddharth Vadapalli <s-vadapalli@ti.com> L: linux-omap@vger.kernel.org L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) From 9704669c386f9bbfef2e002e7e690c56b7dcf5de Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> Date: Sat, 17 Feb 2024 21:25:42 +0900 Subject: [PATCH 236/304] tracing/probes: Fix to search structure fields correctly Fix to search a field from the structure which has anonymous union correctly. Since the reference `type` pointer was updated in the loop, the search loop suddenly aborted where it hits an anonymous union. Thus it can not find the field after the anonymous union. This avoids updating the cursor `type` pointer in the loop. Link: https://lore.kernel.org/all/170791694361.389532.10047514554799419688.stgit@devnote2/ Fixes: 302db0f5b3d8 ("tracing/probes: Add a function to search a member of a struct/union") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> --- kernel/trace/trace_btf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_btf.c b/kernel/trace/trace_btf.c index ca224d53bfdc..5bbdbcbbde3c 100644 --- a/kernel/trace/trace_btf.c +++ b/kernel/trace/trace_btf.c @@ -91,8 +91,8 @@ retry: for_each_member(i, type, member) { if (!member->name_off) { /* Anonymous union/struct: push it for later use */ - type = btf_type_skip_modifiers(btf, member->type, &tid); - if (type && top < BTF_ANON_STACK_MAX) { + if (btf_type_skip_modifiers(btf, member->type, &tid) && + top < BTF_ANON_STACK_MAX) { anon_stack[top].tid = tid; anon_stack[top++].offset = cur_offset + member->offset; From 5928d411557ec5d53832cdd39fc443704a3e5b77 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Date: Sat, 17 Feb 2024 13:55:31 +0100 Subject: [PATCH 237/304] Documentation: Document the Linux Kernel CVE process The Linux kernel project now has the ability to assign CVEs to fixed issues, so document the process and how individual developers can get a CVE if one is not automatically assigned for their fixes. Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org> Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org> Reviewed-by: Lukas Bulwahn <lukas.bulwahn@gmail.com> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Lee Jones <lee@kernel.org> Link: https://lore.kernel.org/r/2024021731-essence-sadness-28fd@gregkh Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- Documentation/process/cve.rst | 121 ++++++++++++++++++++++++ Documentation/process/index.rst | 1 + Documentation/process/security-bugs.rst | 5 +- MAINTAINERS | 5 + 4 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 Documentation/process/cve.rst diff --git a/Documentation/process/cve.rst b/Documentation/process/cve.rst new file mode 100644 index 000000000000..5e2753eff729 --- /dev/null +++ b/Documentation/process/cve.rst @@ -0,0 +1,121 @@ +==== +CVEs +==== + +Common Vulnerabilities and Exposure (CVE®) numbers were developed as an +unambiguous way to identify, define, and catalog publicly disclosed +security vulnerabilities. Over time, their usefulness has declined with +regards to the kernel project, and CVE numbers were very often assigned +in inappropriate ways and for inappropriate reasons. Because of this, +the kernel development community has tended to avoid them. However, the +combination of continuing pressure to assign CVEs and other forms of +security identifiers, and ongoing abuses by individuals and companies +outside of the kernel community has made it clear that the kernel +community should have control over those assignments. + +The Linux kernel developer team does have the ability to assign CVEs for +potential Linux kernel security issues. This assignment is independent +of the :doc:`normal Linux kernel security bug reporting +process<../process/security-bugs>`. + +A list of all assigned CVEs for the Linux kernel can be found in the +archives of the linux-cve mailing list, as seen on +https://lore.kernel.org/linux-cve-announce/. To get notice of the +assigned CVEs, please `subscribe +<https://subspace.kernel.org/subscribing.html>`_ to that mailing list. + +Process +======= + +As part of the normal stable release process, kernel changes that are +potentially security issues are identified by the developers responsible +for CVE number assignments and have CVE numbers automatically assigned +to them. These assignments are published on the linux-cve-announce +mailing list as announcements on a frequent basis. + +Note, due to the layer at which the Linux kernel is in a system, almost +any bug might be exploitable to compromise the security of the kernel, +but the possibility of exploitation is often not evident when the bug is +fixed. Because of this, the CVE assignment team is overly cautious and +assign CVE numbers to any bugfix that they identify. This +explains the seemingly large number of CVEs that are issued by the Linux +kernel team. + +If the CVE assignment team misses a specific fix that any user feels +should have a CVE assigned to it, please email them at <cve@kernel.org> +and the team there will work with you on it. Note that no potential +security issues should be sent to this alias, it is ONLY for assignment +of CVEs for fixes that are already in released kernel trees. If you +feel you have found an unfixed security issue, please follow the +:doc:`normal Linux kernel security bug reporting +process<../process/security-bugs>`. + +No CVEs will be automatically assigned for unfixed security issues in +the Linux kernel; assignment will only automatically happen after a fix +is available and applied to a stable kernel tree, and it will be tracked +that way by the git commit id of the original fix. If anyone wishes to +have a CVE assigned before an issue is resolved with a commit, please +contact the kernel CVE assignment team at <cve@kernel.org> to get an +identifier assigned from their batch of reserved identifiers. + +No CVEs will be assigned for any issue found in a version of the kernel +that is not currently being actively supported by the Stable/LTS kernel +team. A list of the currently supported kernel branches can be found at +https://kernel.org/releases.html + +Disputes of assigned CVEs +========================= + +The authority to dispute or modify an assigned CVE for a specific kernel +change lies solely with the maintainers of the relevant subsystem +affected. This principle ensures a high degree of accuracy and +accountability in vulnerability reporting. Only those individuals with +deep expertise and intimate knowledge of the subsystem can effectively +assess the validity and scope of a reported vulnerability and determine +its appropriate CVE designation. Any attempt to modify or dispute a CVE +outside of this designated authority could lead to confusion, inaccurate +reporting, and ultimately, compromised systems. + +Invalid CVEs +============ + +If a security issue is found in a Linux kernel that is only supported by +a Linux distribution due to the changes that have been made by that +distribution, or due to the distribution supporting a kernel version +that is no longer one of the kernel.org supported releases, then a CVE +can not be assigned by the Linux kernel CVE team, and must be asked for +from that Linux distribution itself. + +Any CVE that is assigned against the Linux kernel for an actively +supported kernel version, by any group other than the kernel assignment +CVE team should not be treated as a valid CVE. Please notify the +kernel CVE assignment team at <cve@kernel.org> so that they can work to +invalidate such entries through the CNA remediation process. + +Applicability of specific CVEs +============================== + +As the Linux kernel can be used in many different ways, with many +different ways of accessing it by external users, or no access at all, +the applicability of any specific CVE is up to the user of Linux to +determine, it is not up to the CVE assignment team. Please do not +contact us to attempt to determine the applicability of any specific +CVE. + +Also, as the source tree is so large, and any one system only uses a +small subset of the source tree, any users of Linux should be aware that +large numbers of assigned CVEs are not relevant for their systems. + +In short, we do not know your use case, and we do not know what portions +of the kernel that you use, so there is no way for us to determine if a +specific CVE is relevant for your system. + +As always, it is best to take all released kernel changes, as they are +tested together in a unified whole by many community members, and not as +individual cherry-picked changes. Also note that for many bugs, the +solution to the overall problem is not found in a single change, but by +the sum of many fixes on top of each other. Ideally CVEs will be +assigned to all fixes for all issues, but sometimes we will fail to +notice fixes, therefore assume that some changes without a CVE assigned +might be relevant to take. + diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst index 6cb732dfcc72..de9cbb7bd7eb 100644 --- a/Documentation/process/index.rst +++ b/Documentation/process/index.rst @@ -81,6 +81,7 @@ of special classes of bugs: regressions and security problems. handling-regressions security-bugs + cve embargoed-hardware-issues Maintainer information diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst index 692a3ba56cca..56c560a00b37 100644 --- a/Documentation/process/security-bugs.rst +++ b/Documentation/process/security-bugs.rst @@ -99,9 +99,8 @@ CVE assignment The security team does not assign CVEs, nor do we require them for reports or fixes, as this can needlessly complicate the process and may delay the bug handling. If a reporter wishes to have a CVE identifier -assigned, they should find one by themselves, for example by contacting -MITRE directly. However under no circumstances will a patch inclusion -be delayed to wait for a CVE identifier to arrive. +assigned for a confirmed issue, they can contact the :doc:`kernel CVE +assignment team<../process/cve>` to obtain one. Non-disclosure agreements ------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 8999497011a2..4423fdfdd8ae 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5610,6 +5610,11 @@ S: Maintained F: Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml F: drivers/net/can/ctucanfd/ +CVE ASSIGNMENT CONTACT +M: CVE Assignment Team <cve@kernel.org> +S: Maintained +F: Documentation/process/cve.rst + CW1200 WLAN driver S: Orphan F: drivers/net/wireless/st/cw1200/ From 97dde84026339e4b4af9a6301f825d1828d7874b Mon Sep 17 00:00:00 2001 From: Pavel Sakharov <p.sakharov@ispras.ru> Date: Wed, 14 Feb 2024 12:27:17 +0300 Subject: [PATCH 238/304] net: stmmac: Fix incorrect dereference in interrupt handlers If 'dev' or 'data' is NULL, the 'priv' variable has an incorrect address when dereferencing calling netdev_err(). Since we get as 'dev_id' or 'data' what was passed as the 'dev' argument to request_irq() during interrupt initialization (that is, the net_device and rx/tx queue pointers initialized at the time of the call) and since there are usually no checks for the 'dev_id' argument in such handlers in other drivers, remove these checks from the handlers in stmmac driver. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8532f613bc78 ("net: stmmac: introduce MSI Interrupt routines for mac, safety, RX & TX") Signed-off-by: Pavel Sakharov <p.sakharov@ispras.ru> Reviewed-by: Serge Semin <fancer.lancer@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 75d029704503..e80d77bd9f1f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6059,11 +6059,6 @@ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -6079,11 +6074,6 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; struct stmmac_priv *priv = netdev_priv(dev); - if (unlikely(!dev)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -6105,11 +6095,6 @@ static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) dma_conf = container_of(tx_q, struct stmmac_dma_conf, tx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; @@ -6136,11 +6121,6 @@ static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) dma_conf = container_of(rx_q, struct stmmac_dma_conf, rx_queue[chan]); priv = container_of(dma_conf, struct stmmac_priv, dma_conf); - if (unlikely(!data)) { - netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); - return IRQ_NONE; - } - /* Check if adapter is up */ if (test_bit(STMMAC_DOWN, &priv->state)) return IRQ_HANDLED; From 081a0e3b0d4c061419d3f4679dec9f68725b17e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Thu, 15 Feb 2024 17:21:06 +0000 Subject: [PATCH 239/304] ipv4: properly combine dev_base_seq and ipv4.dev_addr_genid net->dev_base_seq and ipv4.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 0465277f6b3f ("ipv4: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/devinet.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ca0ff15dc8fa..bc74f131fe4d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1825,6 +1825,21 @@ done: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; From e898e4cd1aab271ca414f9ac6e08e4c761f6913c Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Thu, 15 Feb 2024 17:21:07 +0000 Subject: [PATCH 240/304] ipv6: properly combine dev_base_seq and ipv6.dev_addr_genid net->dev_base_seq and ipv6.dev_addr_genid are monotonically increasing. If we XOR their values, we could miss to detect if both values were changed with the same amount. Fixes: 63998ac24f83 ("ipv6: provide addr and netconf dump consistency info") Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv6/addrconf.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 733ace18806c..5a839c5fb1a5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -708,6 +708,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. + */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -741,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -5362,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; From 6c347be62ae963b301ead8e7fa7b9973e6e0d6e1 Mon Sep 17 00:00:00 2001 From: Geliang Tang <tanggeliang@kylinos.cn> Date: Thu, 15 Feb 2024 19:25:28 +0100 Subject: [PATCH 241/304] mptcp: add needs_id for userspace appending addr When userspace PM requires to create an ID 0 subflow in "userspace pm create id 0 subflow" test like this: userspace_pm_add_sf $ns2 10.0.3.2 0 An ID 1 subflow, in fact, is created. Since in mptcp_pm_nl_append_new_local_addr(), 'id 0' will be treated as no ID is set by userspace, and will allocate a new ID immediately: if (!e->addr.id) e->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); To solve this issue, a new parameter needs_id is added for mptcp_userspace_pm_append_new_local_addr() to distinguish between whether userspace PM has set an ID 0 or whether userspace PM has not set any address. needs_id is true in mptcp_userspace_pm_get_local_id(), but false in mptcp_pm_nl_announce_doit() and mptcp_pm_nl_subflow_create_doit(). Fixes: e5ed101a6028 ("mptcp: userspace pm allow creating id 0 subflow") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/mptcp/pm_userspace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 4f3901d5b8ef..e582b3b2d174 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -153,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) @@ -198,7 +199,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; @@ -378,7 +379,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; From 584f3894262634596532cf43a5e782e34a0ce374 Mon Sep 17 00:00:00 2001 From: Geliang Tang <tanggeliang@kylinos.cn> Date: Thu, 15 Feb 2024 19:25:29 +0100 Subject: [PATCH 242/304] mptcp: add needs_id for netlink appending addr Just the same as userspace PM, a new parameter needs_id is added for in-kernel PM mptcp_pm_nl_append_new_local_addr() too. Add a new helper mptcp_pm_has_addr_attr_id() to check whether an address ID is set from PM or not. In mptcp_pm_nl_get_local_id(), needs_id is always true, but in mptcp_pm_nl_add_addr_doit(), pass mptcp_pm_has_addr_attr_id() to needs_it. Fixes: efd5a4c04e18 ("mptcp: add the address ID assignment bitmap") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/mptcp/pm_netlink.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 287a60381eae..a24c9128dee9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -901,7 +901,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -949,7 +950,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -960,7 +961,7 @@ find_next: } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1092,7 +1093,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1285,6 +1286,18 @@ next: return 0; } +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) +{ + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_address_nl_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; @@ -1326,7 +1339,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; From b8adb69a7d29c2d33eb327bca66476fb6066516b Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni@redhat.com> Date: Thu, 15 Feb 2024 19:25:30 +0100 Subject: [PATCH 243/304] mptcp: fix lockless access in subflow ULP diag Since the introduction of the subflow ULP diag interface, the dump callback accessed all the subflow data with lockless. We need either to annotate all the read and write operation accordingly, or acquire the subflow socket lock. Let's do latter, even if slower, to avoid a diffstat havoc. Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/tcp.h | 2 +- net/mptcp/diag.c | 6 +++++- net/tls/tls_main.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dd78a1181031..f6eba9652d01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742f2..e57c5f47f035 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,19 @@ #include <uapi/linux/mptcp.h> #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -69,11 +71,13 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 1c2c6800949d..b4674f03d71a 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; From a7cfe776637004a4c938fde78be4bd608c32c3ef Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni@redhat.com> Date: Thu, 15 Feb 2024 19:25:31 +0100 Subject: [PATCH 244/304] mptcp: fix data races on local_id The local address id is accessed lockless by the NL PM, add all the required ONCE annotation. There is a caveat: the local id can be initialized late in the subflow life-cycle, and its validity is controlled by the local_id_valid flag. Remove such flag and encode the validity in the local_id field itself with negative value before initialization. That allows accessing the field consistently with a single read operation. Fixes: 0ee4261a3681 ("mptcp: implement mptcp_pm_remove_subflow") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/mptcp/diag.c | 2 +- net/mptcp/pm_netlink.c | 6 +++--- net/mptcp/pm_userspace.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 15 ++++++++++++--- net/mptcp/subflow.c | 9 +++++---- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index e57c5f47f035..6ff6f14674aa 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -65,7 +65,7 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a24c9128dee9..912e25077437 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -800,7 +800,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) continue; @@ -809,7 +809,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, + i, rm_id, id, subflow->remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -1994,7 +1994,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index e582b3b2d174..d396a5973429 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -234,7 +234,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, lock_sock(sk); mptcp_for_each_subflow(msk, subflow) { - if (subflow->local_id == 0) { + if (READ_ONCE(subflow->local_id) == 0) { has_id_0 = true; break; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 8ef2927ebca2..948606a537da 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -85,7 +85,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->subflow_id = msk->subflow_id++; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); iput(SOCK_INODE(ssock)); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index ed50f2015dc3..631a7f445f34 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -491,10 +491,9 @@ struct mptcp_subflow_context { remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 9; + __unused : 10; bool data_avail; bool scheduled; u32 remote_nonce; @@ -505,7 +504,7 @@ struct mptcp_subflow_context { u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ u64 iasn; /* initial ack sequence number, MPC subflows only */ }; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -556,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -1022,6 +1022,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c34ecadee120..015184bbf06c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -577,8 +577,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -587,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -1731,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1966,7 +1967,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; From 967d3c27127e71a10ff5c083583a038606431b61 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni@redhat.com> Date: Thu, 15 Feb 2024 19:25:32 +0100 Subject: [PATCH 245/304] mptcp: fix data races on remote_id Similar to the previous patch, address the data race on remote_id, adding the suitable ONCE annotations. Fixes: bedee0b56113 ("mptcp: address lookup improvements") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/mptcp/pm_netlink.c | 8 ++++---- net/mptcp/subflow.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 912e25077437..ed6983af1ab2 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -443,7 +443,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; @@ -799,18 +799,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", - i, rm_id, id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 015184bbf06c..71ba86246ff8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -535,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -1567,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; @@ -1974,7 +1974,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; From 045e9d812868a2d80b7a57b224ce8009444b7bbc Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni@redhat.com> Date: Thu, 15 Feb 2024 19:25:33 +0100 Subject: [PATCH 246/304] mptcp: fix duplicate subflow creation Fullmesh endpoints could end-up unexpectedly generating duplicate subflows - same local and remote addresses - when multiple incoming ADD_ADDR are processed before the PM creates the subflow for the local endpoints. Address the issue explicitly checking for duplicates at subflow creation time. To avoid a quadratic computational complexity, track the unavailable remote address ids in a temporary bitmap and initialize such bitmap with the remote ids of all the existing subflows matching the local address currently processed. The above allows additionally replacing the existing code checking for duplicate entry in the current set with a simple bit test operation. Fixes: 2843ff6f36db ("mptcp: remote addresses fullmesh") Cc: stable@vger.kernel.org Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/435 Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Mat Martineau <martineau@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/mptcp/pm_netlink.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ed6983af1ab2..58d17d9604e7 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ @@ -440,6 +427,16 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); @@ -447,11 +444,17 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, if (deny_id0 && !addrs[i].id) continue; + if (test_bit(addrs[i].id, unavail_id)) + continue; + if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple address towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } From d2a2547565a9f1ad7989f7e21f97cbf065a9390d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org> Date: Thu, 15 Feb 2024 19:25:34 +0100 Subject: [PATCH 247/304] selftests: mptcp: pm nl: also list skipped tests If the feature is not supported by older kernels, and instead of just ignoring some tests, we should mark them as skipped, so we can still track them. Fixes: d85555ac11f9 ("selftests: mptcp: pm_netlink: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang <geliang@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 8f4ff123a7eb..79e83a2c95de 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -194,6 +194,12 @@ subflow 10.0.1.1" " (nofullmesh)" ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" +else + for st in fullmesh nofullmesh backup,fullmesh; do + st=" (${st})" + printf "%-50s%s\n" "${st}" "[SKIP]" + mptcp_lib_result_skip "${st}" + done fi mptcp_lib_result_print_all_tap From 662f084f3396d8a804d56cb53ac05c9e39902a7b Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org> Date: Thu, 15 Feb 2024 19:25:35 +0100 Subject: [PATCH 248/304] selftests: mptcp: pm nl: avoid error msg on older kernels Since the 'Fixes' commit mentioned below, and if the kernel being tested doesn't support the 'fullmesh' flag, this error will be printed: netlink error -22 (Invalid argument) ./pm_nl_ctl: bailing out due to netlink error[s] But that can be normal if the kernel doesn't support the feature, no need to print this worrying error message while everything else looks OK. So we can mute stderr. Failures will still be detected if any. Fixes: 1dc88d241f92 ("selftests: mptcp: pm_nl_ctl: always look for errors") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang <geliang@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 79e83a2c95de..71899a3ffa7a 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -183,7 +183,7 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow 10.0.1.1" " (nobackup)" # fullmesh support has been added later -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh +ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || mptcp_lib_expect_all_features; then check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ From 694bd45980a61045eb5ec07799e3b94c76db830e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org> Date: Thu, 15 Feb 2024 19:25:36 +0100 Subject: [PATCH 249/304] selftests: mptcp: diag: fix bash warnings on older kernels Since the 'Fixes' commit mentioned below, the command that is executed in __chk_nr() helper can return nothing if the feature is not supported. This is the case when the MPTCP CURRESTAB counter is not supported. To avoid this warning ... ./diag.sh: line 65: [: !=: unary operator expected ... we just need to surround '$nr' with double quotes, to support an empty string when the feature is not supported. Fixes: 81ab772819da ("selftests: mptcp: diag: check CURRESTAB counters") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang <geliang@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/mptcp/diag.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 04fcb8a077c9..e0615c6ffb8d 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -62,8 +62,8 @@ __chk_nr() nr=$(eval $command) printf "%-50s" "$msg" - if [ $nr != $expected ]; then - if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then + if [ "$nr" != "$expected" ]; then + if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then echo "[ skip ] Feature probably not supported" mptcp_lib_result_skip "${msg}" else From 4d8e0dde0403b5a86aa83e243f020711a9c3e31f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org> Date: Thu, 15 Feb 2024 19:25:37 +0100 Subject: [PATCH 250/304] selftests: mptcp: simult flows: fix some subtest names The selftest was correctly recording all the results, but the 'reverse direction' part was missing in the name when needed. It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Fixes: 675d99338e7a ("selftests: mptcp: simult flows: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang <geliang@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/mptcp/simult_flows.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 0cc964e6f2c1..8f9ddb3ad4fe 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -250,7 +250,8 @@ run_test() [ $bail -eq 0 ] || exit $ret fi - printf "%-60s" "$msg - reverse direction" + msg+=" - reverse direction" + printf "%-60s" "${msg}" do_transfer $large $small $time lret=$? mptcp_lib_result_code "${lret}" "${msg}" From 2ef0d804c090658960c008446523863fd7e3541e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org> Date: Thu, 15 Feb 2024 19:25:38 +0100 Subject: [PATCH 251/304] selftests: mptcp: userspace_pm: unique subtest names It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated names. Some subtests from the userspace_pm selftest had the same names. That's because different subflows are created (and deleted) between the same pair of IP addresses. Simply adding the destination port in the name is then enough to have different names, because the destination port is always different. Note that adding such info takes a bit more space, so we need to increase a bit the width to print the name, simply to keep all the '[ OK ]' aligned as before. Fixes: f589234e1af0 ("selftests: mptcp: userspace_pm: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang <geliang@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 6167837f48e1..1b94a75604fe 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -75,7 +75,7 @@ print_test() { test_name="${1}" - _printf "%-63s" "${test_name}" + _printf "%-68s" "${test_name}" } print_results() @@ -542,7 +542,7 @@ verify_subflow_events() local remid local info - info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})" + info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then From 645c1dc965ef6b5554e5e69737bb179c7a0f872f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org> Date: Thu, 15 Feb 2024 19:25:39 +0100 Subject: [PATCH 252/304] selftests: mptcp: diag: unique 'in use' subtest names It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Some 'in use' subtests from the diag selftest had the same names, e.g.: chk 0 msk in use after flush Now the previous value is taken, to have different names, e.g.: chk 2->0 msk in use after flush While at it, avoid repeating the full message, declare it once in the helper. Fixes: ce9902573652 ("selftests: mptcp: diag: format subtests results in TAP") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang <geliang@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/mptcp/diag.sh | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index e0615c6ffb8d..266656a16229 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -166,9 +166,13 @@ chk_msk_listen() chk_msk_inuse() { local expected=$1 - local msg="$2" + local msg="....chk ${2:-${expected}} msk in use" local listen_nr + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) expected=$((expected + listen_nr)) @@ -179,7 +183,7 @@ chk_msk_inuse() sleep 0.1 done - __chk_nr get_msk_inuse $expected "$msg" 0 + __chk_nr get_msk_inuse $expected "${msg}" 0 } # $1: cestab nr @@ -227,11 +231,11 @@ wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" -chk_msk_inuse 2 "....chk 2 msk in use" +chk_msk_inuse 2 chk_msk_cestab 2 flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "2->0" chk_msk_cestab 0 echo "a" | \ @@ -247,11 +251,11 @@ echo "b" | \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" -chk_msk_inuse 1 "....chk 1 msk in use" +chk_msk_inuse 1 chk_msk_cestab 1 flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "1->0" chk_msk_cestab 0 NR_CLIENTS=100 @@ -273,11 +277,11 @@ for I in `seq 1 $NR_CLIENTS`; do done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" -chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" +chk_msk_inuse $((NR_CLIENTS*2)) "many" chk_msk_cestab $((NR_CLIENTS*2)) flush_pids -chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_inuse 0 "many->0" chk_msk_cestab 0 mptcp_lib_result_print_all_tap From 4103d8480866fe5abb71ef0ed8af3a3b7b9625bf Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org> Date: Thu, 15 Feb 2024 19:25:40 +0100 Subject: [PATCH 253/304] selftests: mptcp: diag: unique 'cestab' subtest names It is important to have a unique (sub)test name in TAP, because some CI environments drop tests with duplicated name. Some 'cestab' subtests from the diag selftest had the same names, e.g.: ....chk 0 cestab Now the previous value is taken, to have different names, e.g.: ....chk 2->0 cestab after flush While at it, the 'after flush' info is added, similar to what is done with the 'in use' subtests. Also inspired by these 'in use' subtests, 'many' is displayed instead of a large number: many msk socket present [ ok ] ....chk many msk in use [ ok ] ....chk many cestab [ ok ] ....chk many->0 msk in use after flush [ ok ] ....chk many->0 cestab after flush [ ok ] Fixes: 81ab772819da ("selftests: mptcp: diag: check CURRESTAB counters") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang <geliang@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/mptcp/diag.sh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 266656a16229..0a58ebb8b04c 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -189,10 +189,15 @@ chk_msk_inuse() # $1: cestab nr chk_msk_cestab() { - local cestab=$1 + local expected=$1 + local msg="....chk ${2:-${expected}} cestab" + + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ - "${cestab}" "....chk ${cestab} cestab" "" + "${expected}" "${msg}" "" } wait_connected() @@ -236,7 +241,7 @@ chk_msk_cestab 2 flush_pids chk_msk_inuse 0 "2->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "2->0" echo "a" | \ timeout ${timeout_test} \ @@ -256,7 +261,7 @@ chk_msk_cestab 1 flush_pids chk_msk_inuse 0 "1->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "1->0" NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do @@ -278,11 +283,11 @@ done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" chk_msk_inuse $((NR_CLIENTS*2)) "many" -chk_msk_cestab $((NR_CLIENTS*2)) +chk_msk_cestab $((NR_CLIENTS*2)) "many" flush_pids chk_msk_inuse 0 "many->0" -chk_msk_cestab 0 +chk_msk_cestab 0 "many->0" mptcp_lib_result_print_all_tap exit $ret From 5b76d928f8b779a1b19c5842e7cabee4cbb610c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli <florian.fainelli@broadcom.com> Date: Thu, 15 Feb 2024 10:27:31 -0800 Subject: [PATCH 254/304] net: bcmasp: Indicate MAC is in charge of PHY PM Avoid the PHY library call unnecessarily into the suspend/resume functions by setting phydev->mac_managed_pm to true. The ASP driver essentially does exactly what mdio_bus_phy_resume() does. Fixes: 490cb412007d ("net: bcmasp: Add support for ASP2.0 Ethernet controller") Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com> Signed-off-by: Justin Chen <justin.chen@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c index f59557b0cd51..6ad1366270f7 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp_intf.c @@ -1050,6 +1050,9 @@ static int bcmasp_netif_init(struct net_device *dev, bool phy_connect) netdev_err(dev, "could not attach to PHY\n"); goto err_phy_disable; } + + /* Indicate that the MAC is responsible for PHY PM */ + phydev->mac_managed_pm = true; } else if (!intf->wolopts) { ret = phy_resume(dev->phydev); if (ret) From f120e62e37f0af4c4cbe08e5a88ea60a6a17c858 Mon Sep 17 00:00:00 2001 From: Justin Chen <justin.chen@broadcom.com> Date: Thu, 15 Feb 2024 10:27:32 -0800 Subject: [PATCH 255/304] net: bcmasp: Sanity check is off by one A sanity check for OOB write is off by one leading to a false positive when the array is full. Fixes: 9b90aca97f6d ("net: ethernet: bcmasp: fix possible OOB write in bcmasp_netfilt_get_all_active()") Signed-off-by: Justin Chen <justin.chen@broadcom.com> Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/broadcom/asp2/bcmasp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/asp2/bcmasp.c b/drivers/net/ethernet/broadcom/asp2/bcmasp.c index 29b04a274d07..80245c65cc90 100644 --- a/drivers/net/ethernet/broadcom/asp2/bcmasp.c +++ b/drivers/net/ethernet/broadcom/asp2/bcmasp.c @@ -535,9 +535,6 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, int j = 0, i; for (i = 0; i < NUM_NET_FILTERS; i++) { - if (j == *rule_cnt) - return -EMSGSIZE; - if (!priv->net_filters[i].claimed || priv->net_filters[i].port != intf->port) continue; @@ -547,6 +544,9 @@ int bcmasp_netfilt_get_all_active(struct bcmasp_intf *intf, u32 *rule_locs, priv->net_filters[i - 1].wake_filter) continue; + if (j == *rule_cnt) + return -EMSGSIZE; + rule_locs[j++] = priv->net_filters[i].fs.location; } From b401b621758e46812da61fa58a67c3fd8d91de0d Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 18 Feb 2024 12:56:25 -0800 Subject: [PATCH 256/304] Linux 6.8-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ed80c7d98a7e..41fa8a2565f5 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From cd65c48d66920457129584553f217005d09b1edb Mon Sep 17 00:00:00 2001 From: Hangbin Liu <liuhangbin@gmail.com> Date: Thu, 15 Feb 2024 10:33:25 +0800 Subject: [PATCH 257/304] selftests: bonding: set active slave to primary eth1 specifically In bond priority testing, we set the primary interface to eth1 and add eth0,1,2 to bond in serial. This is OK in normal times. But when in debug kernel, the bridge port that eth0,1,2 connected would start slowly (enter blocking, forwarding state), which caused the primary interface down for a while after enslaving and active slave changed. Here is a test log from Jakub's debug test[1]. [ 400.399070][ T50] br0: port 1(s0) entered disabled state [ 400.400168][ T50] br0: port 4(s2) entered disabled state [ 400.941504][ T2791] bond0: (slave eth0): making interface the new active one [ 400.942603][ T2791] bond0: (slave eth0): Enslaving as an active interface with an up link [ 400.943633][ T2766] br0: port 1(s0) entered blocking state [ 400.944119][ T2766] br0: port 1(s0) entered forwarding state [ 401.128792][ T2792] bond0: (slave eth1): making interface the new active one [ 401.130771][ T2792] bond0: (slave eth1): Enslaving as an active interface with an up link [ 401.131643][ T69] br0: port 2(s1) entered blocking state [ 401.132067][ T69] br0: port 2(s1) entered forwarding state [ 401.346201][ T2793] bond0: (slave eth2): Enslaving as a backup interface with an up link [ 401.348414][ T50] br0: port 4(s2) entered blocking state [ 401.348857][ T50] br0: port 4(s2) entered forwarding state [ 401.519669][ T250] bond0: (slave eth0): link status definitely down, disabling slave [ 401.526522][ T250] bond0: (slave eth1): link status definitely down, disabling slave [ 401.526986][ T250] bond0: (slave eth2): making interface the new active one [ 401.629470][ T250] bond0: (slave eth0): link status definitely up [ 401.630089][ T250] bond0: (slave eth1): link status definitely up [...] # TEST: prio (active-backup ns_ip6_target primary_reselect 1) [FAIL] # Current active slave is eth2 but not eth1 Fix it by setting active slave to primary slave specifically before testing. [1] https://netdev-3.bots.linux.dev/vmksft-bonding-dbg/results/464301/1-bond-options-sh/stdout Fixes: 481b56e0391e ("selftests: bonding: re-format bond option tests") Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/drivers/net/bonding/bond_options.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index d508486cc0bd..9a3d3c389dad 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -62,6 +62,8 @@ prio_test() # create bond bond_reset "${param}" + # set active_slave to primary eth1 specifically + ip -n ${s_ns} link set bond0 type bond active_slave eth1 # check bonding member prio value ip -n ${s_ns} link set eth0 type bond_slave prio 0 From e42b9d8b9ea2672811285e6a7654887ff64d23f3 Mon Sep 17 00:00:00 2001 From: Qu Wenruo <wqu@suse.com> Date: Wed, 7 Feb 2024 10:00:42 +1030 Subject: [PATCH 258/304] btrfs: defrag: avoid unnecessary defrag caused by incorrect extent size [BUG] With the following file extent layout, defrag would do unnecessary IO and result more on-disk space usage. # mkfs.btrfs -f $dev # mount $dev $mnt # xfs_io -f -c "pwrite 0 40m" $mnt/foobar # sync # xfs_io -f -c "pwrite 40m 16k" $mnt/foobar # sync Above command would lead to the following file extent layout: item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 generation 7 type 1 (regular) extent data disk byte 298844160 nr 41943040 extent data offset 0 nr 41943040 ram 41943040 extent compression 0 (none) item 7 key (257 EXTENT_DATA 41943040) itemoff 15763 itemsize 53 generation 8 type 1 (regular) extent data disk byte 13631488 nr 16384 extent data offset 0 nr 16384 ram 16384 extent compression 0 (none) Which is mostly fine. We can allow the final 16K to be merged with the previous 40M, but it's upon the end users' preference. But if we defrag the file using the default parameters, it would result worse file layout: # btrfs filesystem defrag $mnt/foobar # sync item 6 key (257 EXTENT_DATA 0) itemoff 15816 itemsize 53 generation 7 type 1 (regular) extent data disk byte 298844160 nr 41943040 extent data offset 0 nr 8650752 ram 41943040 extent compression 0 (none) item 7 key (257 EXTENT_DATA 8650752) itemoff 15763 itemsize 53 generation 9 type 1 (regular) extent data disk byte 340787200 nr 33292288 extent data offset 0 nr 33292288 ram 33292288 extent compression 0 (none) item 8 key (257 EXTENT_DATA 41943040) itemoff 15710 itemsize 53 generation 8 type 1 (regular) extent data disk byte 13631488 nr 16384 extent data offset 0 nr 16384 ram 16384 extent compression 0 (none) Note the original 40M extent is still there, but a new 32M extent is created for no benefit at all. [CAUSE] There is an existing check to make sure we won't defrag a large enough extent (the threshold is by default 32M). But the check is using the length to the end of the extent: range_len = em->len - (cur - em->start); /* Skip too large extent */ if (range_len >= extent_thresh) goto next; This means, for the first 8MiB of the extent, the range_len is always smaller than the default threshold, and would not be defragged. But after the first 8MiB, the remaining part would fit the requirement, and be defragged. Such different behavior inside the same extent caused the above problem, and we should avoid different defrag decision inside the same extent. [FIX] Instead of using @range_len, just use @em->len, so that we have a consistent decision among the same file extent. Now with this fix, we won't touch the extent, thus not making it any worse. Reported-by: Filipe Manana <fdmanana@suse.com> Fixes: 0cb5950f3f3b ("btrfs: fix deadlock when reserving space during defrag") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Boris Burkov <boris@bur.io> Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/defrag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c index c276b136ab63..5b0b64571418 100644 --- a/fs/btrfs/defrag.c +++ b/fs/btrfs/defrag.c @@ -1046,7 +1046,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto add; /* Skip too large extent */ - if (range_len >= extent_thresh) + if (em->len >= extent_thresh) goto next; /* From b0ad381fa7690244802aed119b478b4bdafc31dd Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef@toxicpanda.com> Date: Mon, 12 Feb 2024 11:56:02 -0500 Subject: [PATCH 259/304] btrfs: fix deadlock with fiemap and extent locking While working on the patchset to remove extent locking I got a lockdep splat with fiemap and pagefaulting with my new extent lock replacement lock. This deadlock exists with our normal code, we just don't have lockdep annotations with the extent locking so we've never noticed it. Since we're copying the fiemap extent to user space on every iteration we have the chance of pagefaulting. Because we hold the extent lock for the entire range we could mkwrite into a range in the file that we have mmap'ed. This would deadlock with the following stack trace [<0>] lock_extent+0x28d/0x2f0 [<0>] btrfs_page_mkwrite+0x273/0x8a0 [<0>] do_page_mkwrite+0x50/0xb0 [<0>] do_fault+0xc1/0x7b0 [<0>] __handle_mm_fault+0x2fa/0x460 [<0>] handle_mm_fault+0xa4/0x330 [<0>] do_user_addr_fault+0x1f4/0x800 [<0>] exc_page_fault+0x7c/0x1e0 [<0>] asm_exc_page_fault+0x26/0x30 [<0>] rep_movs_alternative+0x33/0x70 [<0>] _copy_to_user+0x49/0x70 [<0>] fiemap_fill_next_extent+0xc8/0x120 [<0>] emit_fiemap_extent+0x4d/0xa0 [<0>] extent_fiemap+0x7f8/0xad0 [<0>] btrfs_fiemap+0x49/0x80 [<0>] __x64_sys_ioctl+0x3e1/0xb50 [<0>] do_syscall_64+0x94/0x1a0 [<0>] entry_SYSCALL_64_after_hwframe+0x6e/0x76 I wrote an fstest to reproduce this deadlock without my replacement lock and verified that the deadlock exists with our existing locking. To fix this simply don't take the extent lock for the entire duration of the fiemap. This is safe in general because we keep track of where we are when we're searching the tree, so if an ordered extent updates in the middle of our fiemap call we'll still emit the correct extents because we know what offset we were on before. The only place we maintain the lock is searching delalloc. Since the delalloc stuff can change during writeback we want to lock the extent range so we have a consistent view of delalloc at the time we're checking to see if we need to set the delalloc flag. With this patch applied we no longer deadlock with my testcase. CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> --- fs/btrfs/extent_io.c | 62 ++++++++++++++++++++++++++++++++------------ 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a0ffd41c5cc1..61d961a30dee 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2689,16 +2689,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode, * it beyond i_size. */ while (cur_offset < end && cur_offset < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; u64 prealloc_start; + u64 lockstart; + u64 lockend; u64 prealloc_len = 0; bool delalloc; + lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); + lockend = round_up(end, inode->root->fs_info->sectorsize); + + /* + * We are only locking for the delalloc range because that's the + * only thing that can change here. With fiemap we have a lock + * on the inode, so no buffered or direct writes can happen. + * + * However mmaps and normal page writeback will cause this to + * change arbitrarily. We have to lock the extent lock here to + * make sure that nobody messes with the tree while we're doing + * btrfs_find_delalloc_in_range. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) break; @@ -2866,15 +2884,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { const u64 ino = btrfs_ino(inode); - struct extent_state *cached_state = NULL; struct extent_state *delalloc_cached_state = NULL; struct btrfs_path *path; struct fiemap_cache cache = { 0 }; struct btrfs_backref_share_check_ctx *backref_ctx; u64 last_extent_end; u64 prev_extent_end; - u64 lockstart; - u64 lockend; + u64 range_start; + u64 range_end; + const u64 sectorsize = inode->root->fs_info->sectorsize; bool stopped = false; int ret; @@ -2885,12 +2903,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - lockstart = round_down(start, inode->root->fs_info->sectorsize); - lockend = round_up(start + len, inode->root->fs_info->sectorsize); - prev_extent_end = lockstart; + range_start = round_down(start, sectorsize); + range_end = round_up(start + len, sectorsize); + prev_extent_end = range_start; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) @@ -2898,7 +2915,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, btrfs_release_path(path); path->reada = READA_FORWARD; - ret = fiemap_search_slot(inode, path, lockstart); + ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { goto out_unlock; } else if (ret > 0) { @@ -2910,7 +2927,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto check_eof_delalloc; } - while (prev_extent_end < lockend) { + while (prev_extent_end < range_end) { struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *ei; struct btrfs_key key; @@ -2933,19 +2950,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, * The first iteration can leave us at an extent item that ends * before our range's start. Move to the next item. */ - if (extent_end <= lockstart) + if (extent_end <= range_start) goto next_item; backref_ctx->curr_leaf_bytenr = leaf->start; /* We have in implicit hole (NO_HOLES feature enabled). */ if (prev_extent_end < key.offset) { - const u64 range_end = min(key.offset, lockend) - 1; + const u64 hole_end = min(key.offset, range_end) - 1; ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, - prev_extent_end, range_end); + prev_extent_end, hole_end); if (ret < 0) { goto out_unlock; } else if (ret > 0) { @@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } /* We've reached the end of the fiemap range, stop. */ - if (key.offset >= lockend) { + if (key.offset >= range_end) { stopped = true; break; } @@ -3049,29 +3066,41 @@ check_eof_delalloc: btrfs_free_path(path); path = NULL; - if (!stopped && prev_extent_end < lockend) { + if (!stopped && prev_extent_end < range_end) { ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, - 0, 0, 0, prev_extent_end, lockend - 1); + 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) goto out_unlock; - prev_extent_end = lockend; + prev_extent_end = range_end; } if (cache.cached && cache.offset + cache.len >= last_extent_end) { const u64 i_size = i_size_read(&inode->vfs_inode); if (prev_extent_end < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; + u64 lockstart; + u64 lockend; bool delalloc; + lockstart = round_down(prev_extent_end, sectorsize); + lockend = round_up(i_size, sectorsize); + + /* + * See the comment in fiemap_process_hole as to why + * we're doing the locking here. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, prev_extent_end, i_size - 1, &delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) cache.flags |= FIEMAP_EXTENT_LAST; } else { @@ -3082,7 +3111,6 @@ check_eof_delalloc: ret = emit_last_fiemap_cache(fieinfo, &cache); out_unlock: - unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); From 121e4dcba3700b30e63f25203d09ddfccbab4a09 Mon Sep 17 00:00:00 2001 From: Shannon Nelson <shannon.nelson@amd.com> Date: Fri, 16 Feb 2024 14:52:59 -0800 Subject: [PATCH 260/304] ionic: use pci_is_enabled not open code Since there is a utility available for this, use the API rather than open code. Fixes: 13943d6c8273 ("ionic: prevent pci disable of already disabled device") Reviewed-by: Brett Creeley <brett.creeley@amd.com> Signed-off-by: Shannon Nelson <shannon.nelson@amd.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index c49aa358e424..10a9d80db32c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -223,7 +223,7 @@ static void ionic_clear_pci(struct ionic *ionic) ionic_unmap_bars(ionic); pci_release_regions(ionic->pdev); - if (atomic_read(&ionic->pdev->enable_cnt) > 0) + if (pci_is_enabled(ionic->pdev)) pci_disable_device(ionic->pdev); } From 40b9385dd8e6a0515e1c9cd06a277483556b7286 Mon Sep 17 00:00:00 2001 From: Kees Cook <keescook@chromium.org> Date: Fri, 16 Feb 2024 15:30:05 -0800 Subject: [PATCH 261/304] enic: Avoid false positive under FORTIFY_SOURCE FORTIFY_SOURCE has been ignoring 0-sized destinations while the kernel code base has been converted to flexible arrays. In order to enforce the 0-sized destinations (e.g. with __counted_by), the remaining 0-sized destinations need to be handled. Unfortunately, struct vic_provinfo resists full conversion, as it contains a flexible array of flexible arrays, which is only possible with the 0-sized fake flexible array. Use unsafe_memcpy() to avoid future false positives under CONFIG_FORTIFY_SOURCE. Signed-off-by: Kees Cook <keescook@chromium.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/cisco/enic/vnic_vic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/vnic_vic.c b/drivers/net/ethernet/cisco/enic/vnic_vic.c index 20fcb20b42ed..66b577835338 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_vic.c +++ b/drivers/net/ethernet/cisco/enic/vnic_vic.c @@ -49,7 +49,8 @@ int vic_provinfo_add_tlv(struct vic_provinfo *vp, u16 type, u16 length, tlv->type = htons(type); tlv->length = htons(length); - memcpy(tlv->value, value, length); + unsafe_memcpy(tlv->value, value, length, + /* Flexible array of flexible arrays */); vp->num_tlvs = htonl(ntohl(vp->num_tlvs) + 1); vp->length = htonl(ntohl(vp->length) + From 0281b919e175bb9c3128bd3872ac2903e9436e3f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau <martin.lau@kernel.org> Date: Thu, 15 Feb 2024 13:12:17 -0800 Subject: [PATCH 262/304] bpf: Fix racing between bpf_timer_cancel_and_free and bpf_timer_cancel The following race is possible between bpf_timer_cancel_and_free and bpf_timer_cancel. It will lead a UAF on the timer->timer. bpf_timer_cancel(); spin_lock(); t = timer->time; spin_unlock(); bpf_timer_cancel_and_free(); spin_lock(); t = timer->timer; timer->timer = NULL; spin_unlock(); hrtimer_cancel(&t->timer); kfree(t); /* UAF on t */ hrtimer_cancel(&t->timer); In bpf_timer_cancel_and_free, this patch frees the timer->timer after a rcu grace period. This requires a rcu_head addition to the "struct bpf_hrtimer". Another kfree(t) happens in bpf_timer_init, this does not need a kfree_rcu because it is still under the spin_lock and timer->timer has not been visible by others yet. In bpf_timer_cancel, rcu_read_lock() is added because this helper can be used in a non rcu critical section context (e.g. from a sleepable bpf prog). Other timer->timer usages in helpers.c have been audited, bpf_timer_cancel() is the only place where timer->timer is used outside of the spin_lock. Another solution considered is to mark a t->flag in bpf_timer_cancel and clear it after hrtimer_cancel() is done. In bpf_timer_cancel_and_free, it busy waits for the flag to be cleared before kfree(t). This patch goes with a straight forward solution and frees timer->timer after a rcu grace period. Fixes: b00628b1c7d5 ("bpf: Introduce bpf timers.") Suggested-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/bpf/20240215211218.990808-1-martin.lau@linux.dev --- kernel/bpf/helpers.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index be72824f32b2..d19cd863d294 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1101,6 +1101,7 @@ struct bpf_hrtimer { struct bpf_prog *prog; void __rcu *callback_fn; void *value; + struct rcu_head rcu; }; /* the actual struct hidden inside uapi struct bpf_timer */ @@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) if (in_nmi()) return -EOPNOTSUPP; + rcu_read_lock(); __bpf_spin_lock_irqsave(&timer->lock); t = timer->timer; if (!t) { @@ -1353,6 +1355,7 @@ out: * if it was running. */ ret = ret ?: hrtimer_cancel(&t->timer); + rcu_read_unlock(); return ret; } @@ -1407,7 +1410,7 @@ out: */ if (this_cpu_read(hrtimer_running) != t) hrtimer_cancel(&t->timer); - kfree(t); + kfree_rcu(t, rcu); } BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) From 3f00e4a9c96f4488a924aff4e35b77c8eced897e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau <martin.lau@kernel.org> Date: Thu, 15 Feb 2024 13:12:18 -0800 Subject: [PATCH 263/304] selftests/bpf: Test racing between bpf_timer_cancel_and_free and bpf_timer_cancel This selftest is based on a Alexei's test adopted from an internal user to troubleshoot another bug. During this exercise, a separate racing bug was discovered between bpf_timer_cancel_and_free and bpf_timer_cancel. The details can be found in the previous patch. This patch is to add a selftest that can trigger the bug. I can trigger the UAF everytime in my qemu setup with KASAN. The idea is to have multiple user space threads running in a tight loop to exercise both bpf_map_update_elem (which calls into bpf_timer_cancel_and_free) and bpf_timer_cancel. Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Hou Tao <houtao1@huawei.com> Link: https://lore.kernel.org/bpf/20240215211218.990808-2-martin.lau@linux.dev --- .../testing/selftests/bpf/prog_tests/timer.c | 35 ++++++++++++++++++- tools/testing/selftests/bpf/progs/timer.c | 34 +++++++++++++++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 760ad96b4be0..d66687f1ee6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -4,10 +4,29 @@ #include "timer.skel.h" #include "timer_failure.skel.h" +#define NUM_THR 8 + +static void *spin_lock_thread(void *arg) +{ + int i, err, prog_fd = *(int *)arg; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + for (i = 0; i < 10000; i++) { + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err") || + !ASSERT_OK(topts.retval, "test_run_opts retval")) + break; + } + + pthread_exit(arg); +} + static int timer(struct timer *timer_skel) { - int err, prog_fd; + int i, err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts); + pthread_t thread_id[NUM_THR]; + void *ret; err = timer__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) @@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel) /* check that code paths completed */ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); + prog_fd = bpf_program__fd(timer_skel->progs.race); + for (i = 0; i < NUM_THR; i++) { + err = pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + break; + } + + while (i) { + err = pthread_join(thread_id[--i], &ret); + if (ASSERT_OK(err, "pthread_join")) + ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join"); + } + return 0; } diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 8b946c8188c6..f615da97df26 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -51,7 +51,8 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, struct elem); -} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"), + race_array SEC(".maps"); __u64 bss_data; __u64 abs_data; @@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a) return 0; } + +static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer) +{ + bpf_timer_start(timer, 1000000, 0); + return 0; +} + +SEC("syscall") +int race(void *ctx) +{ + struct bpf_timer *timer; + int err, race_key = 0; + struct elem init; + + __builtin_memset(&init, 0, sizeof(struct elem)); + bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY); + + timer = bpf_map_lookup_elem(&race_array, &race_key); + if (!timer) + return 1; + + err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC); + if (err && err != -EBUSY) + return 1; + + bpf_timer_set_callback(timer, race_timer_callback); + bpf_timer_start(timer, 0, 0); + bpf_timer_cancel(timer); + + return 0; +} From 5f2ae606cb5a90839a9be9d22388c4200f820e75 Mon Sep 17 00:00:00 2001 From: Yafang Shao <laoar.shao@gmail.com> Date: Sat, 17 Feb 2024 19:41:51 +0800 Subject: [PATCH 264/304] bpf: Fix an issue due to uninitialized bpf_iter_task Failure to initialize it->pos, coupled with the presence of an invalid value in the flags variable, can lead to it->pos referencing an invalid task, potentially resulting in a kernel panic. To mitigate this risk, it's crucial to ensure proper initialization of it->pos to NULL. Fixes: ac8148d957f5 ("bpf: bpf_iter_task_next: use next_task(kit->task) rather than next_task(kit->pos)") Signed-off-by: Yafang Shao <laoar.shao@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Yonghong Song <yonghong.song@linux.dev> Acked-by: Oleg Nesterov <oleg@redhat.com> Link: https://lore.kernel.org/bpf/20240217114152.1623-2-laoar.shao@gmail.com --- kernel/bpf/task_iter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index e5c3500443c6..ec4e97c61eef 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != __alignof__(struct bpf_iter_task)); + kit->pos = NULL; + switch (flags) { case BPF_TASK_ITER_ALL_THREADS: case BPF_TASK_ITER_ALL_PROCS: From 5c138a8a4abe152fcbef1ed40a6a4b5727b2991b Mon Sep 17 00:00:00 2001 From: Yafang Shao <laoar.shao@gmail.com> Date: Sat, 17 Feb 2024 19:41:52 +0800 Subject: [PATCH 265/304] selftests/bpf: Add negtive test cases for task iter Incorporate a test case to assess the handling of invalid flags or task__nullable parameters passed to bpf_iter_task_new(). Prior to the preceding commit, this scenario could potentially trigger a kernel panic. However, with the previous commit, this test case is expected to function correctly. Signed-off-by: Yafang Shao <laoar.shao@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20240217114152.1623-3-laoar.shao@gmail.com --- tools/testing/selftests/bpf/prog_tests/iters.c | 1 + tools/testing/selftests/bpf/progs/iters_task.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index bf84d4a1d9ae..3c440370c1f0 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -193,6 +193,7 @@ static void subtest_task_iters(void) ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); pthread_mutex_unlock(&do_nothing_mutex); for (int i = 0; i < thread_num; i++) ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join"); diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c index c9b4055cd410..e4d53e40ff20 100644 --- a/tools/testing/selftests/bpf/progs/iters_task.c +++ b/tools/testing/selftests/bpf/progs/iters_task.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; pid_t target_pid; -int procs_cnt, threads_cnt, proc_threads_cnt; +int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx) procs_cnt = threads_cnt = proc_threads_cnt = 0; bpf_rcu_read_lock(); + bpf_for_each(task, pos, NULL, ~0U) { + /* Below instructions shouldn't be executed for invalid flags */ + invalid_cnt++; + } + + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) { + /* Below instructions shouldn't be executed for invalid task__nullable */ + invalid_cnt++; + } + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) if (pos->pid == target_pid) procs_cnt++; From 5559cea2d5aa3018a5f00dd2aca3427ba09b386b Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev <kovalev@altlinux.org> Date: Thu, 15 Feb 2024 23:27:17 +0300 Subject: [PATCH 266/304] ipv6: sr: fix possible use-after-free and null-ptr-deref The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Fixes: 915d7e5e5930 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org> Link: https://lore.kernel.org/r/20240215202717.29815-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/ipv6/seg6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec9f..35508abd76f4 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } From def689fc26b9a9622d2e2cb0c4933dd3b1c8071c Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev <kovalev@altlinux.org> Date: Thu, 15 Feb 2024 23:34:00 +0300 Subject: [PATCH 267/304] devlink: fix possible use-after-free and memory leaks in devlink_init() The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Make an unregister in case of unsuccessful registration. Fixes: 687125b5799c ("devlink: split out core code") Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org> Link: https://lore.kernel.org/r/20240215203400.29976-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/devlink/core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/devlink/core.c b/net/devlink/core.c index 6a58342752b4..7f0b093208d7 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -529,14 +529,20 @@ static int __init devlink_init(void) { int err; - err = genl_register_family(&devlink_nl_family); - if (err) - goto out; err = register_pernet_subsys(&devlink_pernet_ops); if (err) goto out; + err = genl_register_family(&devlink_nl_family); + if (err) + goto out_unreg_pernet_subsys; err = register_netdevice_notifier(&devlink_port_netdevice_nb); + if (!err) + return 0; + genl_unregister_family(&devlink_nl_family); + +out_unreg_pernet_subsys: + unregister_pernet_subsys(&devlink_pernet_ops); out: WARN_ON(err); return err; From a7d6027790acea24446ddd6632d394096c0f4667 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima <kuniyu@amazon.com> Date: Thu, 15 Feb 2024 15:05:16 -0800 Subject: [PATCH 268/304] arp: Prevent overflow in arp_req_get(). syzkaller reported an overflown write in arp_req_get(). [0] When ioctl(SIOCGARP) is issued, arp_req_get() looks up an neighbour entry and copies neigh->ha to struct arpreq.arp_ha.sa_data. The arp_ha here is struct sockaddr, not struct sockaddr_storage, so the sa_data buffer is just 14 bytes. In the splat below, 2 bytes are overflown to the next int field, arp_flags. We initialise the field just after the memcpy(), so it's not a problem. However, when dev->addr_len is greater than 22 (e.g. MAX_ADDR_LEN), arp_netmask is overwritten, which could be set as htonl(0xFFFFFFFFUL) in arp_ioctl() before calling arp_req_get(). To avoid the overflow, let's limit the max length of memcpy(). Note that commit b5f0de6df6dc ("net: dev: Convert sa_data to flexible array in struct sockaddr") just silenced syzkaller. [0]: memcpy: detected field-spanning write (size 16) of single field "r->arp_ha.sa_data" at net/ipv4/arp.c:1128 (size 14) WARNING: CPU: 0 PID: 144638 at net/ipv4/arp.c:1128 arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Modules linked in: CPU: 0 PID: 144638 Comm: syz-executor.4 Not tainted 6.1.74 #31 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 RIP: 0010:arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Code: fd ff ff e8 41 42 de fb b9 0e 00 00 00 4c 89 fe 48 c7 c2 20 6d ab 87 48 c7 c7 80 6d ab 87 c6 05 25 af 72 04 01 e8 5f 8d ad fb <0f> 0b e9 6c fd ff ff e8 13 42 de fb be 03 00 00 00 4c 89 e7 e8 a6 RSP: 0018:ffffc900050b7998 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88803a815000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8641a44a RDI: 0000000000000001 RBP: ffffc900050b7a98 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 203a7970636d656d R12: ffff888039c54000 R13: 1ffff92000a16f37 R14: ffff88803a815084 R15: 0000000000000010 FS: 00007f172bf306c0(0000) GS:ffff88805aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f172b3569f0 CR3: 0000000057f12005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> arp_ioctl+0x33f/0x4b0 net/ipv4/arp.c:1261 inet_ioctl+0x314/0x3a0 net/ipv4/af_inet.c:981 sock_do_ioctl+0xdf/0x260 net/socket.c:1204 sock_ioctl+0x3ef/0x650 net/socket.c:1321 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x18e/0x220 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x7f172b262b8d Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f172bf300b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f172b3abf80 RCX: 00007f172b262b8d RDX: 0000000020000000 RSI: 0000000000008954 RDI: 0000000000000003 RBP: 00007f172b2d3493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f172b3abf80 R15: 00007f172bf10000 </TASK> Reported-by: syzkaller <syzkaller@googlegroups.com> Reported-by: Bjoern Doebel <doebel@amazon.de> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Link: https://lore.kernel.org/r/20240215230516.31330-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35e5..0d0d725b46ad 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; From 23f9c2c066e7e5052406fb8f04a115d3d0260b22 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <kuba@kernel.org> Date: Fri, 16 Feb 2024 08:19:45 -0800 Subject: [PATCH 269/304] docs: netdev: update the link to the CI repo Netronome graciously transferred the original NIPA repo to our new netdev umbrella org. Link to that instead of my private fork. Signed-off-by: Jakub Kicinski <kuba@kernel.org> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://lore.kernel.org/r/20240216161945.2208842-1-kuba@kernel.org Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- Documentation/process/maintainer-netdev.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index 84ee60fceef2..fd96e4a3cef9 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -431,7 +431,7 @@ patchwork checks Checks in patchwork are mostly simple wrappers around existing kernel scripts, the sources are available at: -https://github.com/kuba-moo/nipa/tree/master/tests +https://github.com/linux-netdev/nipa/tree/master/tests **Do not** post your patches just to run them through the checks. You must ensure that your patches are ready by testing them locally From 944d5fe50f3f03daacfea16300e656a1691c4a23 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linuxfoundation.org> Date: Sun, 4 Feb 2024 15:25:12 +0000 Subject: [PATCH 270/304] sched/membarrier: reduce the ability to hammer on sys_membarrier On some systems, sys_membarrier can be very expensive, causing overall slowdowns for everything. So put a lock on the path in order to serialize the accesses to prevent the ability for this to be called at too high of a frequency and saturate the machine. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Reviewed-and-tested-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Acked-by: Borislav Petkov <bp@alien8.de> Fixes: 22e4ebb97582 ("membarrier: Provide expedited private command") Fixes: c5f58bd58f43 ("membarrier: Provide GLOBAL_EXPEDITED command") Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- kernel/sched/membarrier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 2ad881d07752..4e715b9b278e 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -162,6 +162,9 @@ | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \ | MEMBARRIER_CMD_GET_REGISTRATIONS) +static DEFINE_MUTEX(membarrier_ipi_mutex); +#define SERIALIZE_IPI() guard(mutex)(&membarrier_ipi_mutex) + static void ipi_mb(void *info) { smp_mb(); /* IPIs should be serializing but paranoid. */ @@ -259,6 +262,7 @@ static int membarrier_global_expedited(void) if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { @@ -347,6 +351,7 @@ static int membarrier_private_expedited(int flags, int cpu_id) if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) return -ENOMEM; + SERIALIZE_IPI(); cpus_read_lock(); if (cpu_id >= 0) { @@ -460,6 +465,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) * between threads which are users of @mm has its membarrier state * updated. */ + SERIALIZE_IPI(); cpus_read_lock(); rcu_read_lock(); for_each_online_cpu(cpu) { From fca7526b7d8910c6125cb1ebc3e78ccd5f50ec52 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Tue, 20 Feb 2024 12:16:47 -0800 Subject: [PATCH 271/304] drm/tests/drm_buddy: fix build failure on 32-bit targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guenter Roeck reports that commit a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test") causes build failures on 32-bit targets: "This patch breaks the build on all 32-bit systems since it introduces an unhandled direct 64-bit divide operation. ERROR: modpost: "__umoddi3" [drivers/gpu/drm/tests/drm_buddy_test.ko] undefined! ERROR: modpost: "__moddi3" [drivers/gpu/drm/tests/drm_buddy_test.ko] undefined!" and the uses of 'u64' are all entirely pointless. Yes, the arguments to drm_buddy_init() and drm_buddy_alloc_blocks() are in fact of type 'u64', but none of the values here are remotely relevant, and the compiler will happily just do the type expansion. Of course, in a perfect world the compiler would also have just noticed that all the values in question are tiny, and range analysis would have shown that doing a 64-bit divide is pointless, but that is admittedly expecting a fair amount of the compiler. IOW, we shouldn't write code that the compiler then has to notice is unnecessarily complicated just to avoid extra work. We do have fairly high expectations of compilers, but kernel code should be reasonable to begin with. It turns out that there are also other issues with this code: the KUnit assertion messages have incorrect types in the format strings, but that's a widely spread issue caused by the KUnit infrastructure not having enabled format string verification. We'll get that sorted out separately. Reported-by: Guenter Roeck <linux@roeck-us.net> Fixes: a64056bb5a32 ("drm/tests/drm_buddy: add alloc_contiguous test") Link: https://lore.kernel.org/all/538327ff-8d34-41d5-a9ae-1a334744f5ae@roeck-us.net/ Cc: Matthew Auld <matthew.auld@intel.com> Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com> Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/gpu/drm/tests/drm_buddy_test.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index fee6bec757d1..8a464f7f4c61 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -21,7 +21,8 @@ static inline u64 get_size(int order, u64 chunk_size) static void drm_test_buddy_alloc_contiguous(struct kunit *test) { - u64 mm_size, ps = SZ_4K, i, n_pages, total; + const unsigned long ps = SZ_4K, mm_size = 16 * 3 * SZ_4K; + unsigned long i, n_pages, total; struct drm_buddy_block *block; struct drm_buddy mm; LIST_HEAD(left); @@ -29,8 +30,6 @@ static void drm_test_buddy_alloc_contiguous(struct kunit *test) LIST_HEAD(right); LIST_HEAD(allocated); - mm_size = 16 * 3 * SZ_4K; - KUNIT_EXPECT_FALSE(test, drm_buddy_init(&mm, mm_size, ps)); /* From 8d3a7dfb801d157ac423261d7cd62c33e95375f8 Mon Sep 17 00:00:00 2001 From: Oliver Upton <oliver.upton@linux.dev> Date: Wed, 21 Feb 2024 09:27:31 +0000 Subject: [PATCH 272/304] KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table() vgic_get_irq() may not return a valid descriptor if there is no ITS that holds a valid translation for the specified INTID. If that is the case, it is safe to silently ignore it and continue processing the LPI pending table. Cc: stable@vger.kernel.org Fixes: 33d3bc9556a7 ("KVM: arm64: vgic-its: Read initial LPI pending table") Signed-off-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20240221092732.4126848-2-oliver.upton@linux.dev Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index e2764d0ffa9f..082448de27ed 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -468,6 +468,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) } irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + if (!irq) + continue; + raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = pendmask & (1U << bit_nr); vgic_queue_irq_unlock(vcpu->kvm, irq, flags); From 85a71ee9a0700f6c18862ef3b0011ed9dad99aca Mon Sep 17 00:00:00 2001 From: Oliver Upton <oliver.upton@linux.dev> Date: Wed, 21 Feb 2024 09:27:32 +0000 Subject: [PATCH 273/304] KVM: arm64: vgic-its: Test for valid IRQ in MOVALL handler It is possible that an LPI mapped in a different ITS gets unmapped while handling the MOVALL command. If that is the case, there is no state that can be migrated to the destination. Silently ignore it and continue migrating other LPIs. Cc: stable@vger.kernel.org Fixes: ff9c114394aa ("KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE") Signed-off-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20240221092732.4126848-3-oliver.upton@linux.dev Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-its.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 082448de27ed..28a93074eca1 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1435,6 +1435,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, for (i = 0; i < irq_count; i++) { irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; update_affinity(irq, vcpu2); From 3b1ae9b71c2a97f848b00fb085a2bd29bddbe8d9 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep <sbhatta@marvell.com> Date: Mon, 19 Feb 2024 18:25:14 +0530 Subject: [PATCH 274/304] octeontx2-af: Consider the action set by PF AF reserves MCAM entries for each PF, VF present in the system and populates the entry with DMAC and action with default RSS so that basic packet I/O works. Since PF/VF is not aware of the RSS action installed by AF, AF only fixup the actions of the rules installed by PF/VF with corresponding default RSS action. This worked well for rules installed by PF/VF for features like RX VLAN offload and DMAC filters but rules involving action like drop/forward to queue are also getting modified by AF. Hence fix it by setting the default RSS action only if requested by PF/VF. Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet replication feature") Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index e5d6156655ba..516adb50f9f6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -415,6 +415,10 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, return; } + /* AF modifies given action iff PF/VF has requested for it */ + if ((entry->action & 0xFULL) != NIX_RX_ACTION_DEFAULT) + return; + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); From 56667da7399eb19af857e30f41bea89aa6fa812c Mon Sep 17 00:00:00 2001 From: Eric Dumazet <edumazet@google.com> Date: Mon, 19 Feb 2024 14:12:20 +0000 Subject: [PATCH 275/304] net: implement lockless setsockopt(SO_PEEK_OFF) syzbot reported a lockdep violation [1] involving af_unix support of SO_PEEK_OFF. Since SO_PEEK_OFF is inherently not thread safe (it uses a per-socket sk_peek_off field), there is really no point to enforce a pointless thread safety in the kernel. After this patch : - setsockopt(SO_PEEK_OFF) no longer acquires the socket lock. - skb_consume_udp() no longer has to acquire the socket lock. - af_unix no longer needs a special version of sk_set_peek_off(), because it does not lock u->iolock anymore. As a followup, we could replace prot->set_peek_off to be a boolean and avoid an indirect call, since we always use sk_set_peek_off(). [1] WARNING: possible circular locking dependency detected 6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0 Not tainted syz-executor.2/30025 is trying to acquire lock: ffff8880765e7d80 (&u->iolock){+.+.}-{3:3}, at: unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789 but task is already holding lock: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline] ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline] ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (sk_lock-AF_UNIX){+.+.}-{0:0}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 lock_sock_nested+0x48/0x100 net/core/sock.c:3524 lock_sock include/net/sock.h:1691 [inline] __unix_dgram_recvmsg+0x1275/0x12c0 net/unix/af_unix.c:2415 sock_recvmsg_nosec+0x18e/0x1d0 net/socket.c:1046 ____sys_recvmsg+0x3c0/0x470 net/socket.c:2801 ___sys_recvmsg net/socket.c:2845 [inline] do_recvmmsg+0x474/0xae0 net/socket.c:2939 __sys_recvmmsg net/socket.c:3018 [inline] __do_sys_recvmmsg net/socket.c:3041 [inline] __se_sys_recvmmsg net/socket.c:3034 [inline] __x64_sys_recvmmsg+0x199/0x250 net/socket.c:3034 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 -> #0 (&u->iolock){+.+.}-{3:3}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __mutex_lock_common kernel/locking/mutex.c:608 [inline] __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752 unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789 sk_setsockopt+0x207e/0x3360 do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307 __sys_setsockopt+0x1ad/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_UNIX); lock(&u->iolock); lock(sk_lock-AF_UNIX); lock(&u->iolock); *** DEADLOCK *** 1 lock held by syz-executor.2/30025: #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1691 [inline] #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sockopt_lock_sock net/core/sock.c:1060 [inline] #0: ffff8880765e7930 (sk_lock-AF_UNIX){+.+.}-{0:0}, at: sk_setsockopt+0xe52/0x3360 net/core/sock.c:1193 stack backtrace: CPU: 0 PID: 30025 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00267-g0f1dd5e91e2b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __mutex_lock_common kernel/locking/mutex.c:608 [inline] __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752 unix_set_peek_off+0x26/0xa0 net/unix/af_unix.c:789 sk_setsockopt+0x207e/0x3360 do_sock_setsockopt+0x2fb/0x720 net/socket.c:2307 __sys_setsockopt+0x1ad/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0xb5/0xd0 net/socket.c:2340 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f78a1c7dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f78a0fde0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 00007f78a1dac050 RCX: 00007f78a1c7dda9 RDX: 000000000000002a RSI: 0000000000000001 RDI: 0000000000000006 RBP: 00007f78a1cca47a R08: 0000000000000004 R09: 0000000000000000 R10: 0000000020000180 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f78a1dac050 R15: 00007ffe5cd81ae8 Fixes: 859051dd165e ("bpf: Implement cgroup sockaddr hooks for unix sockets") Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com> Cc: Daan De Meyer <daan.j.demeyer@gmail.com> Cc: Kuniyuki Iwashima <kuniyu@amazon.com> Cc: Martin KaFai Lau <martin.lau@kernel.org> Cc: David Ahern <dsahern@kernel.org> Reviewed-by: Willem de Bruijn <willemb@google.com> Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/sock.c | 23 +++++++++++------------ net/ipv4/udp.c | 7 +------ net/unix/af_unix.c | 19 +++---------------- 3 files changed, 15 insertions(+), 34 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 0a7f46c37f0c..5e78798456fd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1188,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname, */ WRITE_ONCE(sk->sk_txrehash, (u8)val); return 0; + case SO_PEEK_OFF: + { + int (*set_peek_off)(struct sock *sk, int val); + + set_peek_off = READ_ONCE(sock->ops)->set_peek_off; + if (set_peek_off) + ret = set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + return ret; + } } sockopt_lock_sock(sk); @@ -1430,18 +1441,6 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; - case SO_PEEK_OFF: - { - int (*set_peek_off)(struct sock *sk, int val); - - set_peek_off = READ_ONCE(sock->ops)->set_peek_off; - if (set_peek_off) - ret = set_peek_off(sk, val); - else - ret = -EOPNOTSUPP; - break; - } - case SO_NOFCS: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f631b0a21af4..e474b201900f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk) void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { - if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { - bool slow = lock_sock_fast(sk); - - sk_peek_offset_bwd(sk, len); - unlock_sock_fast(sk, slow); - } + sk_peek_offset_bwd(sk, len); if (!skb_unref(skb)) return; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 30b178ebba60..0748e7ea5210 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_set_peek_off(struct sock *sk, int val) -{ - struct unix_sock *u = unix_sk(sk); - - if (mutex_lock_interruptible(&u->iolock)) - return -EINTR; - - WRITE_ONCE(sk->sk_peek_off, val); - mutex_unlock(&u->iolock); - - return 0; -} - #ifdef CONFIG_PROC_FS static int unix_count_nr_fds(struct sock *sk) { @@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = { .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .splice_read = unix_stream_splice_read, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = { .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = { .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; From d80f8e96d47d7374794a30fbed69be43f3388afc Mon Sep 17 00:00:00 2001 From: Alex Elder <elder@linaro.org> Date: Mon, 19 Feb 2024 08:40:15 -0600 Subject: [PATCH 276/304] net: ipa: don't overrun IPA suspend interrupt registers In newer hardware, IPA supports more than 32 endpoints. Some registers--such as IPA interrupt registers--represent endpoints as bits in a 4-byte register, and such registers are repeated as needed to represent endpoints beyond the first 32. In ipa_interrupt_suspend_clear_all(), we clear all pending IPA suspend interrupts by reading all status register(s) and writing corresponding registers to clear interrupt conditions. Unfortunately the number of registers to read/write is calculated incorrectly, and as a result we access *many* more registers than intended. This bug occurs only when the IPA hardware signals a SUSPEND interrupt, which happens when a packet is received for an endpoint (or its underlying GSI channel) that is suspended. This situation is difficult to reproduce, but possible. Fix this by correctly computing the number of interrupt registers to read and write. This is the only place in the code where registers that map endpoints or channels this way perform this calculation. Fixes: f298ba785e2d ("net: ipa: add a parameter to suspend registers") Signed-off-by: Alex Elder <elder@linaro.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ipa/ipa_interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_interrupt.c b/drivers/net/ipa/ipa_interrupt.c index 4bc05948f772..a78c692f2d3c 100644 --- a/drivers/net/ipa/ipa_interrupt.c +++ b/drivers/net/ipa/ipa_interrupt.c @@ -212,7 +212,7 @@ void ipa_interrupt_suspend_clear_all(struct ipa_interrupt *interrupt) u32 unit_count; u32 unit; - unit_count = roundup(ipa->endpoint_count, 32); + unit_count = DIV_ROUND_UP(ipa->endpoint_count, 32); for (unit = 0; unit < unit_count; unit++) { const struct reg *reg; u32 val; From aa82ac51d63328714645c827775d64dbfd9941f3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima <kuniyu@amazon.com> Date: Mon, 19 Feb 2024 09:46:57 -0800 Subject: [PATCH 277/304] af_unix: Drop oob_skb ref before purging queue in GC. syzbot reported another task hung in __unix_gc(). [0] The current while loop assumes that all of the left candidates have oob_skb and calling kfree_skb(oob_skb) releases the remaining candidates. However, I missed a case that oob_skb has self-referencing fd and another fd and the latter sk is placed before the former in the candidate list. Then, the while loop never proceeds, resulting the task hung. __unix_gc() has the same loop just before purging the collected skb, so we can call kfree_skb(oob_skb) there and let __skb_queue_purge() release all inflight sockets. [0]: Sending NMI from CPU 0 to CPUs 1: NMI backtrace for cpu 1 CPU: 1 PID: 2784 Comm: kworker/u4:8 Not tainted 6.8.0-rc4-syzkaller-01028-g71b605d32017 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Workqueue: events_unbound __unix_gc RIP: 0010:__sanitizer_cov_trace_pc+0x0/0x70 kernel/kcov.c:200 Code: 89 fb e8 23 00 00 00 48 8b 3d 84 f5 1a 0c 48 89 de 5b e9 43 26 57 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 <f3> 0f 1e fa 48 8b 04 24 65 48 8b 0d 90 52 70 7e 65 8b 15 91 52 70 RSP: 0018:ffffc9000a17fa78 EFLAGS: 00000287 RAX: ffffffff8a0a6108 RBX: ffff88802b6c2640 RCX: ffff88802c0b3b80 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffffc9000a17fbf0 R08: ffffffff89383f1d R09: 1ffff1100ee5ff84 R10: dffffc0000000000 R11: ffffed100ee5ff85 R12: 1ffff110056d84ee R13: ffffc9000a17fae0 R14: 0000000000000000 R15: ffffffff8f47b840 FS: 0000000000000000(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffef5687ff8 CR3: 0000000029b34000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <NMI> </NMI> <TASK> __unix_gc+0xe69/0xf40 net/unix/garbage.c:343 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 </TASK> Reported-and-tested-by: syzbot+ecab4d36f920c3574bf9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ecab4d36f920c3574bf9 Fixes: 25236c91b5ab ("af_unix: Fix task hung while purging oob_skb in GC.") Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/unix/garbage.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2ff7ddbaa782..2a81880dac7b 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -284,9 +284,17 @@ void unix_gc(void) * which are creating the cycle(s). */ skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) + list_for_each_entry(u, &gc_candidates, link) { scan_children(&u->sk, inc_inflight, &hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif + } + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -314,18 +322,6 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); -#if IS_ENABLED(CONFIG_AF_UNIX_OOB) - while (!list_empty(&gc_candidates)) { - u = list_entry(gc_candidates.next, struct unix_sock, link); - if (u->oob_skb) { - struct sk_buff *skb = u->oob_skb; - - u->oob_skb = NULL; - kfree_skb(skb); - } - } -#endif - spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to From 14dec56fdd4c70a0ebe40077368e367421ea6fef Mon Sep 17 00:00:00 2001 From: Simon Horman <horms@kernel.org> Date: Mon, 19 Feb 2024 17:55:31 +0000 Subject: [PATCH 278/304] MAINTAINERS: Add framer headers to NETWORKING [GENERAL] The cited commit [1] added framer support under drivers/net/wan, which is covered by NETWORKING [GENERAL]. And it is implied that framer-provider.h and framer.h, which were also added buy the same patch, are also maintained as part of NETWORKING [GENERAL]. Make this explicit by adding these files to the corresponding section in MAINTAINERS. [1] 82c944d05b1a ("net: wan: Add framer framework support") Signed-off-by: Simon Horman <horms@kernel.org> Reviewed-by: Herve Codina <herve.codina@bootlin.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a0697e2fb8e8..466a0fc46f76 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15237,6 +15237,8 @@ F: Documentation/networking/ F: Documentation/networking/net_cachelines/ F: Documentation/process/maintainer-netdev.rst F: Documentation/userspace-api/netlink/ +F: include/linux/framer/framer-provider.h +F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h F: include/linux/net.h From e78fb4eac817308027da88d02e5d0213462a7562 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" <rostedt@goodmis.org> Date: Tue, 20 Feb 2024 09:51:12 -0500 Subject: [PATCH 279/304] ring-buffer: Do not let subbuf be bigger than write mask The data on the subbuffer is measured by a write variable that also contains status flags. The counter is just 20 bits in length. If the subbuffer is bigger than then counter, it will fail. Make sure that the subbuffer can not be set to greater than the counter that keeps track of the data on the subbuffer. Link: https://lore.kernel.org/linux-trace-kernel/20240220095112.77e9cb81@gandalf.local.home Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Fixes: 2808e31ec12e5 ("ring-buffer: Add interface for configuring trace sub buffer size") Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> --- kernel/trace/ring_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index fd4bfe3ecf01..0699027b4f4c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5877,6 +5877,10 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order) if (psize <= BUF_PAGE_HDR_SIZE) return -EINVAL; + /* Size of a subbuf cannot be greater than the write counter */ + if (psize > RB_WRITE_MASK + 1) + return -EINVAL; + old_order = buffer->subbuf_order; old_size = buffer->subbuf_size; From 4cd12c6065dfcdeba10f49949bffcf383b3952d8 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida <syoshida@redhat.com> Date: Mon, 19 Feb 2024 00:09:33 +0900 Subject: [PATCH 280/304] bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready() syzbot reported the following NULL pointer dereference issue [1]: BUG: kernel NULL pointer dereference, address: 0000000000000000 [...] RIP: 0010:0x0 [...] Call Trace: <TASK> sk_psock_verdict_data_ready+0x232/0x340 net/core/skmsg.c:1230 unix_stream_sendmsg+0x9b4/0x1230 net/unix/af_unix.c:2293 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 If sk_psock_verdict_data_ready() and sk_psock_stop_verdict() are called concurrently, psock->saved_data_ready can be NULL, causing the above issue. This patch fixes this issue by calling the appropriate data ready function using the sk_psock_data_ready() helper and protecting it from concurrency with sk->sk_callback_lock. Fixes: 6df7f764cd3c ("bpf, sockmap: Wake up polling after data copy") Reported-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida <syoshida@redhat.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Tested-by: syzbot+fd7b34375c1c8ce29c93@syzkaller.appspotmail.com Acked-by: John Fastabend <john.fastabend@gmail.com> Closes: https://syzkaller.appspot.com/bug?extid=fd7b34375c1c8ce29c93 [1] Link: https://lore.kernel.org/bpf/20240218150933.6004-1-syoshida@redhat.com --- net/core/skmsg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93ecfceac1bc..4d75ef9d24bf 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } From 136cfaca22567a03bbb3bf53a43d8cb5748b80ec Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev <kovalev@altlinux.org> Date: Wed, 14 Feb 2024 19:27:33 +0300 Subject: [PATCH 281/304] gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp() The gtp_net_ops pernet operations structure for the subsystem must be registered before registering the generic netlink family. Syzkaller hit 'general protection fault in gtp_genl_dump_pdp' bug: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 5826 Comm: gtp Not tainted 6.8.0-rc3-std-def-alt1 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-alt1 04/01/2014 RIP: 0010:gtp_genl_dump_pdp+0x1be/0x800 [gtp] Code: c6 89 c6 e8 64 e9 86 df 58 45 85 f6 0f 85 4e 04 00 00 e8 c5 ee 86 df 48 8b 54 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 de 05 00 00 48 8b 44 24 18 4c 8b 30 4c 39 f0 74 RSP: 0018:ffff888014107220 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff88800fcda588 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f1be4eb05c0(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1be4e766cf CR3: 000000000c33e000 CR4: 0000000000750ef0 PKRU: 55555554 Call Trace: <TASK> ? show_regs+0x90/0xa0 ? die_addr+0x50/0xd0 ? exc_general_protection+0x148/0x220 ? asm_exc_general_protection+0x22/0x30 ? gtp_genl_dump_pdp+0x1be/0x800 [gtp] ? __alloc_skb+0x1dd/0x350 ? __pfx___alloc_skb+0x10/0x10 genl_dumpit+0x11d/0x230 netlink_dump+0x5b9/0xce0 ? lockdep_hardirqs_on_prepare+0x253/0x430 ? __pfx_netlink_dump+0x10/0x10 ? kasan_save_track+0x10/0x40 ? __kasan_kmalloc+0x9b/0xa0 ? genl_start+0x675/0x970 __netlink_dump_start+0x6fc/0x9f0 genl_family_rcv_msg_dumpit+0x1bb/0x2d0 ? __pfx_genl_family_rcv_msg_dumpit+0x10/0x10 ? genl_op_from_small+0x2a/0x440 ? cap_capable+0x1d0/0x240 ? __pfx_genl_start+0x10/0x10 ? __pfx_genl_dumpit+0x10/0x10 ? __pfx_genl_done+0x10/0x10 ? security_capable+0x9d/0xe0 Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev <kovalev@altlinux.org> Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Link: https://lore.kernel.org/r/20240214162733.34214-1-kovalev@altlinux.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/gtp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index b1919278e931..2129ae42c703 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1907,20 +1907,20 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family(>p_genl_family); + err = register_pernet_subsys(>p_net_ops); if (err < 0) goto unreg_rtnl_link; - err = register_pernet_subsys(>p_net_ops); + err = genl_register_family(>p_genl_family); if (err < 0) - goto unreg_genl_family; + goto unreg_pernet_subsys; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; -unreg_genl_family: - genl_unregister_family(>p_genl_family); +unreg_pernet_subsys: + unregister_pernet_subsys(>p_net_ops); unreg_rtnl_link: rtnl_link_unregister(>p_link_ops); error_out: From 10f41d0710fc81b7af93fa6106678d57b1ff24a7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Thu, 15 Feb 2024 17:17:29 +0100 Subject: [PATCH 282/304] tls: break out of main loop when PEEK gets a non-data record PEEK needs to leave decrypted records on the rx_list so that we can receive them later on, so it jumps back into the async code that queues the skb. Unfortunately that makes us skip the TLS_RECORD_TYPE_DATA check at the bottom of the main loop, so if two records of the same (non-DATA) type are queued, we end up merging them. Add the same record type check, and make it unlikely to not penalize the async fastpath. Async decrypt only applies to data record, so this check is only needed for PEEK. process_rx_list also has similar issues. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Link: https://lore.kernel.org/r/3df2eef4fdae720c55e69472b5bea668772b45a2.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9fbc70200cd0..78aedfc682ba 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2064,6 +2064,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } From fdfbaec5923d9359698cbb286bc0deadbb717504 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Thu, 15 Feb 2024 17:17:30 +0100 Subject: [PATCH 283/304] tls: stop recv() if initial process_rx_list gave us non-DATA If we have a non-DATA record on the rx_list and another record of the same type still on the queue, we will end up merging them: - process_rx_list copies the non-DATA record - we start the loop and process the first available record since it's of the same type - we break out of the loop since the record was not DATA Just check the record type and jump to the end in case process_rx_list did some work. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Link: https://lore.kernel.org/r/bd31449e43bd4b6ff546f5c51cf958c31c511deb.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 78aedfc682ba..43dd0d82b6ed 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1971,7 +1971,7 @@ int tls_sw_recvmsg(struct sock *sk, goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); From ec823bf3a479d42c589dc0f28ef4951c49cd2d2a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Thu, 15 Feb 2024 17:17:31 +0100 Subject: [PATCH 284/304] tls: don't skip over different type records from the rx_list If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA and do a recv(PEEK), the rx_list will contain the first two records. The next large recv will walk through the rx_list and copy data from record 1, then stop because record 2 is a different type. Since we haven't filled up our buffer, we will process the next available record. It's also DATA, so we can merge it with the current read. We shouldn't do that, since there was a record in between that we ignored. Add a flag to let process_rx_list inform tls_sw_recvmsg that it had more data available. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Link: https://lore.kernel.org/r/f00c0c0afa080c60f016df1471158c1caf983c34.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- net/tls/tls_sw.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 43dd0d82b6ed..de96959336c4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1772,7 +1772,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1785,7 +1786,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1803,12 +1804,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1844,6 +1845,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1947,6 +1952,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1966,12 +1972,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2130,10 +2136,10 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); + async_copy_bytes, is_peek, NULL); } copied += decrypted; From 7b2a4c2a623a9f9c5fd9cff499bb9457fa2192ec Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Thu, 15 Feb 2024 17:17:32 +0100 Subject: [PATCH 285/304] selftests: tls: add test for merging of same-type control messages Two consecutive control messages of the same type should never be merged into one large received blob of data. Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Link: https://lore.kernel.org/r/018f1633d5471684c65def5fe390de3b15c3d683.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- tools/testing/selftests/net/tls.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 49c84602707f..2714c230a0f9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1485,6 +1485,32 @@ TEST_F(tls, control_msg) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, control_msg_nomerge) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec2, send_len), 0); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; From 2bf6172632e1d4a6ec7ee7e734d53a43a145f5c6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Thu, 15 Feb 2024 17:17:33 +0100 Subject: [PATCH 286/304] selftests: tls: add test for peeking past a record of a different type If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA the current code can look past the 2nd record and merge the 2 data records. Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Link: https://lore.kernel.org/r/4623550f8617c239581030c13402d3262f2bd14f.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- tools/testing/selftests/net/tls.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2714c230a0f9..b95c249f81c2 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1511,6 +1511,25 @@ TEST_F(tls, control_msg_nomerge) EXPECT_EQ(memcmp(buf, rec2, send_len), 0); } +TEST_F(tls, data_control_data) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + char *rec3 = "3333"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; From bccebf64701735533c8db37773eeacc6566cc8ec Mon Sep 17 00:00:00 2001 From: Florian Westphal <fw@strlen.de> Date: Mon, 19 Feb 2024 16:58:04 +0100 Subject: [PATCH 287/304] netfilter: nf_tables: set dormant flag on hook register failure We need to set the dormant flag again if we fail to register the hooks. During memory pressure hook registration can fail and we end up with a table marked as active but no registered hooks. On table/base chain deletion, nf_tables will attempt to unregister the hook again which yields a warn splat from the nftables core. Reported-and-tested-by: syzbot+de4025c006ec68ac56fc@syzkaller.appspotmail.com Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates") Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f8e3f70c35bd..90038d778f37 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1251,6 +1251,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } From 9e0f0430389be7696396c62f037be4bf72cf93e3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Wed, 21 Feb 2024 12:32:58 +0100 Subject: [PATCH 288/304] netfilter: nft_flow_offload: reset dst in route object after setting up flow dst is transferred to the flow object, route object does not own it anymore. Reset dst in route object, otherwise if flow_offload_add() fails, error path releases dst twice, leading to a refcount underflow. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/net/netfilter/nf_flow_table.h | 2 +- net/netfilter/nf_flow_table_core.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 956c752ceb31..a763dd327c6e 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -276,7 +276,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route); + struct nf_flow_route *route); int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 920a5a29ae1d..7502d6d73a60 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -146,7 +156,7 @@ static void nft_flow_dst_release(struct flow_offload *flow, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) + struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); From 8762785f459be1cfe6fcf7285c123aad6a3703f0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Tue, 20 Feb 2024 21:36:39 +0100 Subject: [PATCH 289/304] netfilter: nft_flow_offload: release dst in case direct xmit path is used Direct xmit does not use it since it calls dev_queue_xmit() to send packets, hence it calls dst_release(). kmemleak reports: unreferenced object 0xffff88814f440900 (size 184): comm "softirq", pid 0, jiffies 4294951896 hex dump (first 32 bytes): 00 60 5b 04 81 88 ff ff 00 e6 e8 82 ff ff ff ff .`[............. 21 0b 50 82 ff ff ff ff 00 00 00 00 00 00 00 00 !.P............. backtrace (crc cb2bf5d6): [<000000003ee17107>] kmem_cache_alloc+0x286/0x340 [<0000000021a5de2c>] dst_alloc+0x43/0xb0 [<00000000f0671159>] rt_dst_alloc+0x2e/0x190 [<00000000fe5092c9>] __mkroute_output+0x244/0x980 [<000000005fb96fb0>] ip_route_output_flow+0xc0/0x160 [<0000000045367433>] nf_ip_route+0xf/0x30 [<0000000085da1d8e>] nf_route+0x2d/0x60 [<00000000d1ecd1cb>] nft_flow_route+0x171/0x6a0 [nft_flow_offload] [<00000000d9b2fb60>] nft_flow_offload_eval+0x4e8/0x700 [nft_flow_offload] [<000000009f447dbb>] expr_call_ops_eval+0x53/0x330 [nf_tables] [<00000000072e1be6>] nft_do_chain+0x17c/0x840 [nf_tables] [<00000000d0551029>] nft_do_chain_inet+0xa1/0x210 [nf_tables] [<0000000097c9d5c6>] nf_hook_slow+0x5b/0x160 [<0000000005eccab1>] ip_forward+0x8b6/0x9b0 [<00000000553a269b>] ip_rcv+0x221/0x230 [<00000000412872e5>] __netif_receive_skb_one_core+0xfe/0x110 Fixes: fa502c865666 ("netfilter: flowtable: simplify route logic") Reported-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_flow_table_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 7502d6d73a60..a0571339239c 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -132,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: From d472e9853d7b46a6b094224d131d09ccd3a03daf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Mon, 19 Feb 2024 19:43:53 +0100 Subject: [PATCH 290/304] netfilter: nf_tables: register hooks last when adding new chain/flowtable Register hooks last when adding chain/flowtable to ensure that packets do not walk over datastructure that is being released in the error path without waiting for the rcu grace period. Fixes: 91c7b38dc9f0 ("netfilter: nf_tables: use new transaction infrastructure to handle chain") Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_tables_api.c | 78 ++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 90038d778f37..485e047d5f98 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -684,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -701,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -2504,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2524,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. */ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -8456,9 +8458,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8538,34 +8540,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable, extack, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); From 195e5f88c2e48330ba5483e0bad2de3b3fad484f Mon Sep 17 00:00:00 2001 From: Florian Westphal <fw@strlen.de> Date: Wed, 21 Feb 2024 18:38:45 +0100 Subject: [PATCH 291/304] netfilter: nf_tables: use kzalloc for hook allocation KMSAN reports unitialized variable when registering the hook, reg->hook_ops_type == NF_HOOK_OP_BPF) ~~~~~~~~~~~ undefined This is a small structure, just use kzalloc to make sure this won't happen again when new fields get added to nf_hook_ops. Fixes: 7b4b2fa37587 ("netfilter: annotate nf_tables base hook ops") Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 485e047d5f98..7e938c7397dd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2082,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; From 9990889be14288d4f1743e4768222d5032a79c27 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr <jk@codeconstruct.com.au> Date: Thu, 15 Feb 2024 15:53:08 +0800 Subject: [PATCH 292/304] net: mctp: put sock on tag allocation failure We may hold an extra reference on a socket if a tag allocation fails: we optimistically allocate the sk_key, and take a ref there, but do not drop if we end up not using the allocated key. Ensure we're dropping the sock on this failure by doing a proper unref rather than directly kfree()ing. Fixes: de8a6b15d965 ("net: mctp: add an explicit reference from a mctp_sk_key to sock") Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://lore.kernel.org/r/ce9b61e44d1cdae7797be0c5e3141baf582d23a0.1707983487.git.jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- net/mctp/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mctp/route.c b/net/mctp/route.c index 7a47a58aa54b..6218dcd07e18 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } From e4fe082c38cd74a8fa384bc7542cf3edf1cb7318 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <kuba@kernel.org> Date: Tue, 20 Feb 2024 08:11:11 -0800 Subject: [PATCH 293/304] tools: ynl: make sure we always pass yarg to mnl_cb_run There is one common error handler in ynl - ynl_cb_error(). It expects priv to be a pointer to struct ynl_parse_arg AKA yarg. To avoid potential crashes if we encounter a stray NLMSG_ERROR always pass yarg as priv (or a struct which has it as the first member). ynl_cb_null() has a similar problem directly - it expects yarg but priv passed by the caller is ys. Found by code inspection. Fixes: 86878f14d71a ("tools: ynl: user space helpers") Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Link: https://lore.kernel.org/r/20240220161112.2735195-2-kuba@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- tools/net/ynl/lib/ynl.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index c82a7f41b31c..9e41c8c0cc99 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -466,6 +466,8 @@ ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) int ynl_recv_ack(struct ynl_sock *ys, int ret) { + struct ynl_parse_arg yarg = { .ys = ys, }; + if (!ret) { yerr(ys, YNL_ERROR_EXPECT_ACK, "Expecting an ACK but nothing received"); @@ -478,7 +480,7 @@ int ynl_recv_ack(struct ynl_sock *ys, int ret) return ret; } return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid, - ynl_cb_null, ys); + ynl_cb_null, &yarg); } int ynl_cb_null(const struct nlmsghdr *nlh, void *data) @@ -741,11 +743,14 @@ err_free: static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) { - return ynl_ntf_parse((struct ynl_sock *)data, nlh); + struct ynl_parse_arg *yarg = data; + + return ynl_ntf_parse(yarg->ys, nlh); } int ynl_ntf_check(struct ynl_sock *ys) { + struct ynl_parse_arg yarg = { .ys = ys, }; ssize_t len; int err; @@ -767,7 +772,7 @@ int ynl_ntf_check(struct ynl_sock *ys) return len; err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, - ynl_ntf_trampoline, ys, + ynl_ntf_trampoline, &yarg, ynl_cb_array, NLMSG_MIN_TYPE); if (err < 0) return err; From 5d78b73e851455d525a064f3b042b29fdc0c1a4a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <kuba@kernel.org> Date: Tue, 20 Feb 2024 08:11:12 -0800 Subject: [PATCH 294/304] tools: ynl: don't leak mcast_groups on init error Make sure to free the already-parsed mcast_groups if we don't get an ack from the kernel when reading family info. This is part of the ynl_sock_create() error path, so we won't get a call to ynl_sock_destroy() to free them later. Fixes: 86878f14d71a ("tools: ynl: user space helpers") Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com> Link: https://lore.kernel.org/r/20240220161112.2735195-3-kuba@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- tools/net/ynl/lib/ynl.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 9e41c8c0cc99..6e6d474c8366 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -588,7 +588,13 @@ static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) return err; } - return ynl_recv_ack(ys, err); + err = ynl_recv_ack(ys, err); + if (err < 0) { + free(ys->mcast_groups); + return err; + } + + return 0; } struct ynl_sock * From 90d07e36d40079a22ca99edd2412e7e9c2382968 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach <kurt@linutronix.de> Date: Tue, 20 Feb 2024 09:22:46 +0100 Subject: [PATCH 295/304] net: stmmac: Fix EST offset for dwmac 5.10 Fix EST offset for dwmac 5.10. Currently configuring Qbv doesn't work as expected. The schedule is configured, but never confirmed: |[ 128.250219] imx-dwmac 428a0000.ethernet eth1: configured EST The reason seems to be the refactoring of the EST code which set the wrong EST offset for the dwmac 5.10. After fixing this it works as before: |[ 106.359577] imx-dwmac 428a0000.ethernet eth1: configured EST |[ 128.430715] imx-dwmac 428a0000.ethernet eth1: EST: SWOL has been switched Tested on imx93. Fixes: c3f3b97238f6 ("net: stmmac: Refactor EST implementation") Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de> Reviewed-by: Serge Semin <fancer.lancer@gmail.com> Link: https://lore.kernel.org/r/20240220-stmmac_est-v1-1-c41f9ae2e7b7@linutronix.de Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 1bd34b2a47e8..29367105df54 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -224,7 +224,7 @@ static const struct stmmac_hwif_entry { .regs = { .ptp_off = PTP_GMAC4_OFFSET, .mmc_off = MMC_GMAC4_OFFSET, - .est_off = EST_XGMAC_OFFSET, + .est_off = EST_GMAC4_OFFSET, }, .desc = &dwmac4_desc_ops, .dma = &dwmac410_dma_ops, From 61c43780e9444123410cd48c2483e01d2b8f75e8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko <jiri@nvidia.com> Date: Tue, 20 Feb 2024 08:52:45 +0100 Subject: [PATCH 296/304] devlink: fix port dump cmd type Unlike other commands, due to a c&p error, port dump fills-up cmd with wrong value, different from port-get request cmd, port-get doit reply and port notification. Fix it by filling cmd with value DEVLINK_CMD_PORT_NEW. Skimmed through devlink userspace implementations, none of them cares about this cmd value. Only ynl, for which, this is actually a fix, as it expects doit and dumpit ops rsp_value to be the same. Omit the fixes tag, even thought this is fix, better to target this for next release. Fixes: bfcd3a466172 ("Introduce devlink infrastructure") Signed-off-by: Jiri Pirko <jiri@nvidia.com> Reviewed-by: Simon Horman <horms@kernel.org> Reviewed-by: Jakub Kicinski <kuba@kernel.org> Link: https://lore.kernel.org/r/20240220075245.75416-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- net/devlink/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/devlink/port.c b/net/devlink/port.c index 78592912f657..4b2d46ccfe48 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -583,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink, xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) { err = devlink_nl_port_fill(msg, devlink_port, - DEVLINK_CMD_NEW, + DEVLINK_CMD_PORT_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, cb->extack); From 1fde0ca3a0de7e9f917668941156959dd5e9108b Mon Sep 17 00:00:00 2001 From: Jianbo Liu <jianbol@nvidia.com> Date: Tue, 20 Feb 2024 08:59:28 +0000 Subject: [PATCH 297/304] net/sched: flower: Add lock protection when remove filter handle As IDR can't protect itself from the concurrent modification, place idr_remove() under the protection of tp->lock. Fixes: 08a0063df3ae ("net/sched: flower: Move filter handle initialization earlier") Signed-off-by: Jianbo Liu <jianbol@nvidia.com> Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com> Reviewed-by: Gal Pressman <gal@nvidia.com> Reviewed-by: Jiri Pirko <jiri@nvidia.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Link: https://lore.kernel.org/r/20240220085928.9161-1-jianbol@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- net/sched/cls_flower.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index efb9d2811b73..6ee7064c82fc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2460,8 +2460,11 @@ unbind_filter: } errout_idr: - if (!fold) + if (!fold) { + spin_lock(&tp->lock); idr_remove(&head->handle_idr, fnew->handle); + spin_unlock(&tp->lock); + } __fl_put(fnew); errout_tb: kfree(tb); From 603ead96582d85903baec2d55f021b8dac5c25d2 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur <horatiu.vultur@microchip.com> Date: Mon, 19 Feb 2024 09:00:43 +0100 Subject: [PATCH 298/304] net: sparx5: Add spinlock for frame transmission from CPU Both registers used when doing manual injection or fdma injection are shared between all the net devices of the switch. It was noticed that when having two process which each of them trying to inject frames on different ethernet ports, that the HW started to behave strange, by sending out more frames then expected. When doing fdma injection it is required to set the frame in the DCB and then make sure that the next pointer of the last DCB is invalid. But because there is no locks for this, then easily this pointer between the DCB can be broken and then it would create a loop of DCBs. And that means that the HW will continuously transmit these frames in a loop. Until the SW will break this loop. Therefore to fix this issue, add a spin lock for when accessing the registers for manual or fdma injection. Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com> Reviewed-by: Daniel Machon <daniel.machon@microchip.com> Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Link: https://lore.kernel.org/r/20240219080043.1561014-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_packet.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index d1f7fc8b1b71..3c066b62e689 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -757,6 +757,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sparx5); sparx5->pdev = pdev; sparx5->dev = &pdev->dev; + spin_lock_init(&sparx5->tx_lock); /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index 6f565c0c0c3d..316fed5f2735 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -280,6 +280,7 @@ struct sparx5 { int xtr_irq; /* Frame DMA */ int fdma_irq; + spinlock_t tx_lock; /* lock for frame transmission */ struct sparx5_rx rx; struct sparx5_tx tx; /* PTP */ diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 6db6ac6a3bbc..ac7e1cffbcec 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -244,10 +244,12 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); + spin_lock(&sparx5->tx_lock); if (sparx5->fdma_irq > 0) ret = sparx5_fdma_xmit(sparx5, ifh, skb); else ret = sparx5_inject(sparx5, ifh, skb, dev); + spin_unlock(&sparx5->tx_lock); if (ret == -EBUSY) goto busy; From 3b2d9bc4d4acdf15a876eae2c0d83149250e85ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <courmisch@gmail.com> Date: Sun, 18 Feb 2024 10:12:13 +0200 Subject: [PATCH 299/304] phonet: take correct lock to peek at the RX queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The receive queue is protected by its embedded spin-lock, not the socket lock, so we need the former lock here (and only that one). Fixes: 107d0d9b8d9a ("Phonet: Phonet datagram transport protocol") Reported-by: Luosili <rootlab@huawei.com> Signed-off-by: Rémi Denis-Courmont <courmisch@gmail.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/20240218081214.4806-1-remi@remlab.net Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/phonet/datagram.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 3aa50dc7535b..976fe250b509 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: From 7d2a894d7f487dcb894df023e9d3014cf5b93fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <courmisch@gmail.com> Date: Sun, 18 Feb 2024 10:12:14 +0200 Subject: [PATCH 300/304] phonet/pep: fix racy skb_queue_empty() use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The receive queues are protected by their respective spin-lock, not the socket lock. This could lead to skb_peek() unexpectedly returning NULL or a pointer to an already dequeued socket buffer. Fixes: 9641458d3ec4 ("Phonet: Pipe End Point for Phonet Pipes protocol") Signed-off-by: Rémi Denis-Courmont <courmisch@gmail.com> Link: https://lore.kernel.org/r/20240218081214.4806-2-remi@remlab.net Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/phonet/pep.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index faba31f2eff2..3dd5f52bc1b5 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, int *karg) { struct pep_sock *pn = pep_sk(sk); @@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - *karg = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - *karg = skb_peek(&sk->sk_receive_queue)->len; - else - *karg = 0; - release_sock(sk); + *karg = pep_first_packet_length(sk); ret = 0; break; From f198d933c2e4f8f89e0620fbaf1ea7eac384a0eb Mon Sep 17 00:00:00 2001 From: Justin Iurman <justin.iurman@uliege.be> Date: Mon, 19 Feb 2024 14:52:54 +0100 Subject: [PATCH 301/304] Fix write to cloned skb in ipv6_hop_ioam() ioam6_fill_trace_data() writes inside the skb payload without ensuring it's writeable (e.g., not cloned). This function is called both from the input and output path. The output path (ioam6_iptunnel) already does the check. This commit provides a fix for the input path, inside ipv6_hop_ioam(). It also updates ip6_parse_tlv() to refresh the network header pointer ("nh") when returning from ipv6_hop_ioam(). Fixes: 9ee11f0fff20 ("ipv6: ioam: Data plane support for Pre-allocated Trace") Reported-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Justin Iurman <justin.iurman@uliege.be> Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/ipv6/exthdrs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae792450..02e9ffb63af1 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: From 187bbb6968af5400e436c193765c5d845a07364f Mon Sep 17 00:00:00 2001 From: Justin Iurman <justin.iurman@uliege.be> Date: Mon, 19 Feb 2024 14:52:55 +0100 Subject: [PATCH 302/304] selftests: ioam: refactoring to align with the fix ioam6_parser uses a packet socket. After the fix to prevent writing to cloned skb's, the receiver does not see its IOAM data anymore, which makes input/forward ioam-selftests to fail. As a workaround, ioam6_parser now uses an IPv6 raw socket and leverages ancillary data to get hop-by-hop options. As a consequence, the hook is "after" the IOAM data insertion by the receiver and all tests are working again. Signed-off-by: Justin Iurman <justin.iurman@uliege.be> Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- tools/testing/selftests/net/ioam6.sh | 36 ++++---- tools/testing/selftests/net/ioam6_parser.c | 95 +++++++++++----------- 2 files changed, 65 insertions(+), 66 deletions(-) diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index fe59ca3e5596..12491850ae98 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -367,14 +367,12 @@ run_test() local desc=$2 local node_src=$3 local node_dst=$4 - local ip6_src=$5 - local ip6_dst=$6 - local if_dst=$7 - local trace_type=$8 - local ioam_ns=$9 + local ip6_dst=$5 + local trace_type=$6 + local ioam_ns=$7 + local type=$8 - ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \ - $trace_type $ioam_ns & + ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & local spid=$! sleep 0.1 @@ -489,7 +487,7 @@ out_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -509,7 +507,7 @@ out_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -543,14 +541,14 @@ out_bits() if [ $cmd_res != 0 ] then npassed=$((npassed+1)) - log_test_passed "$descr" + log_test_passed "$descr ($1 mode)" else nfailed=$((nfailed+1)) - log_test_failed "$descr" + log_test_failed "$descr ($1 mode)" fi else run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 fi done @@ -574,7 +572,7 @@ out_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 100 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -604,7 +602,7 @@ in_undef_ns() trace prealloc type 0x800000 ns 0 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0x800000 0 + db01::1 0x800000 0 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -624,7 +622,7 @@ in_no_room() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -651,7 +649,7 @@ in_bits() dev veth0 run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 done [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down @@ -679,7 +677,7 @@ in_oflag() trace prealloc type 0xc00000 ns 123 size 4 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xc00000 123 + db01::1 0xc00000 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down @@ -703,7 +701,7 @@ in_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 80 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::2 db01::1 veth0 0xfff002 123 + db01::1 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -731,7 +729,7 @@ fwd_full_supp_trace() trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db01::2 db02::2 veth0 0xfff002 123 + db02::2 0xfff002 123 $1 [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down } diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d9d1d4190126..895e5bb5044b 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,7 +8,6 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> -#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> #include <stdlib.h> @@ -512,14 +511,6 @@ static int str2id(const char *tname) return -1; } -static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) -{ - return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; -} - static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -603,70 +594,80 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1; - struct in6_addr src, dst; + int fd, size, hoplen, tid, ret = 1, on = 1; struct ioam6_hdr *opt; - struct ipv6hdr *ip6h; - __u8 buffer[400], *p; - __u16 ioam_ns; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + __u8 buffer[512]; __u32 tr_type; + __u16 ioam_ns; + __u8 *ptr; - if (argc != 7) + if (argc != 5) goto out; - tid = str2id(argv[2]); + tid = str2id(argv[1]); if (tid < 0 || !func[tid]) goto out; - if (inet_pton(AF_INET6, argv[3], &src) != 1 || - inet_pton(AF_INET6, argv[4], &dst) != 1) + if (get_u32(&tr_type, argv[2], 16) || + get_u16(&ioam_ns, argv[3], 0)) goto out; - if (get_u32(&tr_type, argv[5], 16) || - get_u16(&ioam_ns, argv[6], 0)) + fd = socket(PF_INET6, SOCK_RAW, + !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + if (fd < 0) goto out; - fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); - if (!fd) - goto out; + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); - if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, - argv[1], strlen(argv[1]))) + iov.iov_len = 1; + iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); + if (!iov.iov_base) goto close; - recv: - size = recv(fd, buffer, sizeof(buffer), 0); + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buffer; + msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); + + size = recvmsg(fd, &msg, 0); if (size <= 0) goto close; - ip6h = (struct ipv6hdr *)buffer; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != IPPROTO_IPV6 || + cmsg->cmsg_type != IPV6_HOPOPTS || + cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) + continue; - if (!ipv6_addr_equal(&ip6h->saddr, &src) || - !ipv6_addr_equal(&ip6h->daddr, &dst)) - goto recv; + ptr = (__u8 *)CMSG_DATA(cmsg); - if (ip6h->nexthdr != IPPROTO_HOPOPTS) - goto close; + hoplen = (ptr[1] + 1) << 3; + ptr += sizeof(struct ipv6_hopopt_hdr); - p = buffer + sizeof(*ip6h); - hoplen = (p[1] + 1) << 3; - p += sizeof(struct ipv6_hopopt_hdr); + while (hoplen > 0) { + opt = (struct ioam6_hdr *)ptr; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)p; + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + ptr += sizeof(*opt); + ret = func[tid](tid, + (struct ioam6_trace_hdr *)ptr, + tr_type, ioam_ns); + goto close; + } - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - p += sizeof(*opt); - ret = func[tid](tid, (struct ioam6_trace_hdr *)p, - tr_type, ioam_ns); - break; + ptr += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; } - - p += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; } + + goto recv; close: + free(iov.iov_base); close(fd); out: return ret; From 3489182b11d35f1944c1245fc9c4867cf622c50f Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli <s-vadapalli@ti.com> Date: Tue, 20 Feb 2024 12:30:07 +0530 Subject: [PATCH 303/304] net: phy: realtek: Fix rtl8211f_config_init() for RTL8211F(D)(I)-VD-CG PHY Commit bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") extended support of the driver from the existing support for RTL8211F(D)(I)-CG PHY to the newer RTL8211F(D)(I)-VD-CG PHY. While that commit indicated that the RTL8211F_PHYCR2 register is not supported by the "VD-CG" PHY model and therefore updated the corresponding section in rtl8211f_config_init() to be invoked conditionally, the call to "genphy_soft_reset()" was left as-is, when it should have also been invoked conditionally. This is because the call to "genphy_soft_reset()" was first introduced by the commit 0a4355c2b7f8 ("net: phy: realtek: add dt property to disable CLKOUT clock") since the RTL8211F guide indicates that a PHY reset should be issued after setting bits in the PHYCR2 register. As the PHYCR2 register is not applicable to the "VD-CG" PHY model, fix the rtl8211f_config_init() function by invoking "genphy_soft_reset()" conditionally based on the presence of the "PHYCR2" register. Fixes: bb726b753f75 ("net: phy: realtek: add support for RTL8211F(D)(I)-VD-CG") Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://lore.kernel.org/r/20240220070007.968762-1-s-vadapalli@ti.com Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- drivers/net/phy/realtek.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 894172a3e15f..337899c69738 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -421,9 +421,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) ERR_PTR(ret)); return ret; } + + return genphy_soft_reset(phydev); } - return genphy_soft_reset(phydev); + return 0; } static int rtl821x_suspend(struct phy_device *phydev) From 359e54a93ab43d32ee1bff3c2f9f10cb9f6b6e79 Mon Sep 17 00:00:00 2001 From: Tom Parkin <tparkin@katalix.com> Date: Tue, 20 Feb 2024 12:21:56 +0000 Subject: [PATCH 304/304] l2tp: pass correct message length to ip6_append_data l2tp_ip6_sendmsg needs to avoid accounting for the transport header twice when splicing more data into an already partially-occupied skbuff. To manage this, we check whether the skbuff contains data using skb_queue_empty when deciding how much data to append using ip6_append_data. However, the code which performed the calculation was incorrect: ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; ...due to C operator precedence, this ends up setting ulen to transhdrlen for messages with a non-zero length, which results in corrupted packets on the wire. Add parentheses to correct the calculation in line with the original intent. Fixes: 9d4c75800f61 ("ipv4, ipv6: Fix handling of transhdrlen in __ip{,6}_append_data()") Cc: David Howells <dhowells@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Tom Parkin <tparkin@katalix.com> Reviewed-by: Simon Horman <horms@kernel.org> Link: https://lore.kernel.org/r/20240220122156.43131-1-tparkin@katalix.com Signed-off-by: Paolo Abeni <pabeni@redhat.com> --- net/l2tp/l2tp_ip6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index dd3153966173..7bf14cf9ffaa 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst,