From 1982a2a02c9197436d4a8ea12f66bafab53f16a0 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 3 Jan 2024 16:06:32 +0100 Subject: [PATCH 001/225] xfrm: Clear low order bits of ->flowi4_tos in decode_session4(). Commit 23e7b1bfed61 ("xfrm: Don't accidentally set RTO_ONLINK in decode_session4()") fixed a problem where decode_session4() could erroneously set the RTO_ONLINK flag for IPv4 route lookups. This problem was reintroduced when decode_session4() was modified to use the flow dissector. Fix this by clearing again the two low order bits of ->flowi4_tos. Found by code inspection, compile tested only. Fixes: 7a0207094f1b ("xfrm: policy: replace session decode with flow dissector") Signed-off-by: Guillaume Nault Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1b7e75159727..7351f32052dc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3416,7 +3416,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve } fl4->flowi4_proto = flkeys->basic.ip_proto; - fl4->flowi4_tos = flkeys->ip.tos; + fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK; } #if IS_ENABLED(CONFIG_IPV6) From 8246601a7d391ce8207408149d65732f28af81a1 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 20 Dec 2023 01:50:43 +0800 Subject: [PATCH 002/225] riscv: tlb: fix __p*d_free_tlb() If non-leaf PTEs I.E pmd, pud or p4d is modified, a sfence.vma is a must for safe, imagine if an implementation caches the non-leaf translation in TLB, although I didn't meet this HW so far, but it's possible in theory. Signed-off-by: Jisheng Zhang Fixes: c5e9b2c2ae82 ("riscv: Improve tlb_flush()") Link: https://lore.kernel.org/r/20231219175046.2496-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgalloc.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index d169a4f41a2e..c80bb9990d32 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) __pud_free(mm, pud); } -#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) +#define __pud_free_tlb(tlb, pud, addr) \ +do { \ + if (pgtable_l4_enabled) { \ + pagetable_pud_dtor(virt_to_ptdesc(pud)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \ + } \ +} while (0) #define p4d_alloc_one p4d_alloc_one static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) __p4d_free(mm, p4d); } -#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) +#define __p4d_free_tlb(tlb, p4d, addr) \ +do { \ + if (pgtable_l5_enabled) \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d)); \ +} while (0) #endif /* __PAGETABLE_PMD_FOLDED */ static inline void sync_kernel_mappings(pgd_t *pgd) @@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) #ifndef __PAGETABLE_PMD_FOLDED -#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) +#define __pmd_free_tlb(tlb, pmd, addr) \ +do { \ + pagetable_pmd_dtor(virt_to_ptdesc(pmd)); \ + tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd)); \ +} while (0) #endif /* __PAGETABLE_PMD_FOLDED */ From 983a73da1f996faee9997149eb05b12fa7bd8cbf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jan 2024 00:13:54 -0800 Subject: [PATCH 003/225] xfrm: Pass UDP encapsulation in TX packet offload In addition to citied commit in Fixes line, allow UDP encapsulation in TX path too. Fixes: 89edf40220be ("xfrm: Support UDP encapsulation in packet offload mode") CC: Steffen Klassert Reported-by: Mike Yu Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 3784534c9185..653e51ae3964 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -407,7 +407,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct net_device *dev = x->xso.dev; - if (!x->type_offload || x->encap) + if (!x->type_offload) return false; if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET || From 361bb7c961403173be109d8892f3c23096dc098d Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 11 Jan 2024 17:58:49 +0100 Subject: [PATCH 004/225] arm64: dts: qcom: sm8650-qrd: add gpio74 as reserved gpio The TLMM gpio74 is also used to communicate with the secure NFC on-board module, some variants of the SM8650-QRD board requires this GPIO to be dedicated to the secure firmware and set reserved in order to successfully initialize the TLMM GPIOs from HLOS (Linux). On the other boards this GPIO is unused so it's still safe to mark the GPIO as reserved. Fixes: a834911d50c1 ("arm64: dts: qcom: sm8650: add initial SM8650 QRD dts") Reported-by: Georgi Djakov Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Reviewed-by: Elliot Berman Link: https://lore.kernel.org/r/20240111-topic-sm8650-upstream-qrd-fix-gpio-reserved-v1-1-fad39b4c5def@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8650-qrd.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650-qrd.dts b/arch/arm64/boot/dts/qcom/sm8650-qrd.dts index 592a67a47c78..b9151c2ddf2e 100644 --- a/arch/arm64/boot/dts/qcom/sm8650-qrd.dts +++ b/arch/arm64/boot/dts/qcom/sm8650-qrd.dts @@ -659,7 +659,7 @@ &tlmm { /* Reserved I/Os for NFC */ - gpio-reserved-ranges = <32 8>; + gpio-reserved-ranges = <32 8>, <74 1>; bt_default: bt-default-state { bt-en-pins { From df77288f7e3accf246785c53cd5f117fc5d81611 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 11 Jan 2024 17:58:50 +0100 Subject: [PATCH 005/225] arm64: dts: qcom: sm8650-mtp: add gpio74 as reserved gpio The TLMM gpio74 is also used to communicate with the secure NFC on-board module, some variants of the SM8650-MTP board requires this GPIO to be dedicated to the secure firmware and set reserved in order to successfully initialize the TLMM GPIOs from HLOS (Linux). On the other boards this GPIO is unused so it's still safe to mark the GPIO as reserved. Fixes: 6fbdb3c1fac7 ("arm64: dts: qcom: sm8650: add initial SM8650 MTP dts") Reported-by: Georgi Djakov Signed-off-by: Neil Armstrong Reviewed-by: Konrad Dybcio Tested-by: Georgi Djakov Reviewed-by: Elliot Berman Link: https://lore.kernel.org/r/20240111-topic-sm8650-upstream-qrd-fix-gpio-reserved-v1-2-fad39b4c5def@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm8650-mtp.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8650-mtp.dts b/arch/arm64/boot/dts/qcom/sm8650-mtp.dts index 9d916edb1c73..be133a3d5cbe 100644 --- a/arch/arm64/boot/dts/qcom/sm8650-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sm8650-mtp.dts @@ -622,7 +622,7 @@ &tlmm { /* Reserved I/Os for NFC */ - gpio-reserved-ranges = <32 8>; + gpio-reserved-ranges = <32 8>, <74 1>; disp0_reset_n_active: disp0-reset-n-active-state { pins = "gpio133"; From cb0bbdc4cc327ee91ba21ff744adbe07885db2b8 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 2 Feb 2024 00:29:58 +0100 Subject: [PATCH 006/225] arm64: dts: qcom: sm6115: Fix missing interconnect-names Commit b3eaa47395b9 ("arm64: dts: qcom: sm6115: Hook up interconnects") did indeed hook up interconnects, but apparently not interconnect-names on I2C1, making it return -EINVAL due to an error getting icc paths.. Fix it! Fixes: b3eaa47395b9 ("arm64: dts: qcom: sm6115: Hook up interconnects") Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240202-topic-6115_i2c-v1-1-ecfe06f5f2ef@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sm6115.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index 160e098f1075..f9849b8befbf 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -1304,6 +1304,9 @@ &config_noc SLAVE_QUP_0 RPM_ALWAYS_TAG>, <&system_noc MASTER_QUP_0 RPM_ALWAYS_TAG &bimc SLAVE_EBI_CH0 RPM_ALWAYS_TAG>; + interconnect-names = "qup-core", + "qup-config", + "qup-memory"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; From cd665bfc757c71e9b7e0abff0f362d8abd38a805 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:25:57 +0100 Subject: [PATCH 007/225] dmaengine: dw-edma: Fix the ch_count hdma callback The current check of ch_en enabled to know the maximum number of available hardware channels is wrong as it check the number of ch_en register set but all of them are unset at probe. This register is set at the dw_hdma_v0_core_start function which is run lately before a DMA transfer. The HDMA IP have no way to know the number of hardware channels available like the eDMA IP, then let set it to maximum channels and let the platform set the right number of channels. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Acked-by: Manivannan Sadhasivam Reviewed-by: Serge Semin Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-1-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 00b735a0202a..1f4cb7db5475 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -65,18 +65,12 @@ static void dw_hdma_v0_core_off(struct dw_edma *dw) static u16 dw_hdma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir) { - u32 num_ch = 0; - int id; - - for (id = 0; id < HDMA_V0_MAX_NR_CH; id++) { - if (GET_CH_32(dw, id, dir, ch_en) & BIT(0)) - num_ch++; - } - - if (num_ch > HDMA_V0_MAX_NR_CH) - num_ch = HDMA_V0_MAX_NR_CH; - - return (u16)num_ch; + /* + * The HDMA IP have no way to know the number of hardware channels + * available, we set it to maximum channels and let the platform + * set the right number of channels. + */ + return HDMA_V0_MAX_NR_CH; } static enum dma_status dw_hdma_v0_core_ch_status(struct dw_edma_chan *chan) From 7b52ba8616e978bf4f38f207f11a8176517244d0 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:25:58 +0100 Subject: [PATCH 008/225] dmaengine: dw-edma: Fix wrong interrupt bit set for HDMA Instead of setting HDMA_V0_LOCAL_ABORT_INT_EN bit, HDMA_V0_LOCAL_STOP_INT_EN bit got set twice, due to which the abort interrupt is not getting generated for HDMA. Fix it by setting the correct interrupt enable bit. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-2-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 1f4cb7db5475..108f9127aaaa 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -236,7 +236,7 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) /* Interrupt enable&unmask - done, abort */ tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) | HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK | - HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_STOP_INT_EN; + HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN; SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp); /* Channel control */ SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN); From 930a8a015dcfde4b8906351ff081066dc277748c Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:25:59 +0100 Subject: [PATCH 009/225] dmaengine: dw-edma: HDMA_V0_REMOTEL_STOP_INT_EN typo fix Fix "HDMA_V0_REMOTEL_STOP_INT_EN" typo error Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-3-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-regs.h b/drivers/dma/dw-edma/dw-hdma-v0-regs.h index a974abdf8aaf..eab5fd7177e5 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-regs.h +++ b/drivers/dma/dw-edma/dw-hdma-v0-regs.h @@ -15,7 +15,7 @@ #define HDMA_V0_LOCAL_ABORT_INT_EN BIT(6) #define HDMA_V0_REMOTE_ABORT_INT_EN BIT(5) #define HDMA_V0_LOCAL_STOP_INT_EN BIT(4) -#define HDMA_V0_REMOTEL_STOP_INT_EN BIT(3) +#define HDMA_V0_REMOTE_STOP_INT_EN BIT(3) #define HDMA_V0_ABORT_INT_MASK BIT(2) #define HDMA_V0_STOP_INT_MASK BIT(0) #define HDMA_V0_LINKLIST_EN BIT(0) From e2f6a5789051ee9c632f27a12d0f01f0cbf78aac Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:26:00 +0100 Subject: [PATCH 010/225] dmaengine: dw-edma: Add HDMA remote interrupt configuration Only the local interruption was configured, remote interrupt was left behind. This patch fix it by setting stop and abort remote interrupts when the DW_EDMA_CHIP_LOCAL flag is not set. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Signed-off-by: Kory Maincent Reviewed-by: Serge Semin Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-4-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 108f9127aaaa..04b0bcb6ded9 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -237,6 +237,8 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) tmp = GET_CH_32(dw, chan->dir, chan->id, int_setup) | HDMA_V0_STOP_INT_MASK | HDMA_V0_ABORT_INT_MASK | HDMA_V0_LOCAL_STOP_INT_EN | HDMA_V0_LOCAL_ABORT_INT_EN; + if (!(dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + tmp |= HDMA_V0_REMOTE_STOP_INT_EN | HDMA_V0_REMOTE_ABORT_INT_EN; SET_CH_32(dw, chan->dir, chan->id, int_setup, tmp); /* Channel control */ SET_CH_32(dw, chan->dir, chan->id, control1, HDMA_V0_LINKLIST_EN); From 712a92a48158e02155b4b6b21e03a817f78c9b7e Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:26:01 +0100 Subject: [PATCH 011/225] dmaengine: dw-edma: HDMA: Add sync read before starting the DMA transfer in remote setup The Linked list element and pointer are not stored in the same memory as the HDMA controller register. If the doorbell register is toggled before the full write of the linked list a race condition error will occur. In remote setup we can only use a readl to the memory to assure the full write has occurred. Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-5-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-hdma-v0-core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c index 04b0bcb6ded9..10e8f0715114 100644 --- a/drivers/dma/dw-edma/dw-hdma-v0-core.c +++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c @@ -222,6 +222,20 @@ static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk) dw_hdma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); } +static void dw_hdma_v0_sync_ll_data(struct dw_edma_chunk *chunk) +{ + /* + * In case of remote HDMA engine setup, the DW PCIe RP/EP internal + * configuration registers and application memory are normally accessed + * over different buses. Ensure LL-data reaches the memory before the + * doorbell register is toggled by issuing the dummy-read from the remote + * LL memory in a hope that the MRd TLP will return only after the + * last MWr TLP is completed + */ + if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + readl(chunk->ll_region.vaddr.io); +} + static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) { struct dw_edma_chan *chan = chunk->chan; @@ -252,6 +266,9 @@ static void dw_hdma_v0_core_start(struct dw_edma_chunk *chunk, bool first) /* Set consumer cycle */ SET_CH_32(dw, chan->dir, chan->id, cycle_sync, HDMA_V0_CONSUMER_CYCLE_STAT | HDMA_V0_CONSUMER_CYCLE_BIT); + + dw_hdma_v0_sync_ll_data(chunk); + /* Doorbell */ SET_CH_32(dw, chan->dir, chan->id, doorbell, HDMA_V0_DOORBELL_START); } From bbcc1c83f343e580c3aa1f2a8593343bf7b55bba Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 29 Jan 2024 17:26:02 +0100 Subject: [PATCH 012/225] dmaengine: dw-edma: eDMA: Add sync read before starting the DMA transfer in remote setup The Linked list element and pointer are not stored in the same memory as the eDMA controller register. If the doorbell register is toggled before the full write of the linked list a race condition error will occur. In remote setup we can only use a readl to the memory to assure the full write has occurred. Fixes: 7e4b8a4fbe2c ("dmaengine: Add Synopsys eDMA IP version 0 support") Reviewed-by: Serge Semin Reviewed-by: Manivannan Sadhasivam Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240129-b4-feature_hdma_mainline-v7-6-8e8c1acb7a46@bootlin.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-v0-core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c index b38786f0ad79..b75fdaffad9a 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-core.c +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -346,6 +346,20 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk) dw_edma_v0_write_ll_link(chunk, i, control, chunk->ll_region.paddr); } +static void dw_edma_v0_sync_ll_data(struct dw_edma_chunk *chunk) +{ + /* + * In case of remote eDMA engine setup, the DW PCIe RP/EP internal + * configuration registers and application memory are normally accessed + * over different buses. Ensure LL-data reaches the memory before the + * doorbell register is toggled by issuing the dummy-read from the remote + * LL memory in a hope that the MRd TLP will return only after the + * last MWr TLP is completed + */ + if (!(chunk->chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL)) + readl(chunk->ll_region.vaddr.io); +} + static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) { struct dw_edma_chan *chan = chunk->chan; @@ -412,6 +426,9 @@ static void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first) SET_CH_32(dw, chan->dir, chan->id, llp.msb, upper_32_bits(chunk->ll_region.paddr)); } + + dw_edma_v0_sync_ll_data(chunk); + /* Doorbell */ SET_RW_32(dw, chan->dir, doorbell, FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id)); From 9d739bccf261dd93ec1babf82f5c5d71dd4caa3e Mon Sep 17 00:00:00 2001 From: Peng Ma Date: Thu, 1 Feb 2024 16:50:07 -0500 Subject: [PATCH 013/225] dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read There is chip (ls1028a) errata: The SoC may hang on 16 byte unaligned read transactions by QDMA. Unaligned read transactions initiated by QDMA may stall in the NOC (Network On-Chip), causing a deadlock condition. Stalled transactions will trigger completion timeouts in PCIe controller. Workaround: Enable prefetch by setting the source descriptor prefetchable bit ( SD[PF] = 1 ). Implement this workaround. Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Peng Ma Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201215007.439503-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index f405c77060ad..70e8b7d425c8 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -109,6 +109,7 @@ #define FSL_QDMA_CMD_WTHROTL_OFFSET 20 #define FSL_QDMA_CMD_DSEN_OFFSET 19 #define FSL_QDMA_CMD_LWC_OFFSET 16 +#define FSL_QDMA_CMD_PF BIT(17) /* Field definition for Descriptor status */ #define QDMA_CCDF_STATUS_RTE BIT(5) @@ -384,7 +385,8 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); + FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF; sdf->data = QDMA_SDDF_CMD(cmd); cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << From 87a39071e0b639f45e05d296cc0538eef44ec0bd Mon Sep 17 00:00:00 2001 From: Curtis Klein Date: Thu, 1 Feb 2024 17:04:06 -0500 Subject: [PATCH 014/225] dmaengine: fsl-qdma: init irq after reg initialization Initialize the qDMA irqs after the registers are configured so that interrupts that may have been pending from a primary kernel don't get processed by the irq handler before it is ready to and cause panic with the following trace: Call trace: fsl_qdma_queue_handler+0xf8/0x3e8 __handle_irq_event_percpu+0x78/0x2b0 handle_irq_event_percpu+0x1c/0x68 handle_irq_event+0x44/0x78 handle_fasteoi_irq+0xc8/0x178 generic_handle_irq+0x24/0x38 __handle_domain_irq+0x90/0x100 gic_handle_irq+0x5c/0xb8 el1_irq+0xb8/0x180 _raw_spin_unlock_irqrestore+0x14/0x40 __setup_irq+0x4bc/0x798 request_threaded_irq+0xd8/0x190 devm_request_threaded_irq+0x74/0xe8 fsl_qdma_probe+0x4d4/0xca8 platform_drv_probe+0x50/0xa0 really_probe+0xe0/0x3f8 driver_probe_device+0x64/0x130 device_driver_attach+0x6c/0x78 __driver_attach+0xbc/0x158 bus_for_each_dev+0x5c/0x98 driver_attach+0x20/0x28 bus_add_driver+0x158/0x220 driver_register+0x60/0x110 __platform_driver_register+0x44/0x50 fsl_qdma_driver_init+0x18/0x20 do_one_initcall+0x48/0x258 kernel_init_freeable+0x1a4/0x23c kernel_init+0x10/0xf8 ret_from_fork+0x10/0x18 Cc: stable@vger.kernel.org Fixes: b092529e0aa0 ("dmaengine: fsl-qdma: Add qDMA controller driver for Layerscape SoCs") Signed-off-by: Curtis Klein Signed-off-by: Yi Zhao Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240201220406.440145-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 70e8b7d425c8..1e3bf6f30f78 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -1198,10 +1198,6 @@ static int fsl_qdma_probe(struct platform_device *pdev) if (!fsl_qdma->queue) return -ENOMEM; - ret = fsl_qdma_irq_init(pdev, fsl_qdma); - if (ret) - return ret; - fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0"); if (fsl_qdma->irq_base < 0) return fsl_qdma->irq_base; @@ -1240,19 +1236,22 @@ static int fsl_qdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, fsl_qdma); - ret = dma_async_device_register(&fsl_qdma->dma_dev); - if (ret) { - dev_err(&pdev->dev, - "Can't register NXP Layerscape qDMA engine.\n"); - return ret; - } - ret = fsl_qdma_reg_init(fsl_qdma); if (ret) { dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n"); return ret; } + ret = fsl_qdma_irq_init(pdev, fsl_qdma); + if (ret) + return ret; + + ret = dma_async_device_register(&fsl_qdma->dma_dev); + if (ret) { + dev_err(&pdev->dev, "Can't register NXP Layerscape qDMA engine.\n"); + return ret; + } + return 0; } From 9ba17defd9edd87970b701085402bc8ecc3a11d4 Mon Sep 17 00:00:00 2001 From: Joy Zou Date: Wed, 31 Jan 2024 11:33:18 -0500 Subject: [PATCH 015/225] dmaengine: fsl-edma: correct calculation of 'nbytes' in multi-fifo scenario The 'nbytes' should be equivalent to burst * width in audio multi-fifo setups. Given that the FIFO width is fixed at 32 bits, adjusts the burst size for multi-fifo configurations to match the slave maxburst in the configuration. Cc: stable@vger.kernel.org Fixes: 72f5801a4e2b ("dmaengine: fsl-edma: integrate v3 support") Signed-off-by: Joy Zou Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240131163318.360315-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c index b53f46245c37..793f1a7ad5e3 100644 --- a/drivers/dma/fsl-edma-common.c +++ b/drivers/dma/fsl-edma-common.c @@ -503,7 +503,7 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan, if (fsl_chan->is_multi_fifo) { /* set mloff to support multiple fifo */ burst = cfg->direction == DMA_DEV_TO_MEM ? - cfg->src_addr_width : cfg->dst_addr_width; + cfg->src_maxburst : cfg->dst_maxburst; nbytes |= EDMA_V3_TCD_NBYTES_MLOFF(-(burst * 4)); /* enable DMLOE/SMLOE */ if (cfg->direction == DMA_MEM_TO_DEV) { From 7936378cb6d87073163130e1e1fc1e5f76a597cf Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 10 Jan 2024 10:33:43 +0100 Subject: [PATCH 016/225] phy: freescale: phy-fsl-imx8-mipi-dphy: Fix alias name to use dashes Devicetree spec lists only dashes as valid characters for alias names. Table 3.2: Valid characters for alias names, Devicee Specification, Release v0.4 Signed-off-by: Alexander Stein Fixes: 3fbae284887de ("phy: freescale: phy-fsl-imx8-mipi-dphy: Add i.MX8qxp LVDS PHY mode support") Link: https://lore.kernel.org/r/20240110093343.468810-1-alexander.stein@ew.tq-group.com Signed-off-by: Vinod Koul --- drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c index e625b32889bf..0928a526e2ab 100644 --- a/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c +++ b/drivers/phy/freescale/phy-fsl-imx8-mipi-dphy.c @@ -706,7 +706,7 @@ static int mixel_dphy_probe(struct platform_device *pdev) return ret; } - priv->id = of_alias_get_id(np, "mipi_dphy"); + priv->id = of_alias_get_id(np, "mipi-dphy"); if (priv->id < 0) { dev_err(dev, "Failed to get phy node alias id: %d\n", priv->id); From 95055beb067cb30f626fb10f7019737ca7681df0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:13:45 +0800 Subject: [PATCH 017/225] phy: qcom: phy-qcom-m31: fix wrong pointer pass to PTR_ERR() It should be 'qphy->vreg' passed to PTR_ERR() when devm_regulator_get() fails. Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824091345.1072650-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index c2590579190a..03fb0d4b75d7 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -299,7 +299,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) qphy->vreg = devm_regulator_get(dev, "vdda-phy"); if (IS_ERR(qphy->vreg)) - return dev_err_probe(dev, PTR_ERR(qphy->phy), + return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); phy_set_drvdata(qphy->phy, qphy); From 734550d60cdf634299f0eac7f7fe15763ed990bb Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 1 Feb 2024 10:39:33 +0200 Subject: [PATCH 018/225] phy: qualcomm: eusb2-repeater: Rework init to drop redundant zero-out loop Instead of incrementing the base of the global reg fields, which renders the second instance of the repeater broken due to wrong offsets, use regmap with base and offset. As for zeroing out the rest of the tuning regs, avoid looping though the table and just use the table as is, as it is already zero initialized. Fixes: 99a517a582fc ("phy: qualcomm: phy-qcom-eusb2-repeater: Zero out untouched tuning regs") Tested-by: Elliot Berman # sm8650-qrd Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20240201-phy-qcom-eusb2-repeater-fixes-v4-1-cf18c8cef6d7@linaro.org Signed-off-by: Vinod Koul --- .../phy/qualcomm/phy-qcom-eusb2-repeater.c | 160 +++++++----------- 1 file changed, 59 insertions(+), 101 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c index a623f092b11f..a43e20abb10d 100644 --- a/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c +++ b/drivers/phy/qualcomm/phy-qcom-eusb2-repeater.c @@ -37,56 +37,28 @@ #define EUSB2_TUNE_EUSB_EQU 0x5A #define EUSB2_TUNE_EUSB_HS_COMP_CUR 0x5B -#define QCOM_EUSB2_REPEATER_INIT_CFG(r, v) \ - { \ - .reg = r, \ - .val = v, \ - } +enum eusb2_reg_layout { + TUNE_EUSB_HS_COMP_CUR, + TUNE_EUSB_EQU, + TUNE_EUSB_SLEW, + TUNE_USB2_HS_COMP_CUR, + TUNE_USB2_PREEM, + TUNE_USB2_EQU, + TUNE_USB2_SLEW, + TUNE_SQUELCH_U, + TUNE_HSDISC, + TUNE_RES_FSDIF, + TUNE_IUSB2, + TUNE_USB2_CROSSOVER, + NUM_TUNE_FIELDS, -enum reg_fields { - F_TUNE_EUSB_HS_COMP_CUR, - F_TUNE_EUSB_EQU, - F_TUNE_EUSB_SLEW, - F_TUNE_USB2_HS_COMP_CUR, - F_TUNE_USB2_PREEM, - F_TUNE_USB2_EQU, - F_TUNE_USB2_SLEW, - F_TUNE_SQUELCH_U, - F_TUNE_HSDISC, - F_TUNE_RES_FSDIF, - F_TUNE_IUSB2, - F_TUNE_USB2_CROSSOVER, - F_NUM_TUNE_FIELDS, + FORCE_VAL_5 = NUM_TUNE_FIELDS, + FORCE_EN_5, - F_FORCE_VAL_5 = F_NUM_TUNE_FIELDS, - F_FORCE_EN_5, + EN_CTL1, - F_EN_CTL1, - - F_RPTR_STATUS, - F_NUM_FIELDS, -}; - -static struct reg_field eusb2_repeater_tune_reg_fields[F_NUM_FIELDS] = { - [F_TUNE_EUSB_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_EUSB_HS_COMP_CUR, 0, 1), - [F_TUNE_EUSB_EQU] = REG_FIELD(EUSB2_TUNE_EUSB_EQU, 0, 1), - [F_TUNE_EUSB_SLEW] = REG_FIELD(EUSB2_TUNE_EUSB_SLEW, 0, 1), - [F_TUNE_USB2_HS_COMP_CUR] = REG_FIELD(EUSB2_TUNE_USB2_HS_COMP_CUR, 0, 1), - [F_TUNE_USB2_PREEM] = REG_FIELD(EUSB2_TUNE_USB2_PREEM, 0, 2), - [F_TUNE_USB2_EQU] = REG_FIELD(EUSB2_TUNE_USB2_EQU, 0, 1), - [F_TUNE_USB2_SLEW] = REG_FIELD(EUSB2_TUNE_USB2_SLEW, 0, 1), - [F_TUNE_SQUELCH_U] = REG_FIELD(EUSB2_TUNE_SQUELCH_U, 0, 2), - [F_TUNE_HSDISC] = REG_FIELD(EUSB2_TUNE_HSDISC, 0, 2), - [F_TUNE_RES_FSDIF] = REG_FIELD(EUSB2_TUNE_RES_FSDIF, 0, 2), - [F_TUNE_IUSB2] = REG_FIELD(EUSB2_TUNE_IUSB2, 0, 3), - [F_TUNE_USB2_CROSSOVER] = REG_FIELD(EUSB2_TUNE_USB2_CROSSOVER, 0, 2), - - [F_FORCE_VAL_5] = REG_FIELD(EUSB2_FORCE_VAL_5, 0, 7), - [F_FORCE_EN_5] = REG_FIELD(EUSB2_FORCE_EN_5, 0, 7), - - [F_EN_CTL1] = REG_FIELD(EUSB2_EN_CTL1, 0, 7), - - [F_RPTR_STATUS] = REG_FIELD(EUSB2_RPTR_STATUS, 0, 7), + RPTR_STATUS, + LAYOUT_SIZE, }; struct eusb2_repeater_cfg { @@ -98,10 +70,11 @@ struct eusb2_repeater_cfg { struct eusb2_repeater { struct device *dev; - struct regmap_field *regs[F_NUM_FIELDS]; + struct regmap *regmap; struct phy *phy; struct regulator_bulk_data *vregs; const struct eusb2_repeater_cfg *cfg; + u32 base; enum phy_mode mode; }; @@ -109,10 +82,10 @@ static const char * const pm8550b_vreg_l[] = { "vdd18", "vdd3", }; -static const u32 pm8550b_init_tbl[F_NUM_TUNE_FIELDS] = { - [F_TUNE_IUSB2] = 0x8, - [F_TUNE_SQUELCH_U] = 0x3, - [F_TUNE_USB2_PREEM] = 0x5, +static const u32 pm8550b_init_tbl[NUM_TUNE_FIELDS] = { + [TUNE_IUSB2] = 0x8, + [TUNE_SQUELCH_U] = 0x3, + [TUNE_USB2_PREEM] = 0x5, }; static const struct eusb2_repeater_cfg pm8550b_eusb2_cfg = { @@ -140,47 +113,42 @@ static int eusb2_repeater_init_vregs(struct eusb2_repeater *rptr) static int eusb2_repeater_init(struct phy *phy) { - struct reg_field *regfields = eusb2_repeater_tune_reg_fields; struct eusb2_repeater *rptr = phy_get_drvdata(phy); struct device_node *np = rptr->dev->of_node; - u32 init_tbl[F_NUM_TUNE_FIELDS] = { 0 }; - u8 override; + struct regmap *regmap = rptr->regmap; + const u32 *init_tbl = rptr->cfg->init_tbl; + u8 tune_usb2_preem = init_tbl[TUNE_USB2_PREEM]; + u8 tune_hsdisc = init_tbl[TUNE_HSDISC]; + u8 tune_iusb2 = init_tbl[TUNE_IUSB2]; + u32 base = rptr->base; u32 val; int ret; - int i; + + of_property_read_u8(np, "qcom,tune-usb2-amplitude", &tune_iusb2); + of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &tune_hsdisc); + of_property_read_u8(np, "qcom,tune-usb2-preem", &tune_usb2_preem); ret = regulator_bulk_enable(rptr->cfg->num_vregs, rptr->vregs); if (ret) return ret; - regmap_field_update_bits(rptr->regs[F_EN_CTL1], EUSB2_RPTR_EN, EUSB2_RPTR_EN); + regmap_write(regmap, base + EUSB2_EN_CTL1, EUSB2_RPTR_EN); - for (i = 0; i < F_NUM_TUNE_FIELDS; i++) { - if (init_tbl[i]) { - regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]); - } else { - /* Write 0 if there's no value set */ - u32 mask = GENMASK(regfields[i].msb, regfields[i].lsb); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_HS_COMP_CUR, init_tbl[TUNE_EUSB_HS_COMP_CUR]); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_EQU, init_tbl[TUNE_EUSB_EQU]); + regmap_write(regmap, base + EUSB2_TUNE_EUSB_SLEW, init_tbl[TUNE_EUSB_SLEW]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_HS_COMP_CUR, init_tbl[TUNE_USB2_HS_COMP_CUR]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_EQU, init_tbl[TUNE_USB2_EQU]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_SLEW, init_tbl[TUNE_USB2_SLEW]); + regmap_write(regmap, base + EUSB2_TUNE_SQUELCH_U, init_tbl[TUNE_SQUELCH_U]); + regmap_write(regmap, base + EUSB2_TUNE_RES_FSDIF, init_tbl[TUNE_RES_FSDIF]); + regmap_write(regmap, base + EUSB2_TUNE_USB2_CROSSOVER, init_tbl[TUNE_USB2_CROSSOVER]); - regmap_field_update_bits(rptr->regs[i], mask, 0); - } - } - memcpy(init_tbl, rptr->cfg->init_tbl, sizeof(init_tbl)); + regmap_write(regmap, base + EUSB2_TUNE_USB2_PREEM, tune_usb2_preem); + regmap_write(regmap, base + EUSB2_TUNE_HSDISC, tune_hsdisc); + regmap_write(regmap, base + EUSB2_TUNE_IUSB2, tune_iusb2); - if (!of_property_read_u8(np, "qcom,tune-usb2-amplitude", &override)) - init_tbl[F_TUNE_IUSB2] = override; - - if (!of_property_read_u8(np, "qcom,tune-usb2-disc-thres", &override)) - init_tbl[F_TUNE_HSDISC] = override; - - if (!of_property_read_u8(np, "qcom,tune-usb2-preem", &override)) - init_tbl[F_TUNE_USB2_PREEM] = override; - - for (i = 0; i < F_NUM_TUNE_FIELDS; i++) - regmap_field_update_bits(rptr->regs[i], init_tbl[i], init_tbl[i]); - - ret = regmap_field_read_poll_timeout(rptr->regs[F_RPTR_STATUS], - val, val & RPTR_OK, 10, 5); + ret = regmap_read_poll_timeout(regmap, base + EUSB2_RPTR_STATUS, val, val & RPTR_OK, 10, 5); if (ret) dev_err(rptr->dev, "initialization timed-out\n"); @@ -191,6 +159,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, enum phy_mode mode, int submode) { struct eusb2_repeater *rptr = phy_get_drvdata(phy); + struct regmap *regmap = rptr->regmap; + u32 base = rptr->base; switch (mode) { case PHY_MODE_USB_HOST: @@ -199,10 +169,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, * per eUSB 1.2 Spec. Below implement software workaround until * PHY and controller is fixing seen observation. */ - regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], - F_CLK_19P2M_EN, F_CLK_19P2M_EN); - regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], - V_CLK_19P2M_EN, V_CLK_19P2M_EN); + regmap_write(regmap, base + EUSB2_FORCE_EN_5, F_CLK_19P2M_EN); + regmap_write(regmap, base + EUSB2_FORCE_VAL_5, V_CLK_19P2M_EN); break; case PHY_MODE_USB_DEVICE: /* @@ -211,10 +179,8 @@ static int eusb2_repeater_set_mode(struct phy *phy, * repeater doesn't clear previous value due to shared * regulators (say host <-> device mode switch). */ - regmap_field_update_bits(rptr->regs[F_FORCE_EN_5], - F_CLK_19P2M_EN, 0); - regmap_field_update_bits(rptr->regs[F_FORCE_VAL_5], - V_CLK_19P2M_EN, 0); + regmap_write(regmap, base + EUSB2_FORCE_EN_5, 0); + regmap_write(regmap, base + EUSB2_FORCE_VAL_5, 0); break; default: return -EINVAL; @@ -243,9 +209,8 @@ static int eusb2_repeater_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct phy_provider *phy_provider; struct device_node *np = dev->of_node; - struct regmap *regmap; - int i, ret; u32 res; + int ret; rptr = devm_kzalloc(dev, sizeof(*rptr), GFP_KERNEL); if (!rptr) @@ -258,22 +223,15 @@ static int eusb2_repeater_probe(struct platform_device *pdev) if (!rptr->cfg) return -EINVAL; - regmap = dev_get_regmap(dev->parent, NULL); - if (!regmap) + rptr->regmap = dev_get_regmap(dev->parent, NULL); + if (!rptr->regmap) return -ENODEV; ret = of_property_read_u32(np, "reg", &res); if (ret < 0) return ret; - for (i = 0; i < F_NUM_FIELDS; i++) - eusb2_repeater_tune_reg_fields[i].reg += res; - - ret = devm_regmap_field_bulk_alloc(dev, regmap, rptr->regs, - eusb2_repeater_tune_reg_fields, - F_NUM_FIELDS); - if (ret) - return ret; + rptr->base = res; ret = eusb2_repeater_init_vregs(rptr); if (ret < 0) { From 30d5297862410418bb8f8b4c0a87fa55c3063dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 4 Feb 2024 18:30:43 +0100 Subject: [PATCH 019/225] power: supply: mm8013: select REGMAP_I2C MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver uses regmap APIs so it should make sure they are available. Fixes: c75f4bf6800b ("power: supply: Introduce MM8013 fuel gauge driver") Cc: Signed-off-by: Thomas Weißschuh Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240204-mm8013-regmap-v1-1-7cc6b619b7d3@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index f21cb05815ec..3e31375491d5 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -978,6 +978,7 @@ config CHARGER_QCOM_SMB2 config FUEL_GAUGE_MM8013 tristate "Mitsumi MM8013 fuel gauge driver" depends on I2C + select REGMAP_I2C help Say Y here to enable the Mitsumi MM8013 fuel gauge driver. It enables the monitoring of many battery parameters, including From eb5555d422d0fc325e1574a7353d3c616f82d8b5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 25 Jan 2024 19:17:56 +0000 Subject: [PATCH 020/225] pmdomain: arm: Fix NULL dereference on scmi_perf_domain removal On unloading of the scmi_perf_domain module got the below splat, when in the DT provided to the system under test the '#power-domain-cells' property was missing. Indeed, this particular setup causes the probe to bail out early without giving any error, which leads to the ->remove() callback gets to run too, but without all the expected initialized structures in place. Add a check and bail out early on remove too. Call trace: scmi_perf_domain_remove+0x28/0x70 [scmi_perf_domain] scmi_dev_remove+0x28/0x40 [scmi_core] device_remove+0x54/0x90 device_release_driver_internal+0x1dc/0x240 driver_detach+0x58/0xa8 bus_remove_driver+0x78/0x108 driver_unregister+0x38/0x70 scmi_driver_unregister+0x28/0x180 [scmi_core] scmi_perf_domain_driver_exit+0x18/0xb78 [scmi_perf_domain] __arm64_sys_delete_module+0x1a8/0x2c0 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x34/0xb8 el0t_64_sync_handler+0x100/0x130 el0t_64_sync+0x190/0x198 Code: a90153f3 f9403c14 f9414800 955f8a05 (b9400a80) ---[ end trace 0000000000000000 ]--- Fixes: 2af23ceb8624 ("pmdomain: arm: Add the SCMI performance domain") Signed-off-by: Cristian Marussi Reviewed-by: Sudeep Holla Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240125191756.868860-1-cristian.marussi@arm.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/arm/scmi_perf_domain.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pmdomain/arm/scmi_perf_domain.c b/drivers/pmdomain/arm/scmi_perf_domain.c index 709bbc448fad..d7ef46ccd9b8 100644 --- a/drivers/pmdomain/arm/scmi_perf_domain.c +++ b/drivers/pmdomain/arm/scmi_perf_domain.c @@ -159,6 +159,9 @@ static void scmi_perf_domain_remove(struct scmi_device *sdev) struct genpd_onecell_data *scmi_pd_data = dev_get_drvdata(dev); int i; + if (!scmi_pd_data) + return; + of_genpd_del_provider(dev->of_node); for (i = 0; i < scmi_pd_data->num_domains; i++) From fccfa646ef3628097d59f7d9c1a3e84d4b6bb45e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Feb 2024 12:24:40 +0100 Subject: [PATCH 021/225] efi/capsule-loader: fix incorrect allocation size gcc-14 notices that the allocation with sizeof(void) on 32-bit architectures is not enough for a 64-bit phys_addr_t: drivers/firmware/efi/capsule-loader.c: In function 'efi_capsule_open': drivers/firmware/efi/capsule-loader.c:295:24: error: allocation of insufficient size '4' for type 'phys_addr_t' {aka 'long long unsigned int'} with size '8' [-Werror=alloc-size] 295 | cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); | ^ Use the correct type instead here. Fixes: f24c4d478013 ("efi/capsule-loader: Reinstate virtual capsule mapping") Signed-off-by: Arnd Bergmann Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/capsule-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 3e8d4b51a814..97bafb5f7038 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -292,7 +292,7 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } - cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + cap_info->phys = kzalloc(sizeof(phys_addr_t), GFP_KERNEL); if (!cap_info->phys) { kfree(cap_info->pages); kfree(cap_info); From ff3206d2186d84e4f77e1378ba1d225633f17b9b Mon Sep 17 00:00:00 2001 From: Ivan Semenov Date: Tue, 6 Feb 2024 19:28:45 +0200 Subject: [PATCH 022/225] mmc: core: Fix eMMC initialization with 1-bit bus connection Initializing an eMMC that's connected via a 1-bit bus is current failing, if the HW (DT) informs that 4-bit bus is supported. In fact this is a regression, as we were earlier capable of falling back to 1-bit mode, when switching to 4/8-bit bus failed. Therefore, let's restore the behaviour. Log for Samsung eMMC 5.1 chip connected via 1bit bus (only D0 pin) Before patch: [134509.044225] mmc0: switch to bus width 4 failed [134509.044509] mmc0: new high speed MMC card at address 0001 [134509.054594] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134509.281602] mmc0: switch to bus width 4 failed [134509.282638] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.282657] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284598] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.284602] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.284609] ldm_validate_partition_table(): Disk read failed. [134509.286495] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.286500] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.288303] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.288308] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289540] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.289544] Buffer I/O error on dev mmcblk0, logical block 0, async page read [134509.289553] mmcblk0: unable to read partition table [134509.289728] mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134509.290283] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB [134509.294577] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 1 prio class 2 [134509.295835] I/O error, dev mmcblk0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [134509.295841] Buffer I/O error on dev mmcblk0, logical block 0, async page read After patch: [134551.089613] mmc0: switch to bus width 4 failed [134551.090377] mmc0: new high speed MMC card at address 0001 [134551.102271] mmcblk0: mmc0:0001 BGUF4R 29.1 GiB [134551.113365] mmcblk0: p1 p2 p3 p4 p5 p6 p7 p8 p9 p10 p11 p12 p13 p14 p15 p16 p17 p18 p19 p20 p21 [134551.114262] mmcblk0boot0: mmc0:0001 BGUF4R 31.9 MiB [134551.114925] mmcblk0boot1: mmc0:0001 BGUF4R 31.9 MiB Fixes: 577fb13199b1 ("mmc: rework selection of bus speed mode") Cc: stable@vger.kernel.org Signed-off-by: Ivan Semenov Link: https://lore.kernel.org/r/20240206172845.34316-1-ivan@semenov.dev Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f410bee50132..58ed7193a3ca 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1015,10 +1015,12 @@ static int mmc_select_bus_width(struct mmc_card *card) static unsigned ext_csd_bits[] = { EXT_CSD_BUS_WIDTH_8, EXT_CSD_BUS_WIDTH_4, + EXT_CSD_BUS_WIDTH_1, }; static unsigned bus_widths[] = { MMC_BUS_WIDTH_8, MMC_BUS_WIDTH_4, + MMC_BUS_WIDTH_1, }; struct mmc_host *host = card->host; unsigned idx, bus_width = 0; From 6b1ba3f9040be5efc4396d86c9752cdc564730be Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Wed, 7 Feb 2024 15:39:51 +0100 Subject: [PATCH 023/225] mmc: mmci: stm32: fix DMA API overlapping mappings warning Turning on CONFIG_DMA_API_DEBUG_SG results in the following warning: DMA-API: mmci-pl18x 48220000.mmc: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 1 PID: 51 at kernel/dma/debug.c:568 add_dma_entry+0x234/0x2f4 Modules linked in: CPU: 1 PID: 51 Comm: kworker/1:2 Not tainted 6.1.28 #1 Hardware name: STMicroelectronics STM32MP257F-EV1 Evaluation Board (DT) Workqueue: events_freezable mmc_rescan Call trace: add_dma_entry+0x234/0x2f4 debug_dma_map_sg+0x198/0x350 __dma_map_sg_attrs+0xa0/0x110 dma_map_sg_attrs+0x10/0x2c sdmmc_idma_prep_data+0x80/0xc0 mmci_prep_data+0x38/0x84 mmci_start_data+0x108/0x2dc mmci_request+0xe4/0x190 __mmc_start_request+0x68/0x140 mmc_start_request+0x94/0xc0 mmc_wait_for_req+0x70/0x100 mmc_send_tuning+0x108/0x1ac sdmmc_execute_tuning+0x14c/0x210 mmc_execute_tuning+0x48/0xec mmc_sd_init_uhs_card.part.0+0x208/0x464 mmc_sd_init_card+0x318/0x89c mmc_attach_sd+0xe4/0x180 mmc_rescan+0x244/0x320 DMA API debug brings to light leaking dma-mappings as dma_map_sg and dma_unmap_sg are not correctly balanced. If an error occurs in mmci_cmd_irq function, only mmci_dma_error function is called and as this API is not managed on stm32 variant, dma_unmap_sg is never called in this error path. Signed-off-by: Christophe Kerello Fixes: 46b723dd867d ("mmc: mmci: add stm32 sdmmc variant") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240207143951.938144-1-christophe.kerello@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 35067e1e6cd8..f5da7f9baa52 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -225,6 +225,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -263,9 +265,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -676,6 +699,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, From 4c892121d43bc2b45896ca207b54f39a8fa6b852 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 2 Feb 2024 11:08:12 +0100 Subject: [PATCH 024/225] arm64: tegra: Set the correct PHY mode for MGBE The PHY is configured in 10GBASE-R, so make sure to reflect that in DT. Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts index ea13c4a7027c..81a82933e350 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-p3737-0000+p3701-0000.dts @@ -175,7 +175,7 @@ status = "okay"; phy-handle = <&mgbe0_phy>; - phy-mode = "usxgmii"; + phy-mode = "10gbase-r"; mdio { #address-cells = <1>; From d4c08d8b23b22807c712208cd05cb047e92e7672 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 13 Feb 2024 15:38:24 +0200 Subject: [PATCH 025/225] phy: qcom-qmp-usb: fix v3 offsets data The MSM8996 platform has registers setup different to the rest of QMP v3 USB platforms. It has PCS region at 0x600 and no PCS_MISC region, while other platforms have PCS region at 0x800 and PCS_MISC at 0x600. This results in the malfunctioning USB host on some of the platforms. The commit f74c35b630d4 ("phy: qcom-qmp-usb: fix register offsets for ipq8074/ipq6018") fixed the issue for IPQ platforms, but missed the SDM845 which has the same register layout. To simplify future platform addition and to make the driver more future proof, rename qmp_usb_offsets_v3 to qmp_usb_offsets_v3_msm8996 (to mark its peculiarity), rename qmp_usb_offsets_ipq8074 to qmp_usb_offsets_v3 and use it for SDM845 platform. Fixes: 2be22aae6b18 ("phy: qcom-qmp-usb: populate offsets configuration") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240213133824.2218916-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 6621246e4ddf..5c003988c35d 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1556,7 +1556,7 @@ static const char * const qmp_phy_vreg_l[] = { "vdda-phy", "vdda-pll", }; -static const struct qmp_usb_offsets qmp_usb_offsets_ipq8074 = { +static const struct qmp_usb_offsets qmp_usb_offsets_v3 = { .serdes = 0, .pcs = 0x800, .pcs_misc = 0x600, @@ -1572,7 +1572,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_ipq9574 = { .rx = 0x400, }; -static const struct qmp_usb_offsets qmp_usb_offsets_v3 = { +static const struct qmp_usb_offsets qmp_usb_offsets_v3_msm8996 = { .serdes = 0, .pcs = 0x600, .tx = 0x200, @@ -1624,7 +1624,7 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = { static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_ipq8074, + .offsets = &qmp_usb_offsets_v3, .serdes_tbl = ipq9574_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq9574_usb3_serdes_tbl), @@ -1642,7 +1642,7 @@ static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = { static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_ipq8074, + .offsets = &qmp_usb_offsets_v3, .serdes_tbl = ipq8074_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(ipq8074_usb3_serdes_tbl), @@ -1678,7 +1678,7 @@ static const struct qmp_phy_cfg ipq9574_usb3phy_cfg = { static const struct qmp_phy_cfg msm8996_usb3phy_cfg = { .lanes = 1, - .offsets = &qmp_usb_offsets_v3, + .offsets = &qmp_usb_offsets_v3_msm8996, .serdes_tbl = msm8996_usb3_serdes_tbl, .serdes_tbl_num = ARRAY_SIZE(msm8996_usb3_serdes_tbl), From ecec7c9f29a7114a3e23a14020b1149ea7dffb4f Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 14 Feb 2024 18:49:31 -0800 Subject: [PATCH 026/225] dmaengine: idxd: Remove shadow Event Log head stored in idxd head is defined in idxd->evl as a shadow of head in the EVLSTATUS register. There are two issues related to the shadow head: 1. Mismatch between the shadow head and the state of the EVLSTATUS register: If Event Log is supported, upon completion of the Enable Device command, the Event Log head in the variable idxd->evl->head should be cleared to match the state of the EVLSTATUS register. But the variable is not reset currently, leading mismatch between the variable and the register state. The mismatch causes incorrect processing of Event Log entries. 2. Unnecessary shadow head definition: The shadow head is unnecessary as head can be read directly from the EVLSTATUS register. Reading head from the register incurs no additional cost because event log head and tail are always read together and tail is already read directly from the register as required by hardware. Remove the shadow Event Log head stored in idxd->evl to address the mentioned issues. Fixes: 244da66cda35 ("dmaengine: idxd: setup event log configuration") Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240215024931.1739621-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 2 +- drivers/dma/idxd/debugfs.c | 2 +- drivers/dma/idxd/idxd.h | 1 - drivers/dma/idxd/irq.c | 3 +-- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 77f8885cf407..e5a94a93a3cc 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -345,7 +345,7 @@ static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) spin_lock(&evl->lock); status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = status.tail; - h = evl->head; + h = status.head; size = evl->size; while (h != t) { diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c index 9cfbd9b14c4c..f3f25ee676f3 100644 --- a/drivers/dma/idxd/debugfs.c +++ b/drivers/dma/idxd/debugfs.c @@ -68,9 +68,9 @@ static int debugfs_evl_show(struct seq_file *s, void *d) spin_lock(&evl->lock); - h = evl->head; evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; + h = evl_status.head; evl_size = evl->size; seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 47de3f93ff1e..d0f5db6cf1ed 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -300,7 +300,6 @@ struct idxd_evl { unsigned int log_size; /* The number of entries in the event log. */ u16 size; - u16 head; unsigned long *bmap; bool batch_fail[IDXD_MAX_BATCH_IDENT]; }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index c8a0aa874b11..348aa21389a9 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -367,9 +367,9 @@ static void process_evl_entries(struct idxd_device *idxd) /* Clear interrupt pending bit */ iowrite32(evl_status.bits_upper32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); - h = evl->head; evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); t = evl_status.tail; + h = evl_status.head; size = idxd->evl->size; while (h != t) { @@ -378,7 +378,6 @@ static void process_evl_entries(struct idxd_device *idxd) h = (h + 1) % size; } - evl->head = h; evl_status.head = h; iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); spin_unlock(&evl->lock); From a79f949a5ce1d45329d63742c2a995f2b47f9852 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Wed, 7 Feb 2024 14:47:32 -0500 Subject: [PATCH 027/225] dmaengine: fsl-edma: correct max_segment_size setting Correcting the previous setting of 0x3fff to the actual value of 0x7fff. Introduced new macro 'EDMA_TCD_ITER_MASK' for improved code clarity and utilization of FIELD_GET to obtain the accurate maximum value. Cc: stable@vger.kernel.org Fixes: e06748539432 ("dmaengine: fsl-edma: support edma memcpy") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240207194733.2112870-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-edma-common.h | 5 +++-- drivers/dma/fsl-edma-main.c | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h index bb5221158a77..f5e216b157c7 100644 --- a/drivers/dma/fsl-edma-common.h +++ b/drivers/dma/fsl-edma-common.h @@ -30,8 +30,9 @@ #define EDMA_TCD_ATTR_SSIZE(x) (((x) & GENMASK(2, 0)) << 8) #define EDMA_TCD_ATTR_SMOD(x) (((x) & GENMASK(4, 0)) << 11) -#define EDMA_TCD_CITER_CITER(x) ((x) & GENMASK(14, 0)) -#define EDMA_TCD_BITER_BITER(x) ((x) & GENMASK(14, 0)) +#define EDMA_TCD_ITER_MASK GENMASK(14, 0) +#define EDMA_TCD_CITER_CITER(x) ((x) & EDMA_TCD_ITER_MASK) +#define EDMA_TCD_BITER_BITER(x) ((x) & EDMA_TCD_ITER_MASK) #define EDMA_TCD_CSR_START BIT(0) #define EDMA_TCD_CSR_INT_MAJOR BIT(1) diff --git a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index 45cc419b1b4a..d36e28b9c767 100644 --- a/drivers/dma/fsl-edma-main.c +++ b/drivers/dma/fsl-edma-main.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -582,7 +583,8 @@ static int fsl_edma_probe(struct platform_device *pdev) DMAENGINE_ALIGN_32_BYTES; /* Per worst case 'nbytes = 1' take CITER as the max_seg_size */ - dma_set_max_seg_size(fsl_edma->dma_dev.dev, 0x3fff); + dma_set_max_seg_size(fsl_edma->dma_dev.dev, + FIELD_GET(EDMA_TCD_ITER_MASK, EDMA_TCD_ITER_MASK)); fsl_edma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; From 85445b96429057d87446bcb24ec0cac9ea9c7fdf Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Tue, 9 Jan 2024 08:24:28 +0800 Subject: [PATCH 028/225] integrity: eliminate unnecessary "Problem loading X.509 certificate" msg Currently when the kernel fails to add a cert to the .machine keyring, it will throw an error immediately in the function integrity_add_key. Since the kernel will try adding to the .platform keyring next or throw an error (in the caller of integrity_add_key i.e. add_to_machine_keyring), so there is no need to throw an error immediately in integrity_add_key. Reported-by: itrymybest80@protonmail.com Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2239331 Fixes: d19967764ba8 ("integrity: Introduce a Linux keyring called machine") Reviewed-by: Eric Snowberg Signed-off-by: Coiby Xu Signed-off-by: Mimi Zohar --- security/integrity/digsig.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c index df387de29bfa..45c3e5dda355 100644 --- a/security/integrity/digsig.c +++ b/security/integrity/digsig.c @@ -179,7 +179,8 @@ static int __init integrity_add_key(const unsigned int id, const void *data, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { rc = PTR_ERR(key); - pr_err("Problem loading X.509 certificate %d\n", rc); + if (id != INTEGRITY_KEYRING_MACHINE) + pr_err("Problem loading X.509 certificate %d\n", rc); } else { pr_notice("Loaded X.509 cert '%s'\n", key_ref_to_ptr(key)->description); From 2df70149e73e79783bcbc7db4fa51ecef0e2022c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Feb 2024 16:51:33 +0100 Subject: [PATCH 029/225] power: supply: bq27xxx-i2c: Do not free non existing IRQ The bq27xxx i2c-client may not have an IRQ, in which case client->irq will be 0. bq27xxx_battery_i2c_probe() already has an if (client->irq) check wrapping the request_threaded_irq(). But bq27xxx_battery_i2c_remove() unconditionally calls free_irq(client->irq) leading to: [ 190.310742] ------------[ cut here ]------------ [ 190.310843] Trying to free already-free IRQ 0 [ 190.310861] WARNING: CPU: 2 PID: 1304 at kernel/irq/manage.c:1893 free_irq+0x1b8/0x310 Followed by a backtrace when unbinding the driver. Add an if (client->irq) to bq27xxx_battery_i2c_remove() mirroring probe() to fix this. Fixes: 444ff00734f3 ("power: supply: bq27xxx: Fix I2C IRQ race on remove") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240215155133.70537-1-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index 3a1798b0c1a7..9910c600743e 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -209,7 +209,9 @@ static void bq27xxx_battery_i2c_remove(struct i2c_client *client) { struct bq27xxx_device_info *di = i2c_get_clientdata(client); - free_irq(client->irq, di); + if (client->irq) + free_irq(client->irq, di); + bq27xxx_battery_teardown(di); mutex_lock(&battery_mutex); From 0ee695a471a750cad4fff22286d91e038b1ef62f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 25 Jan 2024 10:32:11 -0700 Subject: [PATCH 030/225] kbuild: Add -Wa,--fatal-warnings to as-instr invocation Certain assembler instruction tests may only induce warnings from the assembler on an unsupported instruction or option, which causes as-instr to succeed when it was expected to fail. Some tests workaround this limitation by additionally testing that invalid input fails as expected. However, this is fragile if the assembler is changed to accept the invalid input, as it will cause the instruction/option to be unavailable like it was unsupported even when it is. Use '-Wa,--fatal-warnings' in the as-instr macro to turn these warnings into hard errors, which avoids this fragility and makes tests more robust and well formed. Cc: stable@vger.kernel.org Suggested-by: Eric Biggers Signed-off-by: Nathan Chancellor Tested-by: Eric Biggers Tested-by: Andy Chiu Reviewed-by: Andy Chiu Tested-by: Conor Dooley Reviewed-by: Conor Dooley Acked-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240125-fix-riscv-option-arch-llvm-18-v1-1-390ac9cc3cd0@kernel.org Signed-off-by: Palmer Dabbelt --- scripts/Kconfig.include | 2 +- scripts/Makefile.compiler | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index 5a84b6443875..3ee8ecfb8c04 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -33,7 +33,7 @@ ld-option = $(success,$(LD) -v $(1)) # $(as-instr,) # Return y if the assembler supports , n otherwise -as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler-with-cpp -o /dev/null -) +as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o /dev/null -) # check if $(CC) and $(LD) exist $(error-if,$(failure,command -v $(CC)),C compiler '$(CC)' not found) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler index 8fcb427405a6..92be0c9a13ee 100644 --- a/scripts/Makefile.compiler +++ b/scripts/Makefile.compiler @@ -38,7 +38,7 @@ as-option = $(call try-run,\ # Usage: aflags-y += $(call as-instr,instr,option1,option2) as-instr = $(call try-run,\ - printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3)) + printf "%b\n" "$(1)" | $(CC) -Werror $(CLANG_FLAGS) $(KBUILD_AFLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o "$$TMP" -,$(2),$(3)) # __cc-option # Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) From 3aff0c459e77ac0fb1c4d6884433467f797f7357 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 25 Jan 2024 10:32:12 -0700 Subject: [PATCH 031/225] RISC-V: Drop invalid test from CONFIG_AS_HAS_OPTION_ARCH Commit e4bb020f3dbb ("riscv: detect assembler support for .option arch") added two tests, one for a valid value to '.option arch' that should succeed and one for an invalid value that is expected to fail to make sure that support for '.option arch' is properly detected because Clang does not error when '.option arch' is not supported: $ clang --target=riscv64-linux-gnu -Werror -x assembler -c -o /dev/null <(echo '.option arch, +m') /dev/fd/63:1:9: warning: unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'relax' or 'norelax' .option arch, +m ^ $ echo $? 0 Unfortunately, the invalid test started being accepted by Clang after the linked llvm-project change, which causes CONFIG_AS_HAS_OPTION_ARCH and configurations that depend on it to be silently disabled, even though those versions do support '.option arch'. The invalid test can be avoided altogether by using '-Wa,--fatal-warnings', which will turn all assembler warnings into errors, like '-Werror' does for the compiler: $ clang --target=riscv64-linux-gnu -Werror -Wa,--fatal-warnings -x assembler -c -o /dev/null <(echo '.option arch, +m') /dev/fd/63:1:9: error: unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'relax' or 'norelax' .option arch, +m ^ $ echo $? 1 The as-instr macros have been updated to make use of this flag, so remove the invalid test, which allows CONFIG_AS_HAS_OPTION_ARCH to work for all compiler versions. Cc: stable@vger.kernel.org Fixes: e4bb020f3dbb ("riscv: detect assembler support for .option arch") Link: https://github.com/llvm/llvm-project/commit/3ac9fe69f70a2b3541266daedbaaa7dc9c007a2a Reported-by: Eric Biggers Closes: https://lore.kernel.org/r/20240121011341.GA97368@sol.localdomain/ Signed-off-by: Nathan Chancellor Tested-by: Eric Biggers Tested-by: Andy Chiu Reviewed-by: Andy Chiu Tested-by: Conor Dooley Reviewed-by: Conor Dooley Acked-by: Masahiro Yamada Link: https://lore.kernel.org/r/20240125-fix-riscv-option-arch-llvm-18-v1-2-390ac9cc3cd0@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..e3142ce531a0 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -315,7 +315,6 @@ config AS_HAS_OPTION_ARCH # https://reviews.llvm.org/D123515 def_bool y depends on $(as-instr, .option arch$(comma) +m) - depends on !$(as-instr, .option arch$(comma) -i) source "arch/riscv/Kconfig.socs" source "arch/riscv/Kconfig.errata" From 3a7845041eb7235f2fb00ef0960995da5be63b11 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Fri, 16 Feb 2024 20:19:55 +0800 Subject: [PATCH 032/225] exfat: fix appending discontinuous clusters to empty file Eric Hong found that when using ftruncate to expand an empty file, exfat_ent_set() will fail if discontinuous clusters are allocated. The reason is that the empty file does not have a cluster chain, but exfat_ent_set() attempts to append the newly allocated cluster to the cluster chain. In addition, exfat_find_last_cluster() only supports finding the last cluster in a non-empty file. So this commit adds a check whether the file is empty. If the file is empty, exfat_find_last_cluster() and exfat_ent_set() are no longer called as they do not need to be called. Fixes: f55c096f62f1 ("exfat: do not zero the extended part") Reported-by: Eric Hong Signed-off-by: Yuezhang Mo Signed-off-by: Namjae Jeon --- fs/exfat/file.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/exfat/file.c b/fs/exfat/file.c index d25a96a148af..cc00f1a7a1e1 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -35,13 +35,18 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) if (new_num_clusters == num_clusters) goto out; - exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags); - ret = exfat_find_last_cluster(sb, &clu, &last_clu); - if (ret) - return ret; + if (num_clusters) { + exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags); + ret = exfat_find_last_cluster(sb, &clu, &last_clu); + if (ret) + return ret; + + clu.dir = last_clu + 1; + } else { + last_clu = EXFAT_EOF_CLUSTER; + clu.dir = EXFAT_EOF_CLUSTER; + } - clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ? - EXFAT_EOF_CLUSTER : last_clu + 1; clu.size = 0; clu.flags = ei->flags; @@ -51,17 +56,19 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) return ret; /* Append new clusters to chain */ - if (clu.flags != ei->flags) { - exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters); - ei->flags = ALLOC_FAT_CHAIN; - } - if (clu.flags == ALLOC_FAT_CHAIN) - if (exfat_ent_set(sb, last_clu, clu.dir)) - goto free_clu; + if (num_clusters) { + if (clu.flags != ei->flags) + if (exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters)) + goto free_clu; - if (num_clusters == 0) + if (clu.flags == ALLOC_FAT_CHAIN) + if (exfat_ent_set(sb, last_clu, clu.dir)) + goto free_clu; + } else ei->start_clu = clu.dir; + ei->flags = clu.flags; + out: inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); /* Expanded range not zeroed, do not update valid_size */ From 77ce96543b03f437c6b45f286d8110db2b6622a3 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 18 Feb 2024 12:30:26 +0900 Subject: [PATCH 033/225] ALSA: firewire-lib: fix to check cycle continuity The local helper function to compare the given pair of cycle count evaluates them. If the left value is less than the right value, the function returns negative value. If the safe cycle is less than the current cycle, it is the case of cycle lost. However, it is not currently handled properly. This commit fixes the bug. Cc: Fixes: 705794c53b00 ("ALSA: firewire-lib: check cycle continuity") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20240218033026.72577-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/amdtp-stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index a13c0b408aad..7be17bca257f 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -951,7 +951,7 @@ static int generate_tx_packet_descs(struct amdtp_stream *s, struct pkt_desc *des // to the reason. unsigned int safe_cycle = increment_ohci_cycle_count(next_cycle, IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES); - lost = (compare_ohci_cycle_count(safe_cycle, cycle) > 0); + lost = (compare_ohci_cycle_count(safe_cycle, cycle) < 0); } if (lost) { dev_err(&s->unit->device, "Detect discontinuity of cycle: %d %d\n", From 89a0dff6105e06067bdc57595982dbf6d6dd4959 Mon Sep 17 00:00:00 2001 From: Jay Ajit Mate Date: Mon, 19 Feb 2024 15:34:04 +0530 Subject: [PATCH 034/225] ALSA: hda/realtek: Fix top speaker connection on Dell Inspiron 16 Plus 7630 The Dell Inspiron 16 Plus 7630, similar to its predecessors (7620 models), experiences an issue with unconnected top speakers. Since the controller remains unchanged, this commit addresses the problem by correctly connecting the speakers on NID 0X17 to the DAC on NIC 0x03. Signed-off-by: Jay Ajit Mate Cc: Link: https://lore.kernel.org/r/20240219100404.9573-1-jay.mate15@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ec1312bffd5..aaf83ecac90f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9746,6 +9746,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c28, "Dell Inspiron 16 Plus 7630", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0c4d, "Dell", ALC287_FIXUP_CS35L41_I2C_4), SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC289_FIXUP_DELL_CS35L41_SPI_2), From e33625c84b75e4f078d7f9bf58f01fe71ab99642 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 16 Feb 2024 14:05:35 +0000 Subject: [PATCH 035/225] ASoC: cs35l56: Must clear HALO_STATE before issuing SYSTEM_RESET The driver must write 0 to HALO_STATE before sending the SYSTEM_RESET command to the firmware. HALO_STATE is in DSP memory, which is preserved across a soft reset. The SYSTEM_RESET command does not change the value of HALO_STATE. There is period of time while the CS35L56 is resetting, before the firmware has started to boot, where a read of HALO_STATE will return the value it had before the SYSTEM_RESET. If the driver does not clear HALO_STATE, this would return BOOT_DONE status even though the firmware has not booted. Signed-off-by: Richard Fitzgerald Fixes: 8a731fd37f8b ("ASoC: cs35l56: Move utility functions to shared file") Link: https://msgid.link/r/20240216140535.1434933-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56-shared.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index 995d979b6d87..cb4e83126b08 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -335,6 +335,7 @@ void cs35l56_wait_min_reset_pulse(void) EXPORT_SYMBOL_NS_GPL(cs35l56_wait_min_reset_pulse, SND_SOC_CS35L56_SHARED); static const struct reg_sequence cs35l56_system_reset_seq[] = { + REG_SEQ0(CS35L56_DSP1_HALO_STATE, 0), REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET), }; From 1fdf4e8be7059e7784fec11d30cd32784f0bdc83 Mon Sep 17 00:00:00 2001 From: Hans Peter Date: Mon, 19 Feb 2024 17:38:49 +0100 Subject: [PATCH 036/225] ALSA: hda/realtek: Enable Mute LED on HP 840 G8 (MB 8AB8) On my EliteBook 840 G8 Notebook PC (ProdId 5S7R6EC#ABD; built 2022 for german market) the Mute LED is always on. The mute button itself works as expected. alsa-info.sh shows a different subsystem-id 0x8ab9 for Realtek ALC285 Codec, thus the existing quirks for HP 840 G8 don't work. Therefore, add a new quirk for this type of EliteBook. Signed-off-by: Hans Peter Cc: Link: https://lore.kernel.org/r/20240219164518.4099-1-flurry123@gmx.ch Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index aaf83ecac90f..3b7f1d9816f6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9928,6 +9928,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8ab9, "HP EliteBook 840 G8 (MB 8AB8)", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), From 49cbb7b7d36ec3ba73ce1daf7ae1d71d435453b8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 20 Feb 2024 16:08:43 +0100 Subject: [PATCH 037/225] ALSA: ump: Fix the discard error code from snd_ump_legacy_open() snd_ump_legacy_open() didn't return the error code properly even if it couldn't open. Fix it. Fixes: 0b5288f5fe63 ("ALSA: ump: Add legacy raw MIDI support") Cc: Link: https://lore.kernel.org/r/20240220150843.28630-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/ump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/ump.c b/sound/core/ump.c index 3bef1944e955..fe7911498cc4 100644 --- a/sound/core/ump.c +++ b/sound/core/ump.c @@ -985,7 +985,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) struct snd_ump_endpoint *ump = substream->rmidi->private_data; int dir = substream->stream; int group = ump->legacy_mapping[substream->number]; - int err; + int err = 0; mutex_lock(&ump->open_mutex); if (ump->legacy_substreams[dir][group]) { @@ -1009,7 +1009,7 @@ static int snd_ump_legacy_open(struct snd_rawmidi_substream *substream) spin_unlock_irq(&ump->legacy_locks[dir]); unlock: mutex_unlock(&ump->open_mutex); - return 0; + return err; } static int snd_ump_legacy_close(struct snd_rawmidi_substream *substream) From 4df49712eb54141be00a9312547436d55677f092 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 21 Feb 2024 10:21:56 +0100 Subject: [PATCH 038/225] ALSA: Drop leftover snd-rtctimer stuff from Makefile We forgot to remove the line for snd-rtctimer from Makefile while dropping the functionality. Get rid of the stale line. Fixes: 34ce71a96dcb ("ALSA: timer: remove legacy rtctimer") Link: https://lore.kernel.org/r/20240221092156.28695-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/core/Makefile b/sound/core/Makefile index a6b444ee2832..f6526b337137 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -32,7 +32,6 @@ snd-ump-objs := ump.o snd-ump-$(CONFIG_SND_UMP_LEGACY_RAWMIDI) += ump_convert.o snd-timer-objs := timer.o snd-hrtimer-objs := hrtimer.o -snd-rtctimer-objs := rtctimer.o snd-hwdep-objs := hwdep.o snd-seq-device-objs := seq_device.o From 67c3d7717efbd46092f217b1f811df1b205cce06 Mon Sep 17 00:00:00 2001 From: Eniac Zhang Date: Tue, 20 Feb 2024 17:58:12 +0000 Subject: [PATCH 039/225] ALSA: hda/realtek: fix mute/micmute LED For HP mt440 The HP mt440 Thin Client uses an ALC236 codec and needs the ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make the mute and micmute LEDs work. There are two variants of the USB-C PD chip on this device. Each uses a different BIOS and board ID, hence the two entries. Signed-off-by: Eniac Zhang Signed-off-by: Alexandru Gagniuc Cc: Link: https://lore.kernel.org/r/20240220175812.782687-1-alexandru.gagniuc@hp.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3b7f1d9816f6..d9a689ed424c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9903,6 +9903,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8973, "HP EliteBook 860 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8974, "HP EliteBook 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8975, "HP EliteBook x360 840 Aero G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x897d, "HP mt440 Mobile Thin Client U74", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8981, "HP Elite Dragonfly G3", ALC245_FIXUP_CS35L41_SPI_4), SND_PCI_QUIRK(0x103c, 0x898e, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x898f, "HP EliteBook 835 G9", ALC287_FIXUP_CS35L41_I2C_2), @@ -9934,6 +9935,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b0f, "HP Elite mt645 G7 Mobile Thin Client U81", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), + SND_PCI_QUIRK(0x103c, 0x8b3f, "HP mt440 Mobile Thin Client U91", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), From 1382d8b55129875b2e07c4d2a7ebc790183769ee Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Feb 2024 13:48:04 +0000 Subject: [PATCH 040/225] ASoC: qcom: Fix uninitialized pointer dmactl In the case where __lpass_get_dmactl_handle is called and the driver id dai_id is invalid the pointer dmactl is not being assigned a value, and dmactl contains a garbage value since it has not been initialized and so the null check may not work. Fix this to initialize dmactl to NULL. One could argue that modern compilers will set this to zero, but it is useful to keep this initialized as per the same way in functions __lpass_platform_codec_intf_init and lpass_cdc_dma_daiops_hw_params. Cleans up clang scan build warning: sound/soc/qcom/lpass-cdc-dma.c:275:7: warning: Branch condition evaluates to a garbage value [core.uninitialized.Branch] Fixes: b81af585ea54 ("ASoC: qcom: Add lpass CPU driver for codec dma control") Signed-off-by: Colin Ian King Link: https://msgid.link/r/20240221134804.3475989-1-colin.i.king@gmail.com Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cdc-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-cdc-dma.c b/sound/soc/qcom/lpass-cdc-dma.c index 48b03e60e3a3..8106c586f68a 100644 --- a/sound/soc/qcom/lpass-cdc-dma.c +++ b/sound/soc/qcom/lpass-cdc-dma.c @@ -259,7 +259,7 @@ static int lpass_cdc_dma_daiops_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *soc_runtime = snd_soc_substream_to_rtd(substream); - struct lpaif_dmactl *dmactl; + struct lpaif_dmactl *dmactl = NULL; int ret = 0, id; switch (cmd) { From d3ea125df37dc37972d581b74a5d3785c3f283ab Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 9 Feb 2024 11:14:12 -0800 Subject: [PATCH 041/225] dmaengine: idxd: Ensure safe user copy of completion record If CONFIG_HARDENED_USERCOPY is enabled, copying completion record from event log cache to user triggers a kernel bug. [ 1987.159822] usercopy: Kernel memory exposure attempt detected from SLUB object 'dsa0' (offset 74, size 31)! [ 1987.170845] ------------[ cut here ]------------ [ 1987.176086] kernel BUG at mm/usercopy.c:102! [ 1987.180946] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 1987.186866] CPU: 17 PID: 528 Comm: kworker/17:1 Not tainted 6.8.0-rc2+ #5 [ 1987.194537] Hardware name: Intel Corporation AvenueCity/AvenueCity, BIOS BHSDCRB1.86B.2492.D03.2307181620 07/18/2023 [ 1987.206405] Workqueue: wq0.0 idxd_evl_fault_work [idxd] [ 1987.212338] RIP: 0010:usercopy_abort+0x72/0x90 [ 1987.217381] Code: 58 65 9c 50 48 c7 c2 17 85 61 9c 57 48 c7 c7 98 fd 6b 9c 48 0f 44 d6 48 c7 c6 b3 08 62 9c 4c 89 d1 49 0f 44 f3 e8 1e 2e d5 ff <0f> 0b 49 c7 c1 9e 42 61 9c 4c 89 cf 4d 89 c8 eb a9 66 66 2e 0f 1f [ 1987.238505] RSP: 0018:ff62f5cf20607d60 EFLAGS: 00010246 [ 1987.244423] RAX: 000000000000005f RBX: 000000000000001f RCX: 0000000000000000 [ 1987.252480] RDX: 0000000000000000 RSI: ffffffff9c61429e RDI: 00000000ffffffff [ 1987.260538] RBP: ff62f5cf20607d78 R08: ff2a6a89ef3fffe8 R09: 00000000fffeffff [ 1987.268595] R10: ff2a6a89eed00000 R11: 0000000000000003 R12: ff2a66934849c89a [ 1987.276652] R13: 0000000000000001 R14: ff2a66934849c8b9 R15: ff2a66934849c899 [ 1987.284710] FS: 0000000000000000(0000) GS:ff2a66b22fe40000(0000) knlGS:0000000000000000 [ 1987.293850] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1987.300355] CR2: 00007fe291a37000 CR3: 000000010fbd4005 CR4: 0000000000f71ef0 [ 1987.308413] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1987.316470] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 [ 1987.324527] PKRU: 55555554 [ 1987.327622] Call Trace: [ 1987.330424] [ 1987.332826] ? show_regs+0x6e/0x80 [ 1987.336703] ? die+0x3c/0xa0 [ 1987.339988] ? do_trap+0xd4/0xf0 [ 1987.343662] ? do_error_trap+0x75/0xa0 [ 1987.347922] ? usercopy_abort+0x72/0x90 [ 1987.352277] ? exc_invalid_op+0x57/0x80 [ 1987.356634] ? usercopy_abort+0x72/0x90 [ 1987.360988] ? asm_exc_invalid_op+0x1f/0x30 [ 1987.365734] ? usercopy_abort+0x72/0x90 [ 1987.370088] __check_heap_object+0xb7/0xd0 [ 1987.374739] __check_object_size+0x175/0x2d0 [ 1987.379588] idxd_copy_cr+0xa9/0x130 [idxd] [ 1987.384341] idxd_evl_fault_work+0x127/0x390 [idxd] [ 1987.389878] process_one_work+0x13e/0x300 [ 1987.394435] ? __pfx_worker_thread+0x10/0x10 [ 1987.399284] worker_thread+0x2f7/0x420 [ 1987.403544] ? _raw_spin_unlock_irqrestore+0x2b/0x50 [ 1987.409171] ? __pfx_worker_thread+0x10/0x10 [ 1987.414019] kthread+0x107/0x140 [ 1987.417693] ? __pfx_kthread+0x10/0x10 [ 1987.421954] ret_from_fork+0x3d/0x60 [ 1987.426019] ? __pfx_kthread+0x10/0x10 [ 1987.430281] ret_from_fork_asm+0x1b/0x30 [ 1987.434744] The issue arises because event log cache is created using kmem_cache_create() which is not suitable for user copy. Fix the issue by creating event log cache with kmem_cache_create_usercopy(), ensuring safe user copy. Fixes: c2f156bf168f ("dmaengine: idxd: create kmem cache for event log fault items") Reported-by: Tony Zhu Tested-by: Tony Zhu Signed-off-by: Fenghua Yu Reviewed-by: Lijun Pan Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20240209191412.1050270-1-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 14df1f1347a8..4954adc6bb60 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -343,7 +343,9 @@ static void idxd_cleanup_internals(struct idxd_device *idxd) static int idxd_init_evl(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; + unsigned int evl_cache_size; struct idxd_evl *evl; + const char *idxd_name; if (idxd->hw.gen_cap.evl_support == 0) return 0; @@ -355,9 +357,16 @@ static int idxd_init_evl(struct idxd_device *idxd) spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; - idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)), - sizeof(struct idxd_evl_fault) + evl_ent_size(idxd), - 0, 0, NULL); + idxd_name = dev_name(idxd_confdev(idxd)); + evl_cache_size = sizeof(struct idxd_evl_fault) + evl_ent_size(idxd); + /* + * Since completion record in evl_cache will be copied to user + * when handling completion record page fault, need to create + * the cache suitable for user copy. + */ + idxd->evl_cache = kmem_cache_create_usercopy(idxd_name, evl_cache_size, + 0, 0, 0, evl_cache_size, + NULL); if (!idxd->evl_cache) { kfree(evl); return -ENOMEM; From ff6bd76f4d997642ef390bffe42e93d6f7be87d3 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 16 Feb 2024 11:57:48 +0000 Subject: [PATCH 042/225] arm64: tegra: Fix Tegra234 MGBE power-domains The MGBE power-domains on Tegra234 are mapped to the MGBE controllers as follows: MGBE0 (0x68000000) --> Power-Domain MGBEB MGBE1 (0x69000000) --> Power-Domain MGBEC MGBE2 (0x6a000000) --> Power-Domain MGBED Update the device-tree nodes for Tegra234 to correct this. Fixes: 610cdf3186bc ("arm64: tegra: Add MGBE nodes on Tegra234") Signed-off-by: Jon Hunter Signed-off-by: Thierry Reding --- arch/arm64/boot/dts/nvidia/tegra234.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi index 3f16595d099c..d1bd328892af 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi @@ -1459,7 +1459,7 @@ <&mc TEGRA234_MEMORY_CLIENT_MGBEAWR &emc>; interconnect-names = "dma-mem", "write"; iommus = <&smmu_niso0 TEGRA234_SID_MGBE>; - power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEA>; + power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEB>; status = "disabled"; }; @@ -1493,7 +1493,7 @@ <&mc TEGRA234_MEMORY_CLIENT_MGBEBWR &emc>; interconnect-names = "dma-mem", "write"; iommus = <&smmu_niso0 TEGRA234_SID_MGBE_VF1>; - power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEB>; + power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEC>; status = "disabled"; }; @@ -1527,7 +1527,7 @@ <&mc TEGRA234_MEMORY_CLIENT_MGBECWR &emc>; interconnect-names = "dma-mem", "write"; iommus = <&smmu_niso0 TEGRA234_SID_MGBE_VF2>; - power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBEC>; + power-domains = <&bpmp TEGRA234_POWER_DOMAIN_MGBED>; status = "disabled"; }; From 1fa8d07ae1a5fa4e87de42c338e8fc27f46d8bb6 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 22 Feb 2024 03:05:16 +0200 Subject: [PATCH 043/225] gpu: host1x: Skip reset assert on Tegra186 On Tegra186, secure world applications may need to access host1x during suspend/resume, and rely on the kernel to keep Host1x out of reset during the suspend cycle. As such, as a quirk, skip asserting Host1x's reset on Tegra186. We don't need to keep the clocks enabled, as BPMP ensures the clock stays on while Host1x is being used. On newer SoC's, the reset line is inaccessible, so there is no need for the quirk. Fixes: b7c00cdf6df5 ("gpu: host1x: Enable system suspend callbacks") Signed-off-by: Mikko Perttunen Reviewed-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20240222010517.1573931-1-cyndis@kapsi.fi --- drivers/gpu/host1x/dev.c | 15 +++++++++------ drivers/gpu/host1x/dev.h | 6 ++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 42fd504abbcd..89983d7d73ca 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -169,6 +169,7 @@ static const struct host1x_info host1x06_info = { .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, .reserve_vblank_syncpts = false, + .skip_reset_assert = true, }; static const struct host1x_sid_entry tegra194_sid_table[] = { @@ -680,13 +681,15 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev) host1x_intr_stop(host); host1x_syncpt_save(host); - err = reset_control_bulk_assert(host->nresets, host->resets); - if (err) { - dev_err(dev, "failed to assert reset: %d\n", err); - goto resume_host1x; - } + if (!host->info->skip_reset_assert) { + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } - usleep_range(1000, 2000); + usleep_range(1000, 2000); + } clk_disable_unprepare(host->clk); reset_control_bulk_release(host->nresets, host->resets); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index c8e302de7625..925a118db23f 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -116,6 +116,12 @@ struct host1x_info { * the display driver disables VBLANK increments. */ bool reserve_vblank_syncpts; + /* + * On Tegra186, secure world applications may require access to + * host1x during suspend/resume. To allow this, we need to leave + * host1x not in reset. + */ + bool skip_reset_assert; }; struct host1x { From 680341382da56bd192ebfa4e58eaf4fec2e5bca7 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Fri, 2 Feb 2024 01:51:02 +0000 Subject: [PATCH 044/225] riscv: add CALLER_ADDRx support CALLER_ADDRx returns caller's address at specified level, they are used for several tracers. These macros eventually use __builtin_return_address(n) to get the caller's address if arch doesn't define their own implementation. In RISC-V, __builtin_return_address(n) only works when n == 0, we need to walk the stack frame to get the caller's address at specified level. data.level started from 'level + 3' due to the call flow of getting caller's address in RISC-V implementation. If we don't have additional three iteration, the level is corresponding to follows: callsite -> return_address -> arch_stack_walk -> walk_stackframe | | | | level 3 level 2 level 1 level 0 Fixes: 10626c32e382 ("riscv/ftrace: Add basic support") Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Signed-off-by: Zong Li Link: https://lore.kernel.org/r/20240202015102.26251-1-zong.li@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 5 ++++ arch/riscv/kernel/Makefile | 2 ++ arch/riscv/kernel/return_address.c | 48 ++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 arch/riscv/kernel/return_address.c diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 329172122952..15055f9df4da 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -25,6 +25,11 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ + +extern void *return_address(unsigned int level); + +#define ftrace_return_address(n) return_address(n) + void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f71910718053..604d6bf7e476 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -7,6 +7,7 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) @@ -46,6 +47,7 @@ obj-y += irq.o obj-y += process.o obj-y += ptrace.o obj-y += reset.o +obj-y += return_address.o obj-y += setup.o obj-y += signal.o obj-y += syscall_table.o diff --git a/arch/riscv/kernel/return_address.c b/arch/riscv/kernel/return_address.c new file mode 100644 index 000000000000..c8115ec8fb30 --- /dev/null +++ b/arch/riscv/kernel/return_address.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code come from arch/arm64/kernel/return_address.c + * + * Copyright (C) 2023 SiFive. + */ + +#include +#include +#include + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static bool save_return_addr(void *d, unsigned long pc) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)pc; + return false; + } + + --data->level; + + return true; +} +NOKPROBE_SYMBOL(save_return_addr); + +noinline void *return_address(unsigned int level) +{ + struct return_address_data data; + + data.level = level + 3; + data.addr = NULL; + + arch_stack_walk(save_return_addr, &data, current, NULL); + + if (!data.level) + return data.addr; + else + return NULL; + +} +EXPORT_SYMBOL_GPL(return_address); +NOKPROBE_SYMBOL(return_address); From c21f014818600ae017f97ee087e7c136b1916aa7 Mon Sep 17 00:00:00 2001 From: Yangyu Chen Date: Wed, 21 Feb 2024 11:02:31 +0800 Subject: [PATCH 045/225] riscv: mm: fix NOCACHE_THEAD does not set bit[61] correctly Previous commit dbfbda3bd6bf ("riscv: mm: update T-Head memory type definitions") from patch [1] missed a `<` for bit shifting, result in bit(61) does not set in _PAGE_NOCACHE_THEAD and leaves bit(0) set instead. This patch get this fixed. Link: https://lore.kernel.org/linux-riscv/20230912072510.2510-1-jszhang@kernel.org/ [1] Fixes: dbfbda3bd6bf ("riscv: mm: update T-Head memory type definitions") Signed-off-by: Yangyu Chen Reviewed-by: Guo Ren Reviewed-by: Jisheng Zhang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/tencent_E19FA1A095768063102E654C6FC858A32F06@qq.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable-64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index b42017d76924..b99bd66107a6 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -136,7 +136,7 @@ enum napot_cont_order { * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable */ #define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD ((1UL < 61) | (1UL << 60)) +#define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60)) #define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) #define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) From fc325b1a915f6d0c821bfcea21fb3f1354c4323b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 11 Feb 2024 09:36:40 +0100 Subject: [PATCH 046/225] riscv: Fix build error if !CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION The new riscv specific arch_hugetlb_migration_supported() must be guarded with a #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION to avoid the following build error: In file included from include/linux/hugetlb.h:851, from kernel/fork.c:52: >> arch/riscv/include/asm/hugetlb.h:15:42: error: static declaration of 'arch_hugetlb_migration_supported' follows non-static declaration 15 | #define arch_hugetlb_migration_supported arch_hugetlb_migration_supported | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/hugetlb.h:916:20: note: in expansion of macro 'arch_hugetlb_migration_supported' 916 | static inline bool arch_hugetlb_migration_supported(struct hstate *h) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/riscv/include/asm/hugetlb.h:14:6: note: previous declaration of 'arch_hugetlb_migration_supported' with type 'bool(struct hstate *)' {aka '_Bool(struct hstate *)'} 14 | bool arch_hugetlb_migration_supported(struct hstate *h); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402110258.CV51JlEI-lkp@intel.com/ Fixes: ce68c035457b ("riscv: Fix arch_hugetlb_migration_supported() for NAPOT") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240211083640.756583-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hugetlb.h | 2 ++ arch/riscv/mm/hugetlbpage.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index 20f9c3ba2341..22deb7a2a6ec 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -11,8 +11,10 @@ static inline void arch_clear_hugepage_flags(struct page *page) } #define arch_clear_hugepage_flags arch_clear_hugepage_flags +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION bool arch_hugetlb_migration_supported(struct hstate *h); #define arch_hugetlb_migration_supported arch_hugetlb_migration_supported +#endif #ifdef CONFIG_RISCV_ISA_SVNAPOT #define __HAVE_ARCH_HUGE_PTE_CLEAR diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 29c7606414d2..5ef2a6891158 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -426,10 +426,12 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return __hugetlb_valid_size(size); } +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION bool arch_hugetlb_migration_supported(struct hstate *h) { return __hugetlb_valid_size(huge_page_size(h)); } +#endif #ifdef CONFIG_CONTIG_ALLOC static __init int gigantic_pages_init(void) From 2b0a5a8a397c0ae8f8cd25e7d3857c749239ceb8 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 1 Feb 2024 15:00:54 -0300 Subject: [PATCH 047/225] ARM: imx_v6_v7_defconfig: Restore CONFIG_BACKLIGHT_CLASS_DEVICE Since commit bfac19e239a7 ("fbdev: mx3fb: Remove the driver") backlight is no longer functional. The fbdev mx3fb driver used to automatically select CONFIG_BACKLIGHT_CLASS_DEVICE. Now that the mx3fb driver has been removed, enable the CONFIG_BACKLIGHT_CLASS_DEVICE option so that backlight can still work by default. Tested on a imx6dl-sabresd board. Cc: stable@vger.kernel.org Fixes: bfac19e239a7 ("fbdev: mx3fb: Remove the driver") Signed-off-by: Fabio Estevam Reviewed-by: Francesco Dolcini Tested-by: Francesco Dolcini # Toradex Colibri iMX7 Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v6_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 0a90583f9f01..8f9dbe8d9029 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -297,6 +297,7 @@ CONFIG_FB_MODE_HELPERS=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_L4F00242T03=y CONFIG_LCD_PLATFORM=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_PWM=y CONFIG_BACKLIGHT_GPIO=y CONFIG_FRAMEBUFFER_CONSOLE=y From 09a3c1e46142199adcee372a420b024b4fc61051 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Thu, 25 Jan 2024 14:30:17 -0600 Subject: [PATCH 048/225] powerpc/pseries/iommu: IOMMU table is not initialized for kdump over SR-IOV When kdump kernel tries to copy dump data over SR-IOV, LPAR panics due to NULL pointer exception: Kernel attempted to read user page (0) - exploit attempt? (uid: 0) BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc000000020847ad4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: mlx5_core(+) vmx_crypto pseries_wdt papr_scm libnvdimm mlxfw tls psample sunrpc fuse overlay squashfs loop CPU: 12 PID: 315 Comm: systemd-udevd Not tainted 6.4.0-Test102+ #12 Hardware name: IBM,9080-HEX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NH1060_008) hv:phyp pSeries NIP: c000000020847ad4 LR: c00000002083b2dc CTR: 00000000006cd18c REGS: c000000029162ca0 TRAP: 0300 Not tainted (6.4.0-Test102+) MSR: 800000000280b033 CR: 48288244 XER: 00000008 CFAR: c00000002083b2d8 DAR: 0000000000000000 DSISR: 40000000 IRQMASK: 1 ... NIP _find_next_zero_bit+0x24/0x110 LR bitmap_find_next_zero_area_off+0x5c/0xe0 Call Trace: dev_printk_emit+0x38/0x48 (unreliable) iommu_area_alloc+0xc4/0x180 iommu_range_alloc+0x1e8/0x580 iommu_alloc+0x60/0x130 iommu_alloc_coherent+0x158/0x2b0 dma_iommu_alloc_coherent+0x3c/0x50 dma_alloc_attrs+0x170/0x1f0 mlx5_cmd_init+0xc0/0x760 [mlx5_core] mlx5_function_setup+0xf0/0x510 [mlx5_core] mlx5_init_one+0x84/0x210 [mlx5_core] probe_one+0x118/0x2c0 [mlx5_core] local_pci_probe+0x68/0x110 pci_call_probe+0x68/0x200 pci_device_probe+0xbc/0x1a0 really_probe+0x104/0x540 __driver_probe_device+0xb4/0x230 driver_probe_device+0x54/0x130 __driver_attach+0x158/0x2b0 bus_for_each_dev+0xa8/0x130 driver_attach+0x34/0x50 bus_add_driver+0x16c/0x300 driver_register+0xa4/0x1b0 __pci_register_driver+0x68/0x80 mlx5_init+0xb8/0x100 [mlx5_core] do_one_initcall+0x60/0x300 do_init_module+0x7c/0x2b0 At the time of LPAR dump, before kexec hands over control to kdump kernel, DDWs (Dynamic DMA Windows) are scanned and added to the FDT. For the SR-IOV case, default DMA window "ibm,dma-window" is removed from the FDT and DDW added, for the device. Now, kexec hands over control to the kdump kernel. When the kdump kernel initializes, PCI busses are scanned and IOMMU group/tables created, in pci_dma_bus_setup_pSeriesLP(). For the SR-IOV case, there is no "ibm,dma-window". The original commit: b1fc44eaa9ba, fixes the path where memory is pre-mapped (direct mapped) to the DDW. When TCEs are direct mapped, there is no need to initialize IOMMU tables. iommu_table_setparms_lpar() only considers "ibm,dma-window" property when initiallizing IOMMU table. In the scenario where TCEs are dynamically allocated for SR-IOV, newly created IOMMU table is not initialized. Later, when the device driver tries to enter TCEs for the SR-IOV device, NULL pointer execption is thrown from iommu_area_alloc(). The fix is to initialize the IOMMU table with DDW property stored in the FDT. There are 2 points to remember: 1. For the dedicated adapter, kdump kernel would encounter both default and DDW in FDT. In this case, DDW property is used to initialize the IOMMU table. 2. A DDW could be direct or dynamic mapped. kdump kernel would initialize IOMMU table and mark the existing DDW as "dynamic". This works fine since, at the time of table initialization, iommu_table_clear() makes some space in the DDW, for some predefined number of TCEs which are needed for kdump to succeed. Fixes: b1fc44eaa9ba ("pseries/iommu/ddw: Fix kdump to work in absence of ibm,dma-window") Signed-off-by: Gaurav Batra Reviewed-by: Brian King Signed-off-by: Michael Ellerman Link: https://msgid.link/20240125203017.61014-1-gbatra@linux.ibm.com --- arch/powerpc/platforms/pseries/iommu.c | 156 +++++++++++++++++-------- 1 file changed, 105 insertions(+), 51 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 496e16c588aa..e8c4129697b1 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -574,29 +574,6 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table_ops iommu_table_lpar_multi_ops; -/* - * iommu_table_setparms_lpar - * - * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. - */ -static void iommu_table_setparms_lpar(struct pci_controller *phb, - struct device_node *dn, - struct iommu_table *tbl, - struct iommu_table_group *table_group, - const __be32 *dma_window) -{ - unsigned long offset, size, liobn; - - of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); - - iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, - &iommu_table_lpar_multi_ops); - - - table_group->tce32_start = offset; - table_group->tce32_size = size; -} - struct iommu_table_ops iommu_table_pseries_ops = { .set = tce_build_pSeries, .clear = tce_free_pSeries, @@ -724,26 +701,71 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { * dynamic 64bit DMA window, walking up the device tree. */ static struct device_node *pci_dma_find(struct device_node *dn, - const __be32 **dma_window) + struct dynamic_dma_window_prop *prop) { - const __be32 *dw = NULL; + const __be32 *default_prop = NULL; + const __be32 *ddw_prop = NULL; + struct device_node *rdn = NULL; + bool default_win = false, ddw_win = false; for ( ; dn && PCI_DN(dn); dn = dn->parent) { - dw = of_get_property(dn, "ibm,dma-window", NULL); - if (dw) { - if (dma_window) - *dma_window = dw; - return dn; + default_prop = of_get_property(dn, "ibm,dma-window", NULL); + if (default_prop) { + rdn = dn; + default_win = true; } - dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); - if (dw) - return dn; - dw = of_get_property(dn, DMA64_PROPNAME, NULL); - if (dw) - return dn; + ddw_prop = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + ddw_prop = of_get_property(dn, DMA64_PROPNAME, NULL); + if (ddw_prop) { + rdn = dn; + ddw_win = true; + break; + } + + /* At least found default window, which is the case for normal boot */ + if (default_win) + break; } - return NULL; + /* For PCI devices there will always be a DMA window, either on the device + * or parent bus + */ + WARN_ON(!(default_win | ddw_win)); + + /* caller doesn't want to get DMA window property */ + if (!prop) + return rdn; + + /* parse DMA window property. During normal system boot, only default + * DMA window is passed in OF. But, for kdump, a dedicated adapter might + * have both default and DDW in FDT. In this scenario, DDW takes precedence + * over default window. + */ + if (ddw_win) { + struct dynamic_dma_window_prop *p; + + p = (struct dynamic_dma_window_prop *)ddw_prop; + prop->liobn = p->liobn; + prop->dma_base = p->dma_base; + prop->tce_shift = p->tce_shift; + prop->window_shift = p->window_shift; + } else if (default_win) { + unsigned long offset, size, liobn; + + of_parse_dma_window(rdn, default_prop, &liobn, &offset, &size); + + prop->liobn = cpu_to_be32((u32)liobn); + prop->dma_base = cpu_to_be64(offset); + prop->tce_shift = cpu_to_be32(IOMMU_PAGE_SHIFT_4K); + prop->window_shift = cpu_to_be32(order_base_2(size)); + } + + return rdn; } static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) @@ -751,17 +773,20 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const __be32 *dma_window = NULL; + struct dynamic_dma_window_prop prop; dn = pci_bus_to_OF_node(bus); pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); - if (dma_window == NULL) - pr_debug(" no ibm,dma-window property !\n"); + /* In PPC architecture, there will always be DMA window on bus or one of the + * parent bus. During reboot, there will be ibm,dma-window property to + * define DMA window. For kdump, there will at least be default window or DDW + * or both. + */ ppci = PCI_DN(pdn); @@ -771,13 +796,24 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - if (dma_window) { - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); - } + iommu_table_setparms_common(tbl, ppci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + ppci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + ppci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); + + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -968,6 +1004,12 @@ static void find_existing_ddw_windows_named(const char *name) continue; } + /* If at the time of system initialization, there are DDWs in OF, + * it means this is during kexec. DDW could be direct or dynamic. + * We will just mark DDWs as "dynamic" since this is kdump path, + * no need to worry about perforance. ddw_list_new_entry() will + * set window->direct = false. + */ window = ddw_list_new_entry(pdn, dma64); if (!window) { of_node_put(pdn); @@ -1524,8 +1566,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - const __be32 *dma_window = NULL; struct pci_dn *pci; + struct dynamic_dma_window_prop prop; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1538,7 +1580,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - pdn = pci_dma_find(dn, &dma_window); + pdn = pci_dma_find(dn, &prop); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1551,8 +1593,20 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) if (!pci->table_group) { pci->table_group = iommu_pseries_alloc_group(pci->phb->node); tbl = pci->table_group->tables[0]; - iommu_table_setparms_lpar(pci->phb, pdn, tbl, - pci->table_group, dma_window); + + iommu_table_setparms_common(tbl, pci->phb->bus->number, + be32_to_cpu(prop.liobn), + be64_to_cpu(prop.dma_base), + 1ULL << be32_to_cpu(prop.window_shift), + be32_to_cpu(prop.tce_shift), NULL, + &iommu_table_lpar_multi_ops); + + /* Only for normal boot with default window. Doesn't matter even + * if we set these with DDW which is 64bit during kdump, since + * these will not be used during kdump. + */ + pci->table_group->tce32_start = be64_to_cpu(prop.dma_base); + pci->table_group->tce32_size = 1 << be32_to_cpu(prop.window_shift); iommu_init_table(tbl, pci->phb->node, 0, 0); iommu_register_group(pci->table_group, From fad87dbd48156ab940538f052f1820f4b6ed2819 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 22 Feb 2024 16:19:14 -0600 Subject: [PATCH 049/225] powerpc/rtas: use correct function name for resetting TCE tables The PAPR spec spells the function name as "ibm,reset-pe-dma-windows" but in practice firmware uses the singular form: "ibm,reset-pe-dma-window" in the device tree. Since we have the wrong spelling in the RTAS function table, reverse lookups (token -> name) fail and warn: unexpected failed lookup for token 86 WARNING: CPU: 1 PID: 545 at arch/powerpc/kernel/rtas.c:659 __do_enter_rtas_trace+0x2a4/0x2b4 CPU: 1 PID: 545 Comm: systemd-udevd Not tainted 6.8.0-rc4 #30 Hardware name: IBM,9105-22A POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NL1060_028) hv:phyp pSeries NIP [c0000000000417f0] __do_enter_rtas_trace+0x2a4/0x2b4 LR [c0000000000417ec] __do_enter_rtas_trace+0x2a0/0x2b4 Call Trace: __do_enter_rtas_trace+0x2a0/0x2b4 (unreliable) rtas_call+0x1f8/0x3e0 enable_ddw.constprop.0+0x4d0/0xc84 dma_iommu_dma_supported+0xe8/0x24c dma_set_mask+0x5c/0xd8 mlx5_pci_init.constprop.0+0xf0/0x46c [mlx5_core] probe_one+0xfc/0x32c [mlx5_core] local_pci_probe+0x68/0x12c pci_call_probe+0x68/0x1ec pci_device_probe+0xbc/0x1a8 really_probe+0x104/0x570 __driver_probe_device+0xb8/0x224 driver_probe_device+0x54/0x130 __driver_attach+0x158/0x2b0 bus_for_each_dev+0xa8/0x120 driver_attach+0x34/0x48 bus_add_driver+0x174/0x304 driver_register+0x8c/0x1c4 __pci_register_driver+0x68/0x7c mlx5_init+0xb8/0x118 [mlx5_core] do_one_initcall+0x60/0x388 do_init_module+0x7c/0x2a4 init_module_from_file+0xb4/0x108 idempotent_init_module+0x184/0x34c sys_finit_module+0x90/0x114 And oopses are possible when lockdep is enabled or the RTAS tracepoints are active, since those paths dereference the result of the lookup. Use the correct spelling to match firmware's behavior, adjusting the related constants to match. Signed-off-by: Nathan Lynch Fixes: 8252b88294d2 ("powerpc/rtas: improve function information lookups") Reported-by: Gaurav Batra Signed-off-by: Michael Ellerman Link: https://msgid.link/20240222-rtas-fix-ibm-reset-pe-dma-window-v1-1-7aaf235ac63c@linux.ibm.com --- arch/powerpc/include/asm/rtas.h | 4 ++-- arch/powerpc/kernel/rtas.c | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9bb2210c8d44..065ffd1b2f8a 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -69,7 +69,7 @@ enum rtas_function_index { RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE, RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2, RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW, - RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS, + RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW, RTAS_FNIDX__IBM_SCAN_LOG_DUMP, RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR, RTAS_FNIDX__IBM_SET_EEH_OPTION, @@ -164,7 +164,7 @@ typedef struct { #define RTAS_FN_IBM_READ_SLOT_RESET_STATE rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE) #define RTAS_FN_IBM_READ_SLOT_RESET_STATE2 rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2) #define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW) -#define RTAS_FN_IBM_RESET_PE_DMA_WINDOWS rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS) +#define RTAS_FN_IBM_RESET_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW) #define RTAS_FN_IBM_SCAN_LOG_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP) #define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR) #define RTAS_FN_IBM_SET_EEH_OPTION rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7e793b503e29..8064d9c3de86 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -375,8 +375,13 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { .name = "ibm,remove-pe-dma-window", }, - [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = { - .name = "ibm,reset-pe-dma-windows", + [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = { + /* + * Note: PAPR+ v2.13 7.3.31.4.1 spells this as + * "ibm,reset-pe-dma-windows" (plural), but RTAS + * implementations use the singular form in practice. + */ + .name = "ibm,reset-pe-dma-window", }, [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { .name = "ibm,scan-log-dump", From 1878840a0328dac1c85d29fee31456ec26fcc01c Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 19 Feb 2024 10:59:39 -0500 Subject: [PATCH 050/225] dmaengine: fsl-qdma: add __iomem and struct in union to fix sparse warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix below sparse warnings. drivers/dma/fsl-qdma.c:645:50: sparse: warning: incorrect type in argument 2 (different address spaces) drivers/dma/fsl-qdma.c:645:50: sparse: expected void [noderef] __iomem *addr drivers/dma/fsl-qdma.c:645:50: sparse: got void drivers/dma/fsl-qdma.c:387:15: sparse: sparse: restricted __le32 degrades to integer drivers/dma/fsl-qdma.c:390:19: sparse: expected restricted __le64 [usertype] data drivers/dma/fsl-qdma.c:392:13: sparse: expected unsigned int [assigned] [usertype] cmd QDMA decriptor have below 3 kind formats. (little endian) Compound Command Descriptor Format ┌──────┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐ │Offset│3│3│2│2│2│2│2│2│2│2│2│2│1│1│1│1│1│1│1│1│1│1│ │ │ │ │ │ │ │ │ │ │ │ │1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│ ├──────┼─┴─┼─┴─┴─┼─┴─┴─┼─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┼─┴─┴─┴─┴─┴─┴─┴─┤ │ 0x0C │DD │ - │QUEUE│ - │ ADDR │ ├──────┼───┴─────┴─────┴───────────────────────────────┴───────────────┤ │ 0x08 │ ADDR │ ├──────┼─────┬─────────────────┬───────────────────────────────────────┤ │ 0x04 │ FMT │ OFFSET │ - │ ├──────┼─┬─┬─┴─────────────────┴───────────────────────┬───────────────┤ │ │ │S│ │ │ │ 0x00 │-│E│ - │ STATUS │ │ │ │R│ │ │ └──────┴─┴─┴───────────────────────────────────────────┴───────────────┘ Compound S/G Table Entry Format ┌──────┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐ │Offset│3│3│2│2│2│2│2│2│2│2│2│2│1│1│1│1│1│1│1│1│1│1│ │ │ │ │ │ │ │ │ │ │ │ │1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│ ├──────┼─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┼─┴─┴─┴─┴─┴─┴─┴─┤ │ 0x0C │ - │ ADDR │ ├──────┼───────────────────────────────────────────────┴───────────────┤ │ 0x08 │ ADDR │ ├──────┼─┬─┬───────────────────────────────────────────────────────────┤ │ 0x04 │E│F│ LENGTH │ ├──────┼─┴─┴─────────────────────────────────┬─────────────────────────┤ │ 0x00 │ - │ OFFSET │ └──────┴─────────────────────────────────────┴─────────────────────────┘ Source/Destination Descriptor Format ┌──────┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐ │Offset│3│3│2│2│2│2│2│2│2│2│2│2│1│1│1│1│1│1│1│1│1│1│ │ │ │ │ │ │ │ │ │ │ │ │1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│9│8│7│6│5│4│3│2│1│0│ ├──────┼─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┤ │ 0x0C │ CMD │ ├──────┼───────────────────────────────────────────────────────────────┤ │ 0x08 │ - │ ├──────┼───────────────┬───────────────────────┬───────────────────────┤ │ 0x04 │ - │ S[D]SS │ S[D]SD │ ├──────┼───────────────┴───────────────────────┴───────────────────────┤ │ 0x00 │ - │ └──────┴───────────────────────────────────────────────────────────────┘ Previous code use 64bit 'data' map to 0x8 and 0xC. In little endian system CMD is high part of 64bit 'data'. It is correct by left shift 32. But in big endian system, shift left 32 will write to 0x8 position. Sparse detect this problem. Add below field ot match 'Source/Destination Descriptor Format'. struct { __le32 __reserved2; __le32 cmd; } __packed; Using ddf(sdf)->cmd save to correct posistion regardless endian. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202402081929.mggOTHaZ-lkp@intel.com/ Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240219155939.611237-1-Frank.Li@nxp.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-qdma.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 1e3bf6f30f78..5005e138fc23 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -161,6 +161,10 @@ struct fsl_qdma_format { u8 __reserved1[2]; u8 cfg8b_w1; } __packed; + struct { + __le32 __reserved2; + __le32 cmd; + } __packed; __le64 data; }; } __packed; @@ -355,7 +359,6 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan) static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, dma_addr_t dst, dma_addr_t src, u32 len) { - u32 cmd; struct fsl_qdma_format *sdf, *ddf; struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest; @@ -384,15 +387,11 @@ static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp, /* This entry is the last entry. */ qdma_csgf_set_f(csgf_dest, len); /* Descriptor Buffer */ - cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET) | - FSL_QDMA_CMD_PF; - sdf->data = QDMA_SDDF_CMD(cmd); + sdf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) | + FSL_QDMA_CMD_PF); - cmd = cpu_to_le32(FSL_QDMA_CMD_RWTTYPE << - FSL_QDMA_CMD_RWTTYPE_OFFSET); - cmd |= cpu_to_le32(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET); - ddf->data = QDMA_SDDF_CMD(cmd); + ddf->cmd = cpu_to_le32((FSL_QDMA_CMD_RWTTYPE << FSL_QDMA_CMD_RWTTYPE_OFFSET) | + (FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET)); } /* @@ -626,7 +625,7 @@ static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma) static int fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma, - void *block, + __iomem void *block, int id) { bool duplicate; From df2515a17914ecfc2a0594509deaf7fcb8d191ac Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 22 Feb 2024 17:30:53 +0100 Subject: [PATCH 051/225] dmaengine: ptdma: use consistent DMA masks The PTDMA driver sets DMA masks in two different places for the same device inconsistently. First call is in pt_pci_probe(), where it uses 48bit mask. The second call is in pt_dmaengine_register(), where it uses a 64bit mask. Using 64bit dma mask causes IO_PAGE_FAULT errors on DMA transfers between main memory and other devices. Without the extra call it works fine. Additionally the second call doesn't check the return value so it can silently fail. Remove the superfluous dma_set_mask() call and only use 48bit mask. Cc: stable@vger.kernel.org Fixes: b0b4a6b10577 ("dmaengine: ptdma: register PTDMA controller as a DMA resource") Reviewed-by: Basavaraj Natikar Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20240222163053.13842-1-tstruk@gigaio.com Signed-off-by: Vinod Koul --- drivers/dma/ptdma/ptdma-dmaengine.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c index 1aa65e5de0f3..f79240734807 100644 --- a/drivers/dma/ptdma/ptdma-dmaengine.c +++ b/drivers/dma/ptdma/ptdma-dmaengine.c @@ -385,8 +385,6 @@ int pt_dmaengine_register(struct pt_device *pt) chan->vc.desc_free = pt_do_cleanup; vchan_init(&chan->vc, dma_dev); - dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64)); - ret = dma_async_device_register(dma_dev); if (ret) goto err_reg; From 85df6b5a6658c788d7e27d52ea334aa83abcbf56 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 22 Feb 2024 18:36:49 +0100 Subject: [PATCH 052/225] ALSA: pcm: clarify and fix default msbits value for all formats Return used most significant bits from sample bit-width rather than the whole physical sample word size. The starting bit offset is defined in the format itself. The behaviour is not changed for 32-bit formats like S32_LE. But with this change - msbits value 24 instead 32 is returned for 24-bit formats like S24_LE etc. Also, commit 2112aa034907 ("ALSA: pcm: Introduce MSBITS subformat interface") compares sample bit-width not physical sample bit-width to reset MSBITS_MAX bit from the subformat bitmask. Probably no applications are using msbits value for other than S32_LE/U32_LE formats, because no drivers are reducing msbits value for other formats (with the msb offset) at the moment. For sanity, increase PCM protocol version, letting the user space to detect the changed behaviour. Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20240222173649.1447549-1-perex@perex.cz Signed-off-by: Takashi Iwai --- include/uapi/sound/asound.h | 4 ++-- sound/core/pcm_native.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index d5b9cfbd9cea..628d46a0da92 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -416,7 +416,7 @@ struct snd_pcm_hw_params { unsigned int rmask; /* W: requested masks */ unsigned int cmask; /* R: changed masks */ unsigned int info; /* R: Info flags for returned setup */ - unsigned int msbits; /* R: used most significant bits */ + unsigned int msbits; /* R: used most significant bits (in sample bit-width) */ unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index f5ff00f99788..21baf6bf7e25 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -486,6 +486,11 @@ static int fixup_unreferenced_params(struct snd_pcm_substream *substream, i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_SAMPLE_BITS); if (snd_interval_single(i)) params->msbits = snd_interval_value(i); + m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); + if (snd_mask_single(m)) { + snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m); + params->msbits = snd_pcm_format_width(format); + } } if (params->msbits) { From 9ee485bdda68d6d3f5728cbe3150eb9013d7d22b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:23 +0100 Subject: [PATCH 053/225] drm/bridge: aux-hpd: fix OF node leaks The two device node references taken during allocation need to be dropped when the auxiliary device is freed. Fixes: 6914968a0b52 ("drm/bridge: properly refcount DT nodes in aux bridge drivers") Cc: Dmitry Baryshkov Cc: Neil Armstrong Signed-off-by: Johan Hovold Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240217150228.5788-2-johan+linaro@kernel.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-2-johan+linaro@kernel.org --- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index bb55f697a181..9e71daf95bde 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -25,6 +25,7 @@ static void drm_aux_hpd_bridge_release(struct device *dev) ida_free(&drm_aux_hpd_bridge_ida, adev->id); of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); kfree(adev); } @@ -74,6 +75,8 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, ret = auxiliary_device_init(adev); if (ret) { + of_node_put(adev->dev.platform_data); + of_node_put(adev->dev.of_node); ida_free(&drm_aux_hpd_bridge_ida, adev->id); kfree(adev); return ERR_PTR(ret); From c1947ce61ff4cd4de2fe5f72423abedb6dc83011 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Fri, 23 Feb 2024 12:34:30 +0100 Subject: [PATCH 054/225] ALSA: hda/realtek: tas2781: enable subwoofer volume control The volume of subwoofer channels is always at maximum with the ALC269_FIXUP_THINKPAD_ACPI chain. Use ALC285_FIXUP_THINKPAD_HEADSET_JACK to align it to the master volume. Link: https://bugzilla.kernel.org/show_bug.cgi?id=208555#c827 Fixes: 3babae915f4c ("ALSA: hda/tas2781: Add tas2781 HDA driver") Cc: Signed-off-by: Gergo Koteles Link: https://lore.kernel.org/r/7ffae10ebba58601d25fe2ff8381a6ae3a926e62.1708687813.git.soyer@irl.hu Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d9a689ed424c..d2cdb89cb2a9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9585,7 +9585,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = tas2781_fixup_i2c, .chained = true, - .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + .chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK, }, [ALC287_FIXUP_YOGA7_14ARB7_I2C] = { .type = HDA_FIXUP_FUNC, From eba2eb2495f47690400331c722868902784e59de Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 21 Feb 2024 12:37:10 +0000 Subject: [PATCH 055/225] ASoC: soc-card: Fix missing locking in snd_soc_card_get_kcontrol() snd_soc_card_get_kcontrol() must be holding a read lock on card->controls_rwsem while walking the controls list. Compare with snd_ctl_find_numid(). The existing function is renamed snd_soc_card_get_kcontrol_locked() so that it can be called from contexts that are already holding card->controls_rwsem (for example, control get/put functions). There are few direct or indirect callers of snd_soc_card_get_kcontrol(), and most are safe. Three require changes, which have been included in this patch: codecs/cs35l45.c: cs35l45_activate_ctl() is called from a control put() function so is changed to call snd_soc_card_get_kcontrol_locked(). codecs/cs35l56.c: cs35l56_sync_asp1_mixer_widgets_with_firmware() is called from control get()/put() functions so is changed to call snd_soc_card_get_kcontrol_locked(). fsl/fsl_xcvr.c: fsl_xcvr_activate_ctl() is called from three places, one of which already holds card->controls_rwsem: 1. fsl_xcvr_mode_put(), a control put function, which will already be holding card->controls_rwsem. 2. fsl_xcvr_startup(), a DAI startup function. 3. fsl_xcvr_shutdown(), a DAI shutdown function. To fix this, fsl_xcvr_activate_ctl() has been changed to call snd_soc_card_get_kcontrol_locked() so that it is safe to call directly from fsl_xcvr_mode_put(). The fsl_xcvr_startup() and fsl_xcvr_shutdown() functions have been changed to take a read lock on card->controls_rsem() around calls to fsl_xcvr_activate_ctl(). While this is not very elegant, it keeps the change small, to avoid this patch creating a large collateral churn in fsl/fsl_xcvr.c. Analysis of other callers of snd_soc_card_get_kcontrol() is that they do not need any changes, they are not holding card->controls_rwsem when they call snd_soc_card_get_kcontrol(). Direct callers of snd_soc_card_get_kcontrol(): fsl/fsl_spdif.c: fsl_spdif_dai_probe() - DAI probe function fsl/fsl_micfil.c: voice_detected_fn() - IRQ handler Indirect callers via soc_component_notify_control(): codecs/cs42l43: cs42l43_mic_shutter() - IRQ handler codecs/cs42l43: cs42l43_spk_shutter() - IRQ handler codecs/ak4118.c: ak4118_irq_handler() - IRQ handler codecs/wm_adsp.c: wm_adsp_write_ctl() - not currently used Indirect callers via snd_soc_limit_volume(): qcom/sc8280xp.c: sc8280xp_snd_init() - DAIlink init function ti/rx51.c: rx51_aic34_init() - DAI init function I don't have hardware to test the fsl/*, qcom/sc828xp.c, ti/rx51.c and ak4118.c changes. Backport note: The fsl/, qcom/, cs35l45, cs35l56 and cs42l43 callers were added since the Fixes commit so won't all be present on older kernels. Signed-off-by: Richard Fitzgerald Fixes: 209c6cdfd283 ("ASoC: soc-card: move snd_soc_card_get_kcontrol() to soc-card") Link: https://lore.kernel.org/r/20240221123710.690224-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/soc-card.h | 2 ++ sound/soc/codecs/cs35l45.c | 2 +- sound/soc/codecs/cs35l56.c | 2 +- sound/soc/fsl/fsl_xcvr.c | 12 +++++++++++- sound/soc/soc-card.c | 24 ++++++++++++++++++++++-- 5 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/sound/soc-card.h b/include/sound/soc-card.h index ecc02e955279..1f4c39922d82 100644 --- a/include/sound/soc-card.h +++ b/include/sound/soc-card.h @@ -30,6 +30,8 @@ static inline void snd_soc_card_mutex_unlock(struct snd_soc_card *card) struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, const char *name); +struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card, + const char *name); int snd_soc_card_jack_new(struct snd_soc_card *card, const char *id, int type, struct snd_soc_jack *jack); int snd_soc_card_jack_new_pins(struct snd_soc_card *card, const char *id, diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index 44c221745c3b..2392c6effed8 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -184,7 +184,7 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component, else snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name); - kcontrol = snd_soc_card_get_kcontrol(component->card, name); + kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name); if (!kcontrol) { dev_err(component->dev, "Can't find kcontrol %s\n", name); return -EINVAL; diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 2c1313e34cce..6dd0319bc843 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -114,7 +114,7 @@ static int cs35l56_sync_asp1_mixer_widgets_with_firmware(struct cs35l56_private name = full_name; } - kcontrol = snd_soc_card_get_kcontrol(dapm->card, name); + kcontrol = snd_soc_card_get_kcontrol_locked(dapm->card, name); if (!kcontrol) { dev_warn(cs35l56->base.dev, "Could not find control %s\n", name); continue; diff --git a/sound/soc/fsl/fsl_xcvr.c b/sound/soc/fsl/fsl_xcvr.c index f0fb33d719c2..c46f64557a7f 100644 --- a/sound/soc/fsl/fsl_xcvr.c +++ b/sound/soc/fsl/fsl_xcvr.c @@ -174,7 +174,9 @@ static int fsl_xcvr_activate_ctl(struct snd_soc_dai *dai, const char *name, struct snd_kcontrol *kctl; bool enabled; - kctl = snd_soc_card_get_kcontrol(card, name); + lockdep_assert_held(&card->snd_card->controls_rwsem); + + kctl = snd_soc_card_get_kcontrol_locked(card, name); if (kctl == NULL) return -ENOENT; @@ -576,10 +578,14 @@ static int fsl_xcvr_startup(struct snd_pcm_substream *substream, xcvr->streams |= BIT(substream->stream); if (!xcvr->soc_data->spdif_only) { + struct snd_soc_card *card = dai->component->card; + /* Disable XCVR controls if there is stream started */ + down_read(&card->snd_card->controls_rwsem); fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, false); fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name, false); fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name, false); + up_read(&card->snd_card->controls_rwsem); } return 0; @@ -598,11 +604,15 @@ static void fsl_xcvr_shutdown(struct snd_pcm_substream *substream, /* Enable XCVR controls if there is no stream started */ if (!xcvr->streams) { if (!xcvr->soc_data->spdif_only) { + struct snd_soc_card *card = dai->component->card; + + down_read(&card->snd_card->controls_rwsem); fsl_xcvr_activate_ctl(dai, fsl_xcvr_mode_kctl.name, true); fsl_xcvr_activate_ctl(dai, fsl_xcvr_arc_mode_kctl.name, (xcvr->mode == FSL_XCVR_MODE_ARC)); fsl_xcvr_activate_ctl(dai, fsl_xcvr_earc_capds_kctl.name, (xcvr->mode == FSL_XCVR_MODE_EARC)); + up_read(&card->snd_card->controls_rwsem); } ret = regmap_update_bits(xcvr->regmap, FSL_XCVR_EXT_IER0, FSL_XCVR_IRQ_EARC_ALL, 0); diff --git a/sound/soc/soc-card.c b/sound/soc/soc-card.c index 285ab4c9c716..8a2f163da6bc 100644 --- a/sound/soc/soc-card.c +++ b/sound/soc/soc-card.c @@ -5,6 +5,9 @@ // Copyright (C) 2019 Renesas Electronics Corp. // Kuninori Morimoto // + +#include +#include #include #include @@ -26,12 +29,15 @@ static inline int _soc_card_ret(struct snd_soc_card *card, return ret; } -struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, - const char *name) +struct snd_kcontrol *snd_soc_card_get_kcontrol_locked(struct snd_soc_card *soc_card, + const char *name) { struct snd_card *card = soc_card->snd_card; struct snd_kcontrol *kctl; + /* must be held read or write */ + lockdep_assert_held(&card->controls_rwsem); + if (unlikely(!name)) return NULL; @@ -40,6 +46,20 @@ struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, return kctl; return NULL; } +EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol_locked); + +struct snd_kcontrol *snd_soc_card_get_kcontrol(struct snd_soc_card *soc_card, + const char *name) +{ + struct snd_card *card = soc_card->snd_card; + struct snd_kcontrol *kctl; + + down_read(&card->controls_rwsem); + kctl = snd_soc_card_get_kcontrol_locked(soc_card, name); + up_read(&card->controls_rwsem); + + return kctl; +} EXPORT_SYMBOL_GPL(snd_soc_card_get_kcontrol); static int jack_new(struct snd_soc_card *card, const char *id, int type, From e5ca263508f7e9d2cf711edf3258d11ca087885c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:24 +0100 Subject: [PATCH 056/225] drm/bridge: aux-hpd: separate allocation and registration Combining allocation and registration is an anti-pattern that should be avoided. Add two new functions for allocating and registering an dp-hpd bridge with a proper 'devm' prefix so that it is clear that these are device managed interfaces. devm_drm_dp_hpd_bridge_alloc() devm_drm_dp_hpd_bridge_add() The new interface will be used to fix a use-after-free bug in the Qualcomm PMIC GLINK driver and may prevent similar issues from being introduced elsewhere. The existing drm_dp_hpd_bridge_register() is reimplemented using the above and left in place for now. Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-3-johan+linaro@kernel.org --- drivers/gpu/drm/bridge/aux-hpd-bridge.c | 67 +++++++++++++++++++------ include/drm/bridge/aux-bridge.h | 15 ++++++ 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/bridge/aux-hpd-bridge.c b/drivers/gpu/drm/bridge/aux-hpd-bridge.c index 9e71daf95bde..6886db2d9e00 100644 --- a/drivers/gpu/drm/bridge/aux-hpd-bridge.c +++ b/drivers/gpu/drm/bridge/aux-hpd-bridge.c @@ -30,16 +30,13 @@ static void drm_aux_hpd_bridge_release(struct device *dev) kfree(adev); } -static void drm_aux_hpd_bridge_unregister_adev(void *_adev) +static void drm_aux_hpd_bridge_free_adev(void *_adev) { - struct auxiliary_device *adev = _adev; - - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); + auxiliary_device_uninit(_adev); } /** - * drm_dp_hpd_bridge_register - Create a simple HPD DisplayPort bridge + * devm_drm_dp_hpd_bridge_alloc - allocate a HPD DisplayPort bridge * @parent: device instance providing this bridge * @np: device node pointer corresponding to this bridge instance * @@ -47,11 +44,9 @@ static void drm_aux_hpd_bridge_unregister_adev(void *_adev) * DRM_MODE_CONNECTOR_DisplayPort, which terminates the bridge chain and is * able to send the HPD events. * - * Return: device instance that will handle created bridge or an error code - * encoded into the pointer. + * Return: bridge auxiliary device pointer or an error pointer */ -struct device *drm_dp_hpd_bridge_register(struct device *parent, - struct device_node *np) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np) { struct auxiliary_device *adev; int ret; @@ -82,13 +77,55 @@ struct device *drm_dp_hpd_bridge_register(struct device *parent, return ERR_PTR(ret); } - ret = auxiliary_device_add(adev); - if (ret) { - auxiliary_device_uninit(adev); + ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_free_adev, adev); + if (ret) return ERR_PTR(ret); - } - ret = devm_add_action_or_reset(parent, drm_aux_hpd_bridge_unregister_adev, adev); + return adev; +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_alloc); + +static void drm_aux_hpd_bridge_del_adev(void *_adev) +{ + auxiliary_device_delete(_adev); +} + +/** + * devm_drm_dp_hpd_bridge_add - register a HDP DisplayPort bridge + * @dev: struct device to tie registration lifetime to + * @adev: bridge auxiliary device to be registered + * + * Returns: zero on success or a negative errno + */ +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev) +{ + int ret; + + ret = auxiliary_device_add(adev); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, drm_aux_hpd_bridge_del_adev, adev); +} +EXPORT_SYMBOL_GPL(devm_drm_dp_hpd_bridge_add); + +/** + * drm_dp_hpd_bridge_register - allocate and register a HDP DisplayPort bridge + * @parent: device instance providing this bridge + * @np: device node pointer corresponding to this bridge instance + * + * Return: device instance that will handle created bridge or an error pointer + */ +struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) +{ + struct auxiliary_device *adev; + int ret; + + adev = devm_drm_dp_hpd_bridge_alloc(parent, np); + if (IS_ERR(adev)) + return ERR_CAST(adev); + + ret = devm_drm_dp_hpd_bridge_add(parent, adev); if (ret) return ERR_PTR(ret); diff --git a/include/drm/bridge/aux-bridge.h b/include/drm/bridge/aux-bridge.h index c4c423e97f06..4453906105ca 100644 --- a/include/drm/bridge/aux-bridge.h +++ b/include/drm/bridge/aux-bridge.h @@ -9,6 +9,8 @@ #include +struct auxiliary_device; + #if IS_ENABLED(CONFIG_DRM_AUX_BRIDGE) int drm_aux_bridge_register(struct device *parent); #else @@ -19,10 +21,23 @@ static inline int drm_aux_bridge_register(struct device *parent) #endif #if IS_ENABLED(CONFIG_DRM_AUX_HPD_BRIDGE) +struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, struct device_node *np); +int devm_drm_dp_hpd_bridge_add(struct device *dev, struct auxiliary_device *adev); struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np); void drm_aux_hpd_bridge_notify(struct device *dev, enum drm_connector_status status); #else +static inline struct auxiliary_device *devm_drm_dp_hpd_bridge_alloc(struct device *parent, + struct device_node *np) +{ + return NULL; +} + +static inline int devm_drm_dp_hpd_bridge_add(struct auxiliary_device *adev) +{ + return 0; +} + static inline struct device *drm_dp_hpd_bridge_register(struct device *parent, struct device_node *np) { From b979f2d50a099f3402418d7ff5f26c3952fb08bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 17 Feb 2024 16:02:25 +0100 Subject: [PATCH 057/225] soc: qcom: pmic_glink_altmode: fix drm bridge use-after-free A recent DRM series purporting to simplify support for "transparent bridges" and handling of probe deferrals ironically exposed a use-after-free issue on pmic_glink_altmode probe deferral. This has manifested itself as the display subsystem occasionally failing to initialise and NULL-pointer dereferences during boot of machines like the Lenovo ThinkPad X13s. Specifically, the dp-hpd bridge is currently registered before all resources have been acquired which means that it can also be deregistered on probe deferrals. In the meantime there is a race window where the new aux bridge driver (or PHY driver previously) may have looked up the dp-hpd bridge and stored a (non-reference-counted) pointer to the bridge which is about to be deallocated. When the display controller is later initialised, this triggers a use-after-free when attaching the bridges: dp -> aux -> dp-hpd (freed) which may, for example, result in the freed bridge failing to attach: [drm:drm_bridge_attach [drm]] *ERROR* failed to attach bridge /soc@0/phy@88eb000 to encoder TMDS-31: -16 or a NULL-pointer dereference: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... Call trace: drm_bridge_attach+0x70/0x1a8 [drm] drm_aux_bridge_attach+0x24/0x38 [aux_bridge] drm_bridge_attach+0x80/0x1a8 [drm] dp_bridge_init+0xa8/0x15c [msm] msm_dp_modeset_init+0x28/0xc4 [msm] The DRM bridge implementation is clearly fragile and implicitly built on the assumption that bridges may never go away. In this case, the fix is to move the bridge registration in the pmic_glink_altmode driver to after all resources have been looked up. Incidentally, with the new dp-hpd bridge implementation, which registers child devices, this is also a requirement due to a long-standing issue in driver core that can otherwise lead to a probe deferral loop (see commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). [DB: slightly fixed commit message by adding the word 'commit'] Fixes: 080b4e24852b ("soc: qcom: pmic_glink: Introduce altmode support") Fixes: 2bcca96abfbf ("soc: qcom: pmic-glink: switch to DRM_AUX_HPD_BRIDGE") Cc: # 6.3 Cc: Bjorn Andersson Cc: Dmitry Baryshkov Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-4-johan+linaro@kernel.org --- drivers/soc/qcom/pmic_glink_altmode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink_altmode.c b/drivers/soc/qcom/pmic_glink_altmode.c index 5fcd0fdd2faa..b3808fc24c69 100644 --- a/drivers/soc/qcom/pmic_glink_altmode.c +++ b/drivers/soc/qcom/pmic_glink_altmode.c @@ -76,7 +76,7 @@ struct pmic_glink_altmode_port { struct work_struct work; - struct device *bridge; + struct auxiliary_device *bridge; enum typec_orientation orientation; u16 svid; @@ -230,7 +230,7 @@ static void pmic_glink_altmode_worker(struct work_struct *work) else pmic_glink_altmode_enable_usb(altmode, alt_port); - drm_aux_hpd_bridge_notify(alt_port->bridge, + drm_aux_hpd_bridge_notify(&alt_port->bridge->dev, alt_port->hpd_state ? connector_status_connected : connector_status_disconnected); @@ -454,7 +454,7 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, alt_port->index = port; INIT_WORK(&alt_port->work, pmic_glink_altmode_worker); - alt_port->bridge = drm_dp_hpd_bridge_register(dev, to_of_node(fwnode)); + alt_port->bridge = devm_drm_dp_hpd_bridge_alloc(dev, to_of_node(fwnode)); if (IS_ERR(alt_port->bridge)) { fwnode_handle_put(fwnode); return PTR_ERR(alt_port->bridge); @@ -510,6 +510,16 @@ static int pmic_glink_altmode_probe(struct auxiliary_device *adev, } } + for (port = 0; port < ARRAY_SIZE(altmode->ports); port++) { + alt_port = &altmode->ports[port]; + if (!alt_port->bridge) + continue; + + ret = devm_drm_dp_hpd_bridge_add(dev, alt_port->bridge); + if (ret) + return ret; + } + altmode->client = devm_pmic_glink_register_client(dev, altmode->owner_id, pmic_glink_altmode_callback, From f79ee78767ca60e7a2c89eacd2dbdf237d97e838 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 17 Feb 2024 16:02:26 +0100 Subject: [PATCH 058/225] soc: qcom: pmic_glink: Fix boot when QRTR=m We need to bail out before adding/removing devices if we are going to -EPROBE_DEFER. Otherwise boot can get stuck in a probe deferral loop due to a long-standing issue in driver core (see commit fbc35b45f9f6 ("Add documentation on meaning of -EPROBE_DEFER")). Deregistering the altmode child device can potentially also trigger bugs in the DRM bridge implementation, which does not expect bridges to go away. [DB: slightly fixed commit message by adding the word 'commit'] Suggested-by: Dmitry Baryshkov Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20231213210644.8702-1-robdclark@gmail.com [ johan: rebase on 6.8-rc4, amend commit message and mention DRM ] Fixes: 58ef4ece1e41 ("soc: qcom: pmic_glink: Introduce base PMIC GLINK driver") Cc: # 6.3 Cc: Bjorn Andersson Signed-off-by: Johan Hovold Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240217150228.5788-5-johan+linaro@kernel.org --- drivers/soc/qcom/pmic_glink.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/soc/qcom/pmic_glink.c b/drivers/soc/qcom/pmic_glink.c index f4bfd24386f1..f913e9bd57ed 100644 --- a/drivers/soc/qcom/pmic_glink.c +++ b/drivers/soc/qcom/pmic_glink.c @@ -265,10 +265,17 @@ static int pmic_glink_probe(struct platform_device *pdev) pg->client_mask = *match_data; + pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); + if (IS_ERR(pg->pdr)) { + ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), + "failed to initialize pdr\n"); + return ret; + } + if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) { ret = pmic_glink_add_aux_device(pg, &pg->ucsi_aux, "ucsi"); if (ret) - return ret; + goto out_release_pdr_handle; } if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_ALTMODE)) { ret = pmic_glink_add_aux_device(pg, &pg->altmode_aux, "altmode"); @@ -281,17 +288,11 @@ static int pmic_glink_probe(struct platform_device *pdev) goto out_release_altmode_aux; } - pg->pdr = pdr_handle_alloc(pmic_glink_pdr_callback, pg); - if (IS_ERR(pg->pdr)) { - ret = dev_err_probe(&pdev->dev, PTR_ERR(pg->pdr), "failed to initialize pdr\n"); - goto out_release_aux_devices; - } - service = pdr_add_lookup(pg->pdr, "tms/servreg", "msm/adsp/charger_pd"); if (IS_ERR(service)) { ret = dev_err_probe(&pdev->dev, PTR_ERR(service), "failed adding pdr lookup for charger_pd\n"); - goto out_release_pdr_handle; + goto out_release_aux_devices; } mutex_lock(&__pmic_glink_lock); @@ -300,8 +301,6 @@ static int pmic_glink_probe(struct platform_device *pdev) return 0; -out_release_pdr_handle: - pdr_handle_release(pg->pdr); out_release_aux_devices: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_BATT)) pmic_glink_del_aux_device(pg, &pg->ps_aux); @@ -311,6 +310,8 @@ out_release_altmode_aux: out_release_ucsi_aux: if (pg->client_mask & BIT(PMIC_GLINK_CLIENT_UCSI)) pmic_glink_del_aux_device(pg, &pg->ucsi_aux); +out_release_pdr_handle: + pdr_handle_release(pg->pdr); return ret; } From d82f32202e0df7bf40d4b67c8a4ff9cea32df4d9 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 23 Feb 2024 11:31:31 +0000 Subject: [PATCH 059/225] RISC-V: Ignore V from the riscv,isa DT property on older T-Head CPUs Before attempting to support the pre-ratification version of vector found on older T-Head CPUs, disallow "v" in riscv,isa on these platforms. The deprecated property has no clear way to communicate the specific version of vector that is supported and much of the vendor provided software puts "v" in the isa string. riscv,isa-extensions should be used instead. This should not be too much of a burden for these systems, as the vendor shipped devicetrees and firmware do not work with a mainline kernel and will require updating. We can limit this restriction to only ignore v in riscv,isa on CPUs that report T-Head's vendor ID and a zero marchid. Newer T-Head CPUs that support the ratified version of vector should report non-zero marchid, according to Guo Ren [1]. Link: https://lore.kernel.org/linux-riscv/CAJF2gTRy5eK73=d6s7CVy9m9pB8p4rAoMHM3cZFwzg=AuF7TDA@mail.gmail.com/ [1] Fixes: dc6667a4e7e3 ("riscv: Extending cpufeature.c to detect V-extension") Co-developed-by: Conor Dooley Signed-off-by: Conor Dooley Acked-by: Guo Ren Link: https://lore.kernel.org/r/20240223-tidings-shabby-607f086cb4d7@spud Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 89920f84d0a3..3a05af7be510 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "copy-unaligned.h" @@ -538,6 +539,20 @@ static void __init riscv_fill_hwcap_from_isa_string(unsigned long *isa2hwcap) set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa); } + /* + * "V" in ISA strings is ambiguous in practice: it should mean + * just the standard V-1.0 but vendors aren't well behaved. + * Many vendors with T-Head CPU cores which implement the 0.7.1 + * version of the vector specification put "v" into their DTs. + * CPU cores with the ratified spec will contain non-zero + * marchid. + */ + if (acpi_disabled && riscv_cached_mvendorid(cpu) == THEAD_VENDOR_ID && + riscv_cached_marchid(cpu) == 0x0) { + this_hwcap &= ~isa2hwcap[RISCV_ISA_EXT_v]; + clear_bit(RISCV_ISA_EXT_v, isainfo->isa); + } + /* * All "okay" hart should have same isa. Set HWCAP based on * common capabilities of every "okay" hart, in case they don't From cbec657208bdd4bff4ff31cac5924aa18817027c Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Thu, 22 Feb 2024 22:13:26 +0100 Subject: [PATCH 060/225] arm64: dts: allwinner: h616: Add Orange Pi Zero 2W to Makefile Orange Pi Zero 2W dts file is not included in Makefile. Fix this. Fixes: c505ee1eae18 ("arm64: dts: allwinner: h616: add Orange Pi Zero 2W support") Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20240222211326.114955-1-jernej.skrabec@gmail.com Signed-off-by: Jernej Skrabec --- arch/arm64/boot/dts/allwinner/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/Makefile b/arch/arm64/boot/dts/allwinner/Makefile index 91d505b385de..1f1f8d865d0e 100644 --- a/arch/arm64/boot/dts/allwinner/Makefile +++ b/arch/arm64/boot/dts/allwinner/Makefile @@ -42,5 +42,6 @@ dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-bigtreetech-cb1-manta.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-bigtreetech-pi.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-orangepi-zero2.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h616-x96-mate.dtb +dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-orangepi-zero2w.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-orangepi-zero3.dtb dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h618-transpeed-8k618-t.dtb From 0ac32a396e4f41e88df76ce2282423188a2d2ed0 Mon Sep 17 00:00:00 2001 From: Willian Wang Date: Sat, 24 Feb 2024 13:11:49 -0300 Subject: [PATCH 061/225] ALSA: hda/realtek: Add special fixup for Lenovo 14IRP8 Lenovo Slim/Yoga Pro 9 14IRP8 requires a special fixup because there is a collision of its PCI SSID (17aa:3802) with Lenovo Yoga DuetITL 2021 codec SSID. Fixes: 3babae915f4c ("ALSA: hda/tas2781: Add tas2781 HDA driver") Link: https://bugzilla.kernel.org/show_bug.cgi?id=208555 Link: https://lore.kernel.org/all/d5b42e483566a3815d229270abd668131a0d9f3a.camel@irl.hu Cc: stable@vger.kernel.org Signed-off-by: Willian Wang Reviewed-by: Gergo Koteles Link: https://lore.kernel.org/r/170879111795.8.6687687359006700715.273812184@willian.wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index d2cdb89cb2a9..cf83fff9a387 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7444,6 +7444,7 @@ enum { ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, ALC298_FIXUP_LENOVO_C940_DUET7, + ALC287_FIXUP_LENOVO_14IRP8_DUETITL, ALC287_FIXUP_13S_GEN2_SPEAKERS, ALC256_FIXUP_SET_COEF_DEFAULTS, ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, @@ -7495,6 +7496,26 @@ static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec, __snd_hda_apply_fixup(codec, id, action, 0); } +/* A special fixup for Lenovo Slim/Yoga Pro 9 14IRP8 and Yoga DuetITL 2021; + * 14IRP8 PCI SSID will mistakenly be matched with the DuetITL codec SSID, + * so we need to apply a different fixup in this case. The only DuetITL codec + * SSID reported so far is the 17aa:3802 while the 14IRP8 has the 17aa:38be + * and 17aa:38bf. If it weren't for the PCI SSID, the 14IRP8 models would + * have matched correctly by their codecs. + */ +static void alc287_fixup_lenovo_14irp8_duetitl(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + int id; + + if (codec->core.subsystem_id == 0x17aa3802) + id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* DuetITL */ + else + id = ALC287_FIXUP_TAS2781_I2C; /* 14IRP8 */ + __snd_hda_apply_fixup(codec, id, action, 0); +} + static const struct hda_fixup alc269_fixups[] = { [ALC269_FIXUP_GPIO2] = { .type = HDA_FIXUP_FUNC, @@ -9379,6 +9400,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_lenovo_c940_duet7, }, + [ALC287_FIXUP_LENOVO_14IRP8_DUETITL] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_lenovo_14irp8_duetitl, + }, [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -10251,7 +10276,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x334b, "Lenovo ThinkCentre M70 Gen5", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), - SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga Pro 9 14IRP8 / DuetITL 2021", ALC287_FIXUP_LENOVO_14IRP8_DUETITL), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), From f45812cc23fb74bef62d4eb8a69fe7218f4b9f2a Mon Sep 17 00:00:00 2001 From: Tim Schumacher Date: Fri, 26 Jan 2024 17:25:23 +0100 Subject: [PATCH 062/225] efivarfs: Request at most 512 bytes for variable names Work around a quirk in a few old (2011-ish) UEFI implementations, where a call to `GetNextVariableName` with a buffer size larger than 512 bytes will always return EFI_INVALID_PARAMETER. There is some lore around EFI variable names being up to 1024 bytes in size, but this has no basis in the UEFI specification, and the upper bounds are typically platform specific, and apply to the entire variable (name plus payload). Given that Linux does not permit creating files with names longer than NAME_MAX (255) bytes, 512 bytes (== 256 UTF-16 characters) is a reasonable limit. Cc: # 6.1+ Signed-off-by: Tim Schumacher Signed-off-by: Ard Biesheuvel --- fs/efivarfs/vars.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 114ff0fd4e55..2ad377818d0f 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -373,7 +373,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, struct list_head *), void *data, bool duplicates, struct list_head *head) { - unsigned long variable_name_size = 1024; + unsigned long variable_name_size = 512; efi_char16_t *variable_name; efi_status_t status; efi_guid_t vendor_guid; @@ -390,12 +390,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, goto free; /* - * Per EFI spec, the maximum storage allocated for both - * the variable name and variable data is 1024 bytes. + * A small set of old UEFI implementations reject sizes + * above a certain threshold, the lowest seen in the wild + * is 512. */ do { - variable_name_size = 1024; + variable_name_size = 512; status = efivar_get_next_variable(&variable_name_size, variable_name, @@ -432,9 +433,13 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, break; case EFI_NOT_FOUND: break; + case EFI_BUFFER_TOO_SMALL: + pr_warn("efivars: Variable name size exceeds maximum (%lu > 512)\n", + variable_name_size); + status = EFI_NOT_FOUND; + break; default: - printk(KERN_WARNING "efivars: get_next_variable: status=%lx\n", - status); + pr_warn("efivars: get_next_variable: status=%lx\n", status); status = EFI_NOT_FOUND; break; } From 9ca01c7adf3993044f59934082087ebb9f7df6d5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 24 Feb 2024 18:45:41 +0100 Subject: [PATCH 063/225] efivarfs: Drop redundant cleanup on fill_super() failure Al points out that kill_sb() will be called if efivarfs_fill_super() fails and so there is no point in cleaning up the efivar entry list. Reported-by: Alexander Viro Signed-off-by: Ard Biesheuvel --- fs/efivarfs/super.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 6038dd39367a..210daac79748 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -343,12 +343,7 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - err = efivar_init(efivarfs_callback, (void *)sb, true, - &sfi->efivarfs_list); - if (err) - efivar_entry_iter(efivarfs_destroy, &sfi->efivarfs_list, NULL); - - return err; + return efivar_init(efivarfs_callback, sb, true, &sfi->efivarfs_list); } static int efivarfs_get_tree(struct fs_context *fc) From 2ce507f57ba9c78c080d4a050ebdc97263239de8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 24 Feb 2024 18:48:14 +0100 Subject: [PATCH 064/225] efivarfs: Drop 'duplicates' bool parameter on efivar_init() The 'duplicates' bool argument is always true when efivar_init() is called from its only caller so let's just drop it instead. Signed-off-by: Ard Biesheuvel --- fs/efivarfs/internal.h | 2 +- fs/efivarfs/super.c | 2 +- fs/efivarfs/vars.c | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index 169252e6dc46..f7206158ee81 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -38,7 +38,7 @@ struct efivar_entry { int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, struct list_head *), - void *data, bool duplicates, struct list_head *head); + void *data, struct list_head *head); int efivar_entry_add(struct efivar_entry *entry, struct list_head *head); void __efivar_entry_add(struct efivar_entry *entry, struct list_head *head); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 210daac79748..bb14462f6d99 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -343,7 +343,7 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; - return efivar_init(efivarfs_callback, sb, true, &sfi->efivarfs_list); + return efivar_init(efivarfs_callback, sb, &sfi->efivarfs_list); } static int efivarfs_get_tree(struct fs_context *fc) diff --git a/fs/efivarfs/vars.c b/fs/efivarfs/vars.c index 2ad377818d0f..4d722af1014f 100644 --- a/fs/efivarfs/vars.c +++ b/fs/efivarfs/vars.c @@ -361,7 +361,6 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, * efivar_init - build the initial list of EFI variables * @func: callback function to invoke for every variable * @data: function-specific data to pass to @func - * @duplicates: error if we encounter duplicates on @head? * @head: initialised head of variable list * * Get every EFI variable from the firmware and invoke @func. @func @@ -371,7 +370,7 @@ static void dup_variable_bug(efi_char16_t *str16, efi_guid_t *vendor_guid, */ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, struct list_head *), - void *data, bool duplicates, struct list_head *head) + void *data, struct list_head *head) { unsigned long variable_name_size = 512; efi_char16_t *variable_name; @@ -414,8 +413,7 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *, * we'll ever see a different variable name, * and may end up looping here forever. */ - if (duplicates && - variable_is_present(variable_name, &vendor_guid, + if (variable_is_present(variable_name, &vendor_guid, head)) { dup_variable_bug(variable_name, &vendor_guid, variable_name_size); From d2f8795d9e50aa33c1e2bc0fcbb98ba4a7795749 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Fri, 16 Feb 2024 11:42:55 +0100 Subject: [PATCH 065/225] ARM: dts: imx7: remove DSI port endpoints This fixes the display not working on colibri imx7, the driver fails to load with the following error: mxsfb 30730000.lcdif: error -ENODEV: Cannot connect bridge NXP i.MX7 LCDIF is connected to both the Parallel LCD Display and to a MIPI DSI IP block, currently it's not possible to describe the connection to both. Remove the port endpoint from the SOC dtsi to prevent regressions, this would need to be defined on the board DTS. Reported-by: Hiago De Franco Closes: https://lore.kernel.org/r/34yzygh3mbwpqr2re7nxmhyxy3s7qmqy4vhxvoyxnoguktriur@z66m7gvpqlia/ Fixes: edbbae7fba49 ("ARM: dts: imx7: add MIPI-DSI support") Signed-off-by: Francesco Dolcini Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx7s.dtsi | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index ebf7befcc11e..9c81c6baa2d3 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -834,16 +834,6 @@ <&clks IMX7D_LCDIF_PIXEL_ROOT_CLK>; clock-names = "pix", "axi"; status = "disabled"; - - port { - #address-cells = <1>; - #size-cells = <0>; - - lcdif_out_mipi_dsi: endpoint@0 { - reg = <0>; - remote-endpoint = <&mipi_dsi_in_lcdif>; - }; - }; }; mipi_csi: mipi-csi@30750000 { @@ -895,22 +885,6 @@ samsung,esc-clock-frequency = <20000000>; samsung,pll-clock-frequency = <24000000>; status = "disabled"; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - port@0 { - reg = <0>; - #address-cells = <1>; - #size-cells = <0>; - - mipi_dsi_in_lcdif: endpoint@0 { - reg = <0>; - remote-endpoint = <&lcdif_out_mipi_dsi>; - }; - }; - }; }; }; From 892cc217565fcca477c15116c21cf666a3fdbf9d Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Thu, 22 Feb 2024 15:06:41 +0200 Subject: [PATCH 066/225] MAINTAINERS: Use a proper mailinglist for NXP i.MX development So far we used an internal linux-imx@nxp.com email address to gather all patches related to NXP i.MX development. Let's switch to an open mailing list that provides ability for people from the community to subscribe and also have a proper archive. List interface at: https://lists.linux.dev. Archive is at: https://lore.kernel.org/imx/ Signed-off-by: Daniel Baluta Signed-off-by: Shawn Guo --- MAINTAINERS | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..2344eda616f9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2156,7 +2156,7 @@ M: Shawn Guo M: Sascha Hauer R: Pengutronix Kernel Team R: Fabio Estevam -R: NXP Linux Team +L: imx@lists.linux.dev L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git @@ -8489,7 +8489,7 @@ FREESCALE IMX / MXC FEC DRIVER M: Wei Fang R: Shenwei Wang R: Clark Wang -R: NXP Linux Team +L: imx@lists.linux.dev L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/fsl,fec.yaml @@ -8524,7 +8524,7 @@ F: drivers/i2c/busses/i2c-imx.c FREESCALE IMX LPI2C DRIVER M: Dong Aisheng L: linux-i2c@vger.kernel.org -L: linux-imx@nxp.com +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml F: drivers/i2c/busses/i2c-imx-lpi2c.c @@ -15704,7 +15704,7 @@ F: drivers/iio/gyro/fxas21002c_spi.c NXP i.MX 7D/6SX/6UL/93 AND VF610 ADC DRIVER M: Haibo Chen L: linux-iio@vger.kernel.org -L: linux-imx@nxp.com +L: imx@lists.linux.dev S: Maintained F: Documentation/devicetree/bindings/iio/adc/fsl,imx7d-adc.yaml F: Documentation/devicetree/bindings/iio/adc/fsl,vf610-adc.yaml @@ -15741,7 +15741,7 @@ F: drivers/gpu/drm/imx/dcss/ NXP i.MX 8QXP ADC DRIVER M: Cai Huoqing M: Haibo Chen -L: linux-imx@nxp.com +L: imx@lists.linux.dev L: linux-iio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml @@ -15749,7 +15749,7 @@ F: drivers/iio/adc/imx8qxp-adc.c NXP i.MX 8QXP/8QM JPEG V4L2 DRIVER M: Mirela Rabulea -R: NXP Linux Team +L: imx@lists.linux.dev L: linux-media@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/media/nxp,imx8-jpeg.yaml @@ -15759,7 +15759,7 @@ NXP i.MX CLOCK DRIVERS M: Abel Vesa R: Peng Fan L: linux-clk@vger.kernel.org -L: linux-imx@nxp.com +L: imx@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelvesa/linux.git clk/imx F: Documentation/devicetree/bindings/clock/imx* @@ -19630,7 +19630,7 @@ F: drivers/mmc/host/sdhci-of-at91.c SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) NXP i.MX DRIVER M: Haibo Chen -L: linux-imx@nxp.com +L: imx@lists.linux.dev L: linux-mmc@vger.kernel.org S: Maintained F: drivers/mmc/host/sdhci-esdhc-imx.c From 418a7fc5397719c4b8f50eaeca6694879f89a6ec Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 25 Feb 2024 04:33:42 +0100 Subject: [PATCH 067/225] arm64: dts: imx8mp: Fix TC9595 reset GPIO on DH i.MX8M Plus DHCOM SoM The TC9595 reset GPIO is SAI1_RXC / GPIO4_IO01, fix the DT accordingly. The SAI5_RXD0 / GPIO3_IO21 is thus far unused TC9595 interrupt line. Fixes: 20d0b83e712b ("arm64: dts: imx8mp: Add TC9595 bridge on DH electronics i.MX8M Plus DHCOM") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi index 4ae4fdab461e..43f1d45ccc96 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi @@ -255,7 +255,7 @@ <&clk IMX8MP_AUDIO_PLL2_OUT>; assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>; assigned-clock-rates = <13000000>, <13000000>, <156000000>; - reset-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>; status = "disabled"; ports { From 65e32301e1a0a436bf542674c23030d0dcc1afde Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Fri, 23 Feb 2024 17:15:22 +0800 Subject: [PATCH 068/225] arm64: dts: imx8mp: Fix LDB clocks property The "media_ldb_root_clk" is the gate clock to enable or disable the clock provided by CCM(Clock Control Module) to LDB instead of the "media_ldb" clock which is the parent of the "media_ldb_root_clk" clock as a composite clock. Fix LDB clocks property by referencing the "media_ldb_root_clk" clock instead of the "media_ldb" clock. Fixes: e7567840ecd3 ("arm64: dts: imx8mp: Reorder clock and reg properties") Fixes: 94e6197dadc9 ("arm64: dts: imx8mp: Add LCDIF2 & LDB nodes") Signed-off-by: Liu Ying Reviewed-by: Marek Vasut Reviewed-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 76c73daf546b..39a550c1cd26 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1820,7 +1820,7 @@ compatible = "fsl,imx8mp-ldb"; reg = <0x5c 0x4>, <0x128 0x4>; reg-names = "ldb", "lvds"; - clocks = <&clk IMX8MP_CLK_MEDIA_LDB>; + clocks = <&clk IMX8MP_CLK_MEDIA_LDB_ROOT>; clock-names = "ldb"; assigned-clocks = <&clk IMX8MP_CLK_MEDIA_LDB>; assigned-clock-parents = <&clk IMX8MP_VIDEO_PLL1_OUT>; From 1a807e46aa93ebad1dfbed4f82dc3bf779423a6e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 21 Feb 2024 14:46:21 -0700 Subject: [PATCH 069/225] xfrm: Avoid clang fortify warning in copy_to_user_tmpl() After a couple recent changes in LLVM, there is a warning (or error with CONFIG_WERROR=y or W=e) from the compile time fortify source routines, specifically the memset() in copy_to_user_tmpl(). In file included from net/xfrm/xfrm_user.c:14: ... include/linux/fortify-string.h:438:4: error: call to '__write_overflow_field' declared with 'warning' attribute: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror,-Wattribute-warning] 438 | __write_overflow_field(p_size_field, size); | ^ 1 error generated. While ->xfrm_nr has been validated against XFRM_MAX_DEPTH when its value is first assigned in copy_templates() by calling validate_tmpl() first (so there should not be any issue in practice), LLVM/clang cannot really deduce that across the boundaries of these functions. Without that knowledge, it cannot assume that the loop stops before i is greater than XFRM_MAX_DEPTH, which would indeed result a stack buffer overflow in the memset(). To make the bounds of ->xfrm_nr clear to the compiler and add additional defense in case copy_to_user_tmpl() is ever used in a path where ->xfrm_nr has not been properly validated against XFRM_MAX_DEPTH first, add an explicit bound check and early return, which clears up the warning. Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1985 Signed-off-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ad01997c3aa9..444e58bc3f44 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2017,6 +2017,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) if (xp->xfrm_nr == 0) return 0; + if (xp->xfrm_nr > XFRM_MAX_DEPTH) + return -ENOBUFS; + for (i = 0; i < xp->xfrm_nr; i++) { struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; From 86bf8cfda6d2a6720fa2e6e676c98f0882c9d3d7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 23 Feb 2024 16:03:33 +0100 Subject: [PATCH 070/225] drm/tegra: Remove existing framebuffer only if we support display Tegra DRM doesn't support display on Tegra234 and later, so make sure not to remove any existing framebuffers in that case. v2: - add comments explaining how this situation can come about - clear DRIVER_MODESET and DRIVER_ATOMIC feature bits Fixes: 6848c291a54f ("drm/aperture: Convert drivers to aperture interfaces") Signed-off-by: Thierry Reding Reviewed-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240223150333.1401582-1-thierry.reding@gmail.com --- drivers/gpu/drm/tegra/drm.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index ff36171c8fb7..373bcd79257e 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1242,9 +1242,26 @@ static int host1x_drm_probe(struct host1x_device *dev) drm_mode_config_reset(drm); - err = drm_aperture_remove_framebuffers(&tegra_drm_driver); - if (err < 0) - goto hub; + /* + * Only take over from a potential firmware framebuffer if any CRTCs + * have been registered. This must not be a fatal error because there + * are other accelerators that are exposed via this driver. + * + * Another case where this happens is on Tegra234 where the display + * hardware is no longer part of the host1x complex, so this driver + * will not expose any modesetting features. + */ + if (drm->mode_config.num_crtc > 0) { + err = drm_aperture_remove_framebuffers(&tegra_drm_driver); + if (err < 0) + goto hub; + } else { + /* + * Indicate to userspace that this doesn't expose any display + * capabilities. + */ + drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); + } err = drm_dev_register(drm, 0); if (err < 0) From 9d3f8a723c7950e56e0b95ab84b572caee29e065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 21 Feb 2024 08:18:59 +0100 Subject: [PATCH 071/225] drm/ttm/tests: depend on UML || COMPILE_TEST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least the device test requires that no other driver using TTM is loaded. So make those unit tests depend on UML || COMPILE_TEST to prevent people from trying them on bare metal. Signed-off-by: Christian König Acked-by: Alex Deucher Link: https://lore.kernel.org/all/20240219230116.77b8ad68@yea/ --- drivers/gpu/drm/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 2520db0b776e..c7edba18a6f0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -199,7 +199,7 @@ config DRM_TTM config DRM_TTM_KUNIT_TEST tristate "KUnit tests for TTM" if !KUNIT_ALL_TESTS default n - depends on DRM && KUNIT && MMU + depends on DRM && KUNIT && MMU && (UML || COMPILE_TEST) select DRM_TTM select DRM_EXPORT_FOR_TESTS if m select DRM_KUNIT_TEST_HELPERS @@ -207,7 +207,8 @@ config DRM_TTM_KUNIT_TEST help Enables unit tests for TTM, a GPU memory manager subsystem used to manage memory buffers. This option is mostly useful for kernel - developers. + developers. It depends on (UML || COMPILE_TEST) since no other driver + which uses TTM can be loaded while running the tests. If in doubt, say "N". From 50ee641643dd0f46702e9a99354398196e1734c2 Mon Sep 17 00:00:00 2001 From: Johnny Hsieh Date: Mon, 26 Feb 2024 21:44:50 +0800 Subject: [PATCH 072/225] ASoC: amd: yc: Add Lenovo ThinkBook 21J0 into DMI quirk table This patch adds Lenovo 21J0 (ThinkBook 16 G5+ ARP) to the DMI quirks table to enable internal microphone array. Cc: linux-sound@vger.kernel.org Signed-off-by: Johnny Hsieh Link: https://msgid.link/r/TYSPR04MB8429D62DFDB6727866ECF1DEC55A2@TYSPR04MB8429.apcprd04.prod.outlook.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index cc231185d72c..5587198751da 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,6 +199,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J0"), + } + }, { .driver_data = &acp6x_card, .matches = { From 00d6a284fcf3fad1b7e1b5bc3cd87cbfb60ce03f Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 8 Feb 2024 12:44:11 +0100 Subject: [PATCH 073/225] fbcon: always restore the old font data in fbcon_do_set_font() Commit a5a923038d70 (fbdev: fbcon: Properly revert changes when vc_resize() failed) started restoring old font data upon failure (of vc_resize()). But it performs so only for user fonts. It means that the "system"/internal fonts are not restored at all. So in result, the very first call to fbcon_do_set_font() performs no restore at all upon failing vc_resize(). This can be reproduced by Syzkaller to crash the system on the next invocation of font_get(). It's rather hard to hit the allocation failure in vc_resize() on the first font_set(), but not impossible. Esp. if fault injection is used to aid the execution/failure. It was demonstrated by Sirius: BUG: unable to handle page fault for address: fffffffffffffff8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD cb7b067 P4D cb7b067 PUD cb7d067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 8007 Comm: poc Not tainted 6.7.0-g9d1694dc91ce #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:fbcon_get_font+0x229/0x800 drivers/video/fbdev/core/fbcon.c:2286 Call Trace: con_font_get drivers/tty/vt/vt.c:4558 [inline] con_font_op+0x1fc/0xf20 drivers/tty/vt/vt.c:4673 vt_k_ioctl drivers/tty/vt/vt_ioctl.c:474 [inline] vt_ioctl+0x632/0x2ec0 drivers/tty/vt/vt_ioctl.c:752 tty_ioctl+0x6f8/0x1570 drivers/tty/tty_io.c:2803 vfs_ioctl fs/ioctl.c:51 [inline] ... So restore the font data in any case, not only for user fonts. Note the later 'if' is now protected by 'old_userfont' and not 'old_data' as the latter is always set now. (And it is supposed to be non-NULL. Otherwise we would see the bug above again.) Signed-off-by: Jiri Slaby (SUSE) Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed") Reported-and-tested-by: Ubisectech Sirius Cc: Ubisectech Sirius Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240208114411.14604-1-jirislaby@kernel.org --- drivers/video/fbdev/core/fbcon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 1183e7a871f8..46823c2e2ba1 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2399,11 +2399,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, struct fbcon_ops *ops = info->fbcon_par; struct fbcon_display *p = &fb_display[vc->vc_num]; int resize, ret, old_userfont, old_width, old_height, old_charcount; - char *old_data = NULL; + u8 *old_data = vc->vc_font.data; resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); - if (p->userfont) - old_data = vc->vc_font.data; vc->vc_font.data = (void *)(p->fontdata = data); old_userfont = p->userfont; if ((p->userfont = userfont)) @@ -2437,13 +2435,13 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount, update_screen(vc); } - if (old_data && (--REFCOUNT(old_data) == 0)) + if (old_userfont && (--REFCOUNT(old_data) == 0)) kfree(old_data - FONT_EXTRA_WORDS * sizeof(int)); return 0; err_out: p->fontdata = old_data; - vc->vc_font.data = (void *)old_data; + vc->vc_font.data = old_data; if (userfont) { p->userfont = old_userfont; From 9a458198eba98b7207669a166e64d04b04cb651b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Feb 2024 00:09:01 +0100 Subject: [PATCH 074/225] x86/cpu: Allow reducing x86_phys_bits during early_identify_cpu() In commit fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach"), the initialization of c->x86_phys_bits was moved after this_cpu->c_early_init(c). This is incorrect because early_init_amd() expected to be able to reduce the value according to the contents of CPUID leaf 0x8000001f. Fortunately, the bug was negated by init_amd()'s call to early_init_amd(), which does reduce x86_phys_bits in the end. However, this is very late in the boot process and, most notably, the wrong value is used for x86_phys_bits when setting up MTRRs. To fix this, call get_cpu_address_sizes() as soon as X86_FEATURE_CPUID is set/cleared, and c->extended_cpuid_level is retrieved. Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach") Signed-off-by: Paolo Bonzini Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240131230902.1867092-2-pbonzini%40redhat.com --- arch/x86/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0b97bcde70c6..fbc4e60d027c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) get_cpu_vendor(c); get_cpu_cap(c); setup_force_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); cpu_parse_early_param(); if (this_cpu->c_early_init) @@ -1601,10 +1602,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_bsp_init(c); } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); + get_cpu_address_sizes(c); } - get_cpu_address_sizes(c); - setup_force_cpu_cap(X86_FEATURE_ALWAYS); cpu_set_bug_bits(c); From 6890cb1ace350b4386c8aee1343dc3b3ddd214da Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Feb 2024 00:09:02 +0100 Subject: [PATCH 075/225] x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers MKTME repurposes the high bit of physical address to key id for encryption key and, even though MAXPHYADDR in CPUID[0x80000008] remains the same, the valid bits in the MTRR mask register are based on the reduced number of physical address bits. detect_tme() in arch/x86/kernel/cpu/intel.c detects TME and subtracts it from the total usable physical bits, but it is called too late. Move the call to early_init_intel() so that it is called in setup_arch(), before MTRRs are setup. This fixes boot on TDX-enabled systems, which until now only worked with "disable_mtrr_cleanup". Without the patch, the values written to the MTRRs mask registers were 52-bit wide (e.g. 0x000fffff_80000800) and the writes failed; with the patch, the values are 46-bit wide, which matches the reduced MAXPHYADDR that is shown in /proc/cpuinfo. Reported-by: Zixi Chen Signed-off-by: Paolo Bonzini Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240131230902.1867092-3-pbonzini%40redhat.com --- arch/x86/kernel/cpu/intel.c | 178 ++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 87 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a927a8fc9624..40dec9b56f87 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) return false; } +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme_early(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. */ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -322,6 +406,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (detect_extended_topology_early(c) < 0) detect_ht_early(c); + + /* + * Adjust the number of physical bits early because it affects the + * valid bits of the MTRR mask registers. + */ + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) @@ -482,90 +573,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -#define MSR_IA32_TME_ACTIVATE 0x982 - -/* Helpers to access TME_ACTIVATE MSR */ -#define TME_ACTIVATE_LOCKED(x) (x & 0x1) -#define TME_ACTIVATE_ENABLED(x) (x & 0x2) - -#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ -#define TME_ACTIVATE_POLICY_AES_XTS_128 0 - -#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ - -#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ -#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 - -/* Values for mktme_status (SW only construct) */ -#define MKTME_ENABLED 0 -#define MKTME_DISABLED 1 -#define MKTME_UNINITIALIZED 2 -static int mktme_status = MKTME_UNINITIALIZED; - -static void detect_tme(struct cpuinfo_x86 *c) -{ - u64 tme_activate, tme_policy, tme_crypto_algs; - int keyid_bits = 0, nr_keyids = 0; - static u64 tme_activate_cpu0 = 0; - - rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); - - if (mktme_status != MKTME_UNINITIALIZED) { - if (tme_activate != tme_activate_cpu0) { - /* Broken BIOS? */ - pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); - pr_err_once("x86/tme: MKTME is not usable\n"); - mktme_status = MKTME_DISABLED; - - /* Proceed. We may need to exclude bits from x86_phys_bits. */ - } - } else { - tme_activate_cpu0 = tme_activate; - } - - if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { - pr_info_once("x86/tme: not enabled by BIOS\n"); - mktme_status = MKTME_DISABLED; - return; - } - - if (mktme_status != MKTME_UNINITIALIZED) - goto detect_keyid_bits; - - pr_info("x86/tme: enabled by BIOS\n"); - - tme_policy = TME_ACTIVATE_POLICY(tme_activate); - if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) - pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); - - tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); - if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { - pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", - tme_crypto_algs); - mktme_status = MKTME_DISABLED; - } -detect_keyid_bits: - keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); - nr_keyids = (1UL << keyid_bits) - 1; - if (nr_keyids) { - pr_info_once("x86/mktme: enabled by BIOS\n"); - pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); - } else { - pr_info_once("x86/mktme: disabled by BIOS\n"); - } - - if (mktme_status == MKTME_UNINITIALIZED) { - /* MKTME is usable */ - mktme_status = MKTME_ENABLED; - } - - /* - * KeyID bits effectively lower the number of physical address - * bits. Update cpuinfo_x86::x86_phys_bits accordingly. - */ - c->x86_phys_bits -= keyid_bits; -} - static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; @@ -702,9 +709,6 @@ static void init_intel(struct cpuinfo_x86 *c) init_ia32_feat_ctl(c); - if (cpu_has(c, X86_FEATURE_TME)) - detect_tme(c); - init_intel_misc_features(c); split_lock_init(); From 51d31149a88b5c5a8d2d33f06df93f6187a25b4c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 19 Feb 2024 13:14:32 +0800 Subject: [PATCH 076/225] ceph: switch to corrected encoding of max_xattr_size in mdsmap The addition of bal_rank_mask with encoding version 17 was merged into ceph.git in Oct 2022 and made it into v18.2.0 release normally. A few months later, the much delayed addition of max_xattr_size got merged, also with encoding version 17, placed before bal_rank_mask in the encoding -- but it didn't make v18.2.0 release. The way this ended up being resolved on the MDS side is that bal_rank_mask will continue to be encoded in version 17 while max_xattr_size is now encoded in version 18. This does mean that older kernels will misdecode version 17, but this is also true for v18.2.0 and v18.2.1 clients in userspace. The best we can do is backport this adjustment -- see ceph.git commit 78abfeaff27fee343fb664db633de5b221699a73 for details. [ idryomov: changelog ] Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/64440 Fixes: d93231a6bc8a ("ceph: prevent a client from exceeding the MDS maximum xattr size") Signed-off-by: Xiubo Li Reviewed-by: Patrick Donnelly Reviewed-by: Venky Shankar Signed-off-by: Ilya Dryomov --- fs/ceph/mdsmap.c | 7 ++++--- fs/ceph/mdsmap.h | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index fae97c25ce58..8109aba66e02 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -380,10 +380,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(struct ceph_mds_client *mdsc, void **p, ceph_decode_skip_8(p, end, bad_ext); /* required_client_features */ ceph_decode_skip_set(p, end, 64, bad_ext); + /* bal_rank_mask */ + ceph_decode_skip_string(p, end, bad_ext); + } + if (mdsmap_ev >= 18) { ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); - } else { - /* This forces the usage of the (sync) SETXATTR Op */ - m->m_max_xattr_size = 0; } bad_ext: doutc(cl, "m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h index 89f1931f1ba6..1f2171dd01bf 100644 --- a/fs/ceph/mdsmap.h +++ b/fs/ceph/mdsmap.h @@ -27,7 +27,11 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ + /* + * maximum size for xattrs blob. + * Zeroed by default to force the usage of the (sync) SETXATTR Op. + */ + u64 m_max_xattr_size; u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ From aeb004c0cd6958e910123a1607634401009c9539 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 22 Feb 2024 13:23:45 -0800 Subject: [PATCH 077/225] iommufd: Fix iopt_access_list_id overwrite bug Syzkaller reported the following WARN_ON: WARNING: CPU: 1 PID: 4738 at drivers/iommu/iommufd/io_pagetable.c:1360 Call Trace: iommufd_access_change_ioas+0x2fe/0x4e0 iommufd_access_destroy_object+0x50/0xb0 iommufd_object_remove+0x2a3/0x490 iommufd_object_destroy_user iommufd_access_destroy+0x71/0xb0 iommufd_test_staccess_release+0x89/0xd0 __fput+0x272/0xb50 __fput_sync+0x4b/0x60 __do_sys_close __se_sys_close __x64_sys_close+0x8b/0x110 do_syscall_x64 The mismatch between the access pointer in the list and the passed-in pointer is resulting from an overwrite of access->iopt_access_list_id, in iopt_add_access(). Called from iommufd_access_change_ioas() when xa_alloc() succeeds but iopt_calculate_iova_alignment() fails. Add a new_id in iopt_add_access() and only update iopt_access_list_id when returning successfully. Cc: stable@vger.kernel.org Fixes: 9227da7816dd ("iommufd: Add iommufd_access_change_ioas(_id) helpers") Link: https://lore.kernel.org/r/2dda7acb25b8562ec5f1310de828ef5da9ef509c.1708636627.git.nicolinc@nvidia.com Reported-by: Jason Gunthorpe Suggested-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 504ac1b01b2d..05fd9d3abf1b 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -1330,20 +1330,23 @@ out_unlock: int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) { + u32 new_id; int rc; down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); - rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, - xa_limit_16b, GFP_KERNEL_ACCOUNT); + rc = xa_alloc(&iopt->access_list, &new_id, access, xa_limit_16b, + GFP_KERNEL_ACCOUNT); + if (rc) goto out_unlock; rc = iopt_calculate_iova_alignment(iopt); if (rc) { - xa_erase(&iopt->access_list, access->iopt_access_list_id); + xa_erase(&iopt->access_list, new_id); goto out_unlock; } + access->iopt_access_list_id = new_id; out_unlock: up_write(&iopt->iova_rwsem); From fde372df96afddcda3ec94944351f2a14f7cd98d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 22 Feb 2024 13:23:46 -0800 Subject: [PATCH 078/225] iommufd/selftest: Fix mock_dev_num bug Syzkaller reported the following bug: sysfs: cannot create duplicate filename '/devices/iommufd_mock4' Call Trace: sysfs_warn_dup+0x71/0x90 sysfs_create_dir_ns+0x1ee/0x260 ? sysfs_create_mount_point+0x80/0x80 ? spin_bug+0x1d0/0x1d0 ? do_raw_spin_unlock+0x54/0x220 kobject_add_internal+0x221/0x970 kobject_add+0x11c/0x1e0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 ? kset_create_and_add+0x160/0x160 ? kobject_put+0x5d/0x390 ? bus_get_dev_root+0x4a/0x60 ? kobject_put+0x5d/0x390 device_add+0x1d5/0x1550 ? __fw_devlink_link_to_consumers.isra.0+0x1f0/0x1f0 ? __init_waitqueue_head+0xcb/0x150 iommufd_test+0x462/0x3b60 ? lock_release+0x1fe/0x640 ? __might_fault+0x117/0x170 ? reacquire_held_locks+0x4b0/0x4b0 ? iommufd_selftest_destroy+0xd0/0xd0 ? __might_fault+0xbe/0x170 iommufd_fops_ioctl+0x256/0x350 ? iommufd_option+0x180/0x180 ? __lock_acquire+0x1755/0x45f0 __x64_sys_ioctl+0xa13/0x1640 The bug is triggered when Syzkaller created multiple mock devices but didn't destroy them in the same sequence, messing up the mock_dev_num counter. Replace the atomic with an mock_dev_ida. Cc: stable@vger.kernel.org Fixes: 23a1b46f15d5 ("iommufd/selftest: Make the mock iommu driver into a real driver") Link: https://lore.kernel.org/r/5af41d5af6d5c013cc51de01427abb8141b3587e.1708636627.git.nicolinc@nvidia.com Reported-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 8abf9747773e..2bfe77bd351d 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -36,7 +36,7 @@ static struct mock_bus_type iommufd_mock_bus_type = { }, }; -static atomic_t mock_dev_num; +static DEFINE_IDA(mock_dev_ida); enum { MOCK_DIRTY_TRACK = 1, @@ -123,6 +123,7 @@ enum selftest_obj_type { struct mock_dev { struct device dev; unsigned long flags; + int id; }; struct selftest_obj { @@ -631,7 +632,7 @@ static void mock_dev_release(struct device *dev) { struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); - atomic_dec(&mock_dev_num); + ida_free(&mock_dev_ida, mdev->id); kfree(mdev); } @@ -653,8 +654,12 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags) mdev->dev.release = mock_dev_release; mdev->dev.bus = &iommufd_mock_bus_type.bus; - rc = dev_set_name(&mdev->dev, "iommufd_mock%u", - atomic_inc_return(&mock_dev_num)); + rc = ida_alloc(&mock_dev_ida, GFP_KERNEL); + if (rc < 0) + goto err_put; + mdev->id = rc; + + rc = dev_set_name(&mdev->dev, "iommufd_mock%u", mdev->id); if (rc) goto err_put; From cf7c2789822db8b5efa34f5ebcf1621bc0008d48 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 22 Feb 2024 13:23:47 -0800 Subject: [PATCH 079/225] iommufd: Fix protection fault in iommufd_test_syz_conv_iova Syzkaller reported the following bug: general protection fault, probably for non-canonical address 0xdffffc0000000038: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x00000000000001c0-0x00000000000001c7] Call Trace: lock_acquire lock_acquire+0x1ce/0x4f0 down_read+0x93/0x4a0 iommufd_test_syz_conv_iova+0x56/0x1f0 iommufd_test_access_rw.isra.0+0x2ec/0x390 iommufd_test+0x1058/0x1e30 iommufd_fops_ioctl+0x381/0x510 vfs_ioctl __do_sys_ioctl __se_sys_ioctl __x64_sys_ioctl+0x170/0x1e0 do_syscall_x64 do_syscall_64+0x71/0x140 This is because the new iommufd_access_change_ioas() sets access->ioas to NULL during its process, so the lock might be gone in a concurrent racing context. Fix this by doing the same access->ioas sanity as iommufd_access_rw() and iommufd_access_pin_pages() functions do. Cc: stable@vger.kernel.org Fixes: 9227da7816dd ("iommufd: Add iommufd_access_change_ioas(_id) helpers") Link: https://lore.kernel.org/r/3f1932acaf1dd494d404c04364d73ce8f57f3e5e.1708636627.git.nicolinc@nvidia.com Reported-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 2bfe77bd351d..d59e199a8705 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -63,8 +63,8 @@ enum { * In syzkaller mode the 64 bit IOVA is converted into an nth area and offset * value. This has a much smaller randomization space and syzkaller can hit it. */ -static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, - u64 *iova) +static unsigned long __iommufd_test_syz_conv_iova(struct io_pagetable *iopt, + u64 *iova) { struct syz_layout { __u32 nth_area; @@ -88,6 +88,21 @@ static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, return 0; } +static unsigned long iommufd_test_syz_conv_iova(struct iommufd_access *access, + u64 *iova) +{ + unsigned long ret; + + mutex_lock(&access->ioas_lock); + if (!access->ioas) { + mutex_unlock(&access->ioas_lock); + return 0; + } + ret = __iommufd_test_syz_conv_iova(&access->ioas->iopt, iova); + mutex_unlock(&access->ioas_lock); + return ret; +} + void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, unsigned int ioas_id, u64 *iova, u32 *flags) { @@ -100,7 +115,7 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, ioas = iommufd_get_ioas(ucmd->ictx, ioas_id); if (IS_ERR(ioas)) return; - *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); + *iova = __iommufd_test_syz_conv_iova(&ioas->iopt, iova); iommufd_put_object(ucmd->ictx, &ioas->obj); } @@ -1161,7 +1176,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, } if (flags & MOCK_FLAGS_ACCESS_SYZ) - iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, + iova = iommufd_test_syz_conv_iova(staccess->access, &cmd->access_pages.iova); npages = (ALIGN(iova + length, PAGE_SIZE) - @@ -1263,8 +1278,8 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, } if (flags & MOCK_FLAGS_ACCESS_SYZ) - iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, - &cmd->access_rw.iova); + iova = iommufd_test_syz_conv_iova(staccess->access, + &cmd->access_rw.iova); rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags); if (rc) From bb04d13353885f81c87879b2deb296bd2adb6cab Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 23 Feb 2024 14:44:08 -0400 Subject: [PATCH 080/225] iommufd/selftest: Don't check map/unmap pairing with HUGE_PAGES Since MOCK_HUGE_PAGE_SIZE was introduced it allows the core code to invoke mock with large page sizes. This confuses the validation logic that checks that map/unmap are paired. This is because the page size computed for map is based on the physical address and in many cases will always be the base page size, however the entire range generated by iommufd will be passed to map. Randomly iommufd can see small groups of physically contiguous pages, (say 8k unaligned and grouped together), but that group crosses a huge page boundary. The map side will observe this as a contiguous run and mark it accordingly, but there is a chance the unmap side will end up terminating interior huge pages in the middle of that group and trigger a validation failure. Meaning the validation only works if the core code passes the iova/length directly from iommufd to mock. syzkaller randomly hits this with failures like: WARNING: CPU: 0 PID: 11568 at drivers/iommu/iommufd/selftest.c:461 mock_domain_unmap_pages+0x1c0/0x250 Modules linked in: CPU: 0 PID: 11568 Comm: syz-executor.0 Not tainted 6.8.0-rc3+ #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:mock_domain_unmap_pages+0x1c0/0x250 Code: 2b e8 94 37 0f ff 48 d1 eb 31 ff 48 b8 00 00 00 00 00 00 20 00 48 21 c3 48 89 de e8 aa 32 0f ff 48 85 db 75 07 e8 70 37 0f ff <0f> 0b e8 69 37 0f ff 31 f6 31 ff e8 90 32 0f ff e8 5b 37 0f ff 4c RSP: 0018:ffff88800e707490 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff822dfae6 RDX: ffff88800cf86400 RSI: ffffffff822dfaf0 RDI: 0000000000000007 RBP: ffff88800e7074d8 R08: 0000000000000000 R09: ffffed1001167c90 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000001500000 R13: 0000000000083000 R14: 0000000000000001 R15: 0000000000000800 FS: 0000555556048480(0000) GS:ffff88806d400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2dc23000 CR3: 0000000008cbb000 CR4: 0000000000350eb0 Call Trace: __iommu_unmap+0x281/0x520 iommu_unmap+0xc9/0x180 iopt_area_unmap_domain_range+0x1b1/0x290 iopt_area_unpin_domain+0x590/0x800 __iopt_area_unfill_domain+0x22e/0x650 iopt_area_unfill_domain+0x47/0x60 iopt_unfill_domain+0x187/0x590 iopt_table_remove_domain+0x267/0x2d0 iommufd_hwpt_paging_destroy+0x1f1/0x370 iommufd_object_remove+0x2a3/0x490 iommufd_device_detach+0x23a/0x2c0 iommufd_selftest_destroy+0x7a/0xf0 iommufd_fops_release+0x1d3/0x340 __fput+0x272/0xb50 __fput_sync+0x4b/0x60 __x64_sys_close+0x8b/0x110 do_syscall_64+0x71/0x140 entry_SYSCALL_64_after_hwframe+0x46/0x4e Do the simple thing and just disable the validation when the huge page tests are being run. Fixes: 7db521e23fe9 ("iommufd/selftest: Hugepage mock domain support") Link: https://lore.kernel.org/r/0-v1-1e17e60a5c8a+103fb-iommufd_mock_hugepg_jgg@nvidia.com Reviewed-by: Joao Martins Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index d59e199a8705..7a2199470f31 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -446,20 +446,27 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain, /* * iommufd generates unmaps that must be a strict - * superset of the map's performend So every starting - * IOVA should have been an iova passed to map, and the + * superset of the map's performend So every + * starting/ending IOVA should have been an iova passed + * to map. * - * First IOVA must be present and have been a first IOVA - * passed to map_pages + * This simple logic doesn't work when the HUGE_PAGE is + * turned on since the core code will automatically + * switch between the two page sizes creating a break in + * the unmap calls. The break can land in the middle of + * contiguous IOVA. */ - if (first) { - WARN_ON(ent && !(xa_to_value(ent) & - MOCK_PFN_START_IOVA)); - first = false; + if (!(domain->pgsize_bitmap & MOCK_HUGE_PAGE_SIZE)) { + if (first) { + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_START_IOVA)); + first = false; + } + if (pgcount == 1 && + cur + MOCK_IO_PAGE_SIZE == pgsize) + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_LAST_IOVA)); } - if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) - WARN_ON(ent && !(xa_to_value(ent) & - MOCK_PFN_LAST_IOVA)); iova += MOCK_IO_PAGE_SIZE; ret += MOCK_IO_PAGE_SIZE; From 5cc2da0b60e5b4daf6cf7442ee66f1f91878c0b5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 Jan 2024 14:07:36 +0100 Subject: [PATCH 081/225] scsi: mpi3mr: Reduce stack usage in mpi3mr_refresh_sas_ports() Doubling the number of PHYs also doubled the stack usage of this function, exceeding the 32-bit limit of 1024 bytes: drivers/scsi/mpi3mr/mpi3mr_transport.c: In function 'mpi3mr_refresh_sas_ports': drivers/scsi/mpi3mr/mpi3mr_transport.c:1818:1: error: the frame size of 1636 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Since the sas_io_unit_pg0 structure is already allocated dynamically, use the same method here. The size of the allocation can be smaller based on the actual number of phys now, so use this as an upper bound. Fixes: cb5b60894602 ("scsi: mpi3mr: Increase maximum number of PHYs to 64 from 32") Reviewed-by: Johannes Thumshirn Cc: Sathya Prakash Veerichetty Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20240123130754.2011469-1-arnd@kernel.org Tested-by: John Garry #build only Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_transport.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c index c0c8ab586957..d32ad46318cb 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_transport.c +++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c @@ -1671,7 +1671,7 @@ mpi3mr_update_mr_sas_port(struct mpi3mr_ioc *mrioc, struct host_port *h_port, void mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) { - struct host_port h_port[64]; + struct host_port *h_port = NULL; int i, j, found, host_port_count = 0, port_idx; u16 sz, attached_handle, ioc_status; struct mpi3_sas_io_unit_page0 *sas_io_unit_pg0 = NULL; @@ -1685,6 +1685,10 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) sas_io_unit_pg0 = kzalloc(sz, GFP_KERNEL); if (!sas_io_unit_pg0) return; + h_port = kcalloc(64, sizeof(struct host_port), GFP_KERNEL); + if (!h_port) + goto out; + if (mpi3mr_cfg_get_sas_io_unit_pg0(mrioc, sas_io_unit_pg0, sz)) { ioc_err(mrioc, "failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); @@ -1814,6 +1818,7 @@ mpi3mr_refresh_sas_ports(struct mpi3mr_ioc *mrioc) } } out: + kfree(h_port); kfree(sas_io_unit_pg0); } From ee0017c3ed8a8abfa4d40e42f908fb38c31e7515 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Wed, 21 Feb 2024 12:47:24 +0530 Subject: [PATCH 082/225] scsi: mpt3sas: Prevent sending diag_reset when the controller is ready If the driver detects that the controller is not ready before sending the first IOC facts command, it will wait for a maximum of 10 seconds for it to become ready. However, even if the controller becomes ready within 10 seconds, the driver will still issue a diagnostic reset. Modify the driver to avoid sending a diag reset if the controller becomes ready within the 10-second wait time. Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240221071724.14986-1-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 8761bc58d965..b8120ca93c79 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7378,7 +7378,9 @@ _base_wait_for_iocstate(struct MPT3SAS_ADAPTER *ioc, int timeout) return -EFAULT; } - issue_diag_reset: + return 0; + +issue_diag_reset: rc = _base_diag_reset(ioc); return rc; } From 27c86d43bcdb97d00359702713bfff6c006f0d90 Mon Sep 17 00:00:00 2001 From: Shiyang Ruan Date: Fri, 15 Sep 2023 14:38:54 +0800 Subject: [PATCH 083/225] xfs: drop experimental warning for FSDAX FSDAX and reflink can work together now, let's drop this warning. Signed-off-by: Shiyang Ruan Reviewed-by: "Darrick J. Wong" Acked-by: Dan Williams Signed-off-by: Chandan Babu R --- fs/xfs/xfs_super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 5a2512d20bd0..98401de832ee 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -350,7 +350,6 @@ xfs_setup_dax_always( return -EINVAL; } - xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); return 0; disable_dax: From b34bf65838f7c6e785f62681605a538b73c2808c Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 23 Feb 2024 14:54:34 +0800 Subject: [PATCH 084/225] ALSA: hda/realtek - ALC285 reduce pop noise from Headphone port It had pop noise from Headphone port when system reboot state. If NID 58h Index 0x0 to fill default value, it will reduce pop noise. Signed-off-by: Kailang Yang Link: https://lore.kernel.org/r/7493e207919a4fb3a0599324fd010e3e@realtek.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index cf83fff9a387..62701197a019 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3684,6 +3684,7 @@ static void alc285_hp_init(struct hda_codec *codec) int i, val; int coef38, coef0d, coef36; + alc_write_coefex_idx(codec, 0x58, 0x00, 0x1888); /* write default value */ alc_update_coef_idx(codec, 0x4a, 1<<15, 1<<15); /* Reset HP JD */ coef38 = alc_read_coef_idx(codec, 0x38); /* Amp control */ coef0d = alc_read_coef_idx(codec, 0x0d); /* Digital Misc control */ From 5f7a07646655fb4108da527565dcdc80124b14c4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Feb 2024 13:15:02 +0000 Subject: [PATCH 085/225] afs: Fix endless loop in directory parsing If a directory has a block with only ".__afsXXXX" files in it (from uncompleted silly-rename), these .__afsXXXX files are skipped but without advancing the file position in the dir_context. This leads to afs_dir_iterate() repeating the block again and again. Fix this by making the code that skips the .__afsXXXX file also manually advance the file position. The symptoms are a soft lookup: watchdog: BUG: soft lockup - CPU#3 stuck for 52s! [check:5737] ... RIP: 0010:afs_dir_iterate_block+0x39/0x1fd ... ? watchdog_timer_fn+0x1a6/0x213 ... ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? afs_dir_iterate_block+0x39/0x1fd afs_dir_iterate+0x10a/0x148 afs_readdir+0x30/0x4a iterate_dir+0x93/0xd3 __do_sys_getdents64+0x6b/0xd4 This is almost certainly the actual fix for: https://bugzilla.kernel.org/show_bug.cgi?id=218496 Fixes: 57e9d49c5452 ("afs: Hide silly-rename files from userspace") Signed-off-by: David Howells Link: https://lore.kernel.org/r/786185.1708694102@warthog.procyon.org.uk Reviewed-by: Marc Dionne cc: Marc Dionne cc: Markus Suvanto cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner --- fs/afs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index b5b8de521f99..8a67fc427e74 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -479,8 +479,10 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, dire->u.name[0] == '.' && ctx->actor != afs_lookup_filldir && ctx->actor != afs_lookup_one_filldir && - memcmp(dire->u.name, ".__afs", 6) == 0) + memcmp(dire->u.name, ".__afs", 6) == 0) { + ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); continue; + } /* found the next entry */ if (!dir_emit(ctx, dire->u.name, nlen, From 54cbc058d86beca3515c994039b5c0f0a34f53dd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 15 Feb 2024 12:47:39 -0800 Subject: [PATCH 086/225] fs/aio: Make io_cancel() generate completions again The following patch accidentally removed the code for delivering completions for cancelled reads and writes to user space: "[PATCH 04/33] aio: remove retry-based AIO" (https://lore.kernel.org/all/1363883754-27966-5-git-send-email-koverstreet@google.com/) >From that patch: - if (kiocbIsCancelled(iocb)) { - ret = -EINTR; - aio_complete(iocb, ret, 0); - /* must not access the iocb after this */ - goto out; - } This leads to a leak in user space of a struct iocb. Hence this patch that restores the code that reports to user space that a read or write has been cancelled successfully. Fixes: 41003a7bcfed ("aio: remove retry-based AIO") Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240215204739.2677806-3-bvanassche@acm.org Signed-off-by: Christian Brauner --- fs/aio.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index da18dbcfcb22..28223f511931 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2165,14 +2165,11 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, #endif /* sys_io_cancel: - * Attempts to cancel an iocb previously passed to io_submit. If - * the operation is successfully cancelled, the resulting event is - * copied into the memory pointed to by result without being placed - * into the completion queue and 0 is returned. May fail with - * -EFAULT if any of the data structures pointed to are invalid. - * May fail with -EINVAL if aio_context specified by ctx_id is - * invalid. May fail with -EAGAIN if the iocb specified was not - * cancelled. Will fail with -ENOSYS if not implemented. + * Attempts to cancel an iocb previously passed to io_submit(). If the + * operation is successfully cancelled 0 is returned. May fail with + * -EFAULT if any of the data structures pointed to are invalid. May + * fail with -EINVAL if aio_context specified by ctx_id is invalid. Will + * fail with -ENOSYS if not implemented. */ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) @@ -2203,14 +2200,12 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, } spin_unlock_irq(&ctx->ctx_lock); - if (!ret) { - /* - * The result argument is no longer used - the io_event is - * always delivered via the ring buffer. -EINPROGRESS indicates - * cancellation is progress: - */ - ret = -EINPROGRESS; - } + /* + * The result argument is no longer used - the io_event is always + * delivered via the ring buffer. + */ + if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW) + aio_complete_rw(&kiocb->rw, -EINTR); percpu_ref_put(&ctx->users); From 0f8ca019544a252d1afb468ce840c6dcbac73af4 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Tue, 20 Feb 2024 09:14:25 +0530 Subject: [PATCH 087/225] drm/amd/display: Prevent potential buffer overflow in map_hw_resources Adds a check in the map_hw_resources function to prevent a potential buffer overflow. The function was accessing arrays using an index that could potentially be greater than the size of the arrays, leading to a buffer overflow. Adds a check to ensure that the index is within the bounds of the arrays. If the index is out of bounds, an error message is printed and break it will continue execution with just ignoring extra data early to prevent the buffer overflow. Reported by smatch: drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml2_wrapper.c:79 map_hw_resources() error: buffer overflow 'dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id' 6 <= 7 drivers/gpu/drm/amd/amdgpu/../display/dc/dml2/dml2_wrapper.c:81 map_hw_resources() error: buffer overflow 'dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id' 6 <= 7 Fixes: 7966f319c66d ("drm/amd/display: Introduce DML2") Cc: Rodrigo Siqueira Cc: Roman Li Cc: Qingqing Zhuo Cc: Aurabindo Pillai Cc: Tom Chung Signed-off-by: Srinivasan Shanmugam Suggested-by: Roman Li Reviewed-by: Roman Li Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 26307e599614..2a58a7687bdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -76,6 +76,11 @@ static void map_hw_resources(struct dml2_context *dml2, in_out_display_cfg->hw.DLGRefClkFreqMHz = 50; } for (j = 0; j < mode_support_info->DPPPerSurface[i]; j++) { + if (i >= __DML2_WRAPPER_MAX_STREAMS_PLANES__) { + dml_print("DML::%s: Index out of bounds: i=%d, __DML2_WRAPPER_MAX_STREAMS_PLANES__=%d\n", + __func__, i, __DML2_WRAPPER_MAX_STREAMS_PLANES__); + break; + } dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_stream_id[i]; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_stream_id_valid[num_pipes] = true; dml2->v20.scratch.dml_to_dc_pipe_mapping.dml_pipe_idx_to_plane_id[num_pipes] = dml2->v20.scratch.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[i]; From 7968e9748fbbd7ae49770d9f8a8231d8bce2aebb Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Thu, 22 Feb 2024 17:08:42 +0800 Subject: [PATCH 088/225] drm/amdgpu/pm: Fix the power1_min_cap value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's unreasonable to use 0 as the power1_min_cap when OD is disabled. So, use the same lower limit as the value used when OD is enabled. Fixes: 1958946858a6 ("drm/amd/pm: Support for getting power1_cap_min value") Signed-off-by: Ma Jun Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 9 ++++----- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 9 ++++----- 5 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 4cd43bbec910..bcad42534da4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1303,13 +1303,12 @@ static int arcturus_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 8d1d29ffb0f1..ed189a3878eb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2357,13 +2357,12 @@ static int navi10_get_power_limit(struct smu_context *smu, *default_power_limit = power_limit; if (smu->od_enabled && - navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) { + navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_POWER_LIMIT)) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 21fc033528fa..e2ad2b972ab0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -640,13 +640,12 @@ static int sienna_cichlid_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_11_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a9954ffc02c5..9b80f18ea6c3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2369,13 +2369,12 @@ static int smu_v13_0_0_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_0_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 0ffdb58af74e..3dc7b60cb075 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2333,13 +2333,12 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu, if (default_power_limit) *default_power_limit = power_limit; - if (smu->od_enabled) { + if (smu->od_enabled) od_percent_upper = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); - } else { + else od_percent_upper = 0; - od_percent_lower = 100; - } + + od_percent_lower = le32_to_cpu(powerplay_table->overdrive_table.min[SMU_13_0_7_ODSETTING_POWERPERCENTAGE]); dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n", od_percent_upper, od_percent_lower, power_limit); From c671ec01311b4744b377f98b0b4c6d033fe569b3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 22 Feb 2024 20:56:59 +0800 Subject: [PATCH 089/225] drm/amdgpu: Enable gpu reset for S3 abort cases on Raven series Currently, GPU resets can now be performed successfully on the Raven series. While GPU reset is required for the S3 suspend abort case. So now can enable gpu reset for S3 abort cases on the Raven series. Signed-off-by: Prike Liang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 45 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c64c01e2944a..1c614451dead 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -1298,24 +1321,6 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); } -static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) -{ - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. - */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !adev->suspend_complete && - sol_reg) - return true; - - return false; -} - static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; From 955558030954b9637b41c97b730f9b38c92ac488 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 9 Aug 2023 15:06:00 -0400 Subject: [PATCH 090/225] Revert "drm/amd/pm: resolve reboot exception for si oland" This reverts commit e490d60a2f76bff636c68ce4fe34c1b6c34bbd86. This causes hangs on SI when DC is enabled and errors on driver reboot and power off cycles. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3216 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2755 Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index df4f20293c16..eb4da3666e05 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,6 +6925,23 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } +static int si_set_temperature_range(struct amdgpu_device *adev) +{ + int ret; + + ret = si_thermal_enable_alert(adev, false); + if (ret) + return ret; + ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); + if (ret) + return ret; + ret = si_thermal_enable_alert(adev, true); + if (ret) + return ret; + + return ret; +} + static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7608,6 +7625,18 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->pm.dpm_enabled) + return 0; + + ret = si_set_temperature_range(adev); + if (ret) + return ret; +#if 0 //TODO ? + si_dpm_powergate_uvd(adev, true); +#endif return 0; } From 65730fe8f4fb039683d76fa8ea7e8d18a53c6cc6 Mon Sep 17 00:00:00 2001 From: Vadim Shakirov Date: Tue, 27 Feb 2024 20:00:01 +0300 Subject: [PATCH 091/225] drivers: perf: added capabilities for legacy PMU Added the PERF_PMU_CAP_NO_INTERRUPT flag because the legacy pmu driver does not provide sampling capabilities Added the PERF_PMU_CAP_NO_EXCLUDE flag because the legacy pmu driver does not provide the ability to disable counter incrementation in different privilege modes Suggested-by: Atish Patra Signed-off-by: Vadim Shakirov Reviewed-by: Atish Patra Fixes: 9b3e150e310e ("RISC-V: Add a simple platform driver for RISC-V legacy perf") Link: https://lore.kernel.org/r/20240227170002.188671-2-vadim.shakirov@syntacore.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu_legacy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index 79fdd667922e..a85fc9a15f03 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -117,6 +117,8 @@ static void pmu_legacy_init(struct riscv_pmu *pmu) pmu->event_mapped = pmu_legacy_event_mapped; pmu->event_unmapped = pmu_legacy_event_unmapped; pmu->csr_index = pmu_legacy_csr_index; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE; perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW); } From 682dc133f83e0194796e6ea72eb642df1c03dfbe Mon Sep 17 00:00:00 2001 From: Vadim Shakirov Date: Tue, 27 Feb 2024 20:00:02 +0300 Subject: [PATCH 092/225] drivers: perf: ctr_get_width function for legacy is not defined With parameters CONFIG_RISCV_PMU_LEGACY=y and CONFIG_RISCV_PMU_SBI=n linux kernel crashes when you try perf record: $ perf record ls [ 46.749286] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 46.750199] Oops [#1] [ 46.750342] Modules linked in: [ 46.750608] CPU: 0 PID: 107 Comm: perf-exec Not tainted 6.6.0 #2 [ 46.750906] Hardware name: riscv-virtio,qemu (DT) [ 46.751184] epc : 0x0 [ 46.751430] ra : arch_perf_update_userpage+0x54/0x13e [ 46.751680] epc : 0000000000000000 ra : ffffffff8072ee52 sp : ff2000000022b8f0 [ 46.751958] gp : ffffffff81505988 tp : ff6000000290d400 t0 : ff2000000022b9c0 [ 46.752229] t1 : 0000000000000001 t2 : 0000000000000003 s0 : ff2000000022b930 [ 46.752451] s1 : ff600000028fb000 a0 : 0000000000000000 a1 : ff600000028fb000 [ 46.752673] a2 : 0000000ae2751268 a3 : 00000000004fb708 a4 : 0000000000000004 [ 46.752895] a5 : 0000000000000000 a6 : 000000000017ffe3 a7 : 00000000000000d2 [ 46.753117] s2 : ff600000028fb000 s3 : 0000000ae2751268 s4 : 0000000000000000 [ 46.753338] s5 : ffffffff8153e290 s6 : ff600000863b9000 s7 : ff60000002961078 [ 46.753562] s8 : ff60000002961048 s9 : ff60000002961058 s10: 0000000000000001 [ 46.753783] s11: 0000000000000018 t3 : ffffffffffffffff t4 : ffffffffffffffff [ 46.754005] t5 : ff6000000292270c t6 : ff2000000022bb30 [ 46.754179] status: 0000000200000100 badaddr: 0000000000000000 cause: 000000000000000c [ 46.754653] Code: Unable to access instruction at 0xffffffffffffffec. [ 46.754939] ---[ end trace 0000000000000000 ]--- [ 46.755131] note: perf-exec[107] exited with irqs disabled [ 46.755546] note: perf-exec[107] exited with preempt_count 4 This happens because in the legacy case the ctr_get_width function was not defined, but it is used in arch_perf_update_userpage. Also remove extra check in riscv_pmu_ctr_get_width_mask Signed-off-by: Vadim Shakirov Reviewed-by: Alexandre Ghiti Reviewed-by: Atish Patra Fixes: cc4c07c89aad ("drivers: perf: Implement perf event mmap support in the SBI backend") Link: https://lore.kernel.org/r/20240227170002.188671-3-vadim.shakirov@syntacore.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu.c | 18 +++++------------- drivers/perf/riscv_pmu_legacy.c | 8 +++++++- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index 0dda70e1ef90..c78a6fd6c57f 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -150,19 +150,11 @@ u64 riscv_pmu_ctr_get_width_mask(struct perf_event *event) struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - if (!rvpmu->ctr_get_width) - /** - * If the pmu driver doesn't support counter width, set it to default - * maximum allowed by the specification. - */ - cwidth = 63; - else { - if (hwc->idx == -1) - /* Handle init case where idx is not initialized yet */ - cwidth = rvpmu->ctr_get_width(0); - else - cwidth = rvpmu->ctr_get_width(hwc->idx); - } + if (hwc->idx == -1) + /* Handle init case where idx is not initialized yet */ + cwidth = rvpmu->ctr_get_width(0); + else + cwidth = rvpmu->ctr_get_width(hwc->idx); return GENMASK_ULL(cwidth, 0); } diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c index a85fc9a15f03..fa0bccf4edf2 100644 --- a/drivers/perf/riscv_pmu_legacy.c +++ b/drivers/perf/riscv_pmu_legacy.c @@ -37,6 +37,12 @@ static int pmu_legacy_event_map(struct perf_event *event, u64 *config) return pmu_legacy_ctr_get_idx(event); } +/* cycle & instret are always 64 bit, one bit less according to SBI spec */ +static int pmu_legacy_ctr_get_width(int idx) +{ + return 63; +} + static u64 pmu_legacy_read_ctr(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -111,7 +117,7 @@ static void pmu_legacy_init(struct riscv_pmu *pmu) pmu->ctr_stop = NULL; pmu->event_map = pmu_legacy_event_map; pmu->ctr_get_idx = pmu_legacy_ctr_get_idx; - pmu->ctr_get_width = NULL; + pmu->ctr_get_width = pmu_legacy_ctr_get_width; pmu->ctr_clear_idx = NULL; pmu->ctr_read = pmu_legacy_read_ctr; pmu->event_mapped = pmu_legacy_event_mapped; From f41900e4a6ef019d64a70394b0e0c3bd048d4ec8 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:52 +0000 Subject: [PATCH 093/225] drm/buddy: fix range bias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a corner case here where start/end is after/before the block range we are currently checking. If so we need to be sure that splitting the block will eventually give use the block size we need. To do that we should adjust the block range to account for the start/end, and only continue with the split if the size/alignment will fit the requested size. Not doing so can result in leaving split blocks unmerged when it eventually fails. Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function") Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Cc: # v5.18+ Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-4-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index c4222b886db7..f3a6ac908f81 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm, u64 start, u64 end, unsigned int order) { + u64 req_size = mm->chunk_size << order; struct drm_buddy_block *block; struct drm_buddy_block *buddy; LIST_HEAD(dfs); @@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm, if (drm_buddy_block_is_allocated(block)) continue; + if (block_start < start || block_end > end) { + u64 adjusted_start = max(block_start, start); + u64 adjusted_end = min(block_end, end); + + if (round_down(adjusted_end + 1, req_size) <= + round_up(adjusted_start, req_size)) + continue; + } + if (contains(start, end, block_start, block_end) && order == drm_buddy_block_order(block)) { /* From 2986314aa811c8a23aeb292edd30315495d54966 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:53 +0000 Subject: [PATCH 094/225] drm/buddy: check range allocation matches alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Likely not a big deal for real users, but for consistency we should respect the min_page_size here. Main issue is that bias allocations turns into normal range allocation if the range and size matches exactly, and in the next patch we want to add some unit tests for this part of the api. Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-5-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_buddy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index f3a6ac908f81..5ebdd6f8f36e 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -771,8 +771,12 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, return -EINVAL; /* Actual range allocation */ - if (start + size == end) + if (start + size == end) { + if (!IS_ALIGNED(start | end, min_block_size)) + return -EINVAL; + return __drm_buddy_alloc_range(mm, start, size, NULL, blocks); + } original_size = size; original_min_size = min_block_size; From c70703320e557ff30847915e6a7631a9abdda16b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 19 Feb 2024 12:18:54 +0000 Subject: [PATCH 095/225] drm/tests/drm_buddy: add alloc_range_bias test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sanity check range bias with DRM_BUDDY_RANGE_ALLOCATION. v2: - Be consistent with u32 here. Signed-off-by: Matthew Auld Cc: Arunpravin Paneer Selvam Cc: Christian König Reviewed-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20240219121851.25774-6-matthew.auld@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/tests/drm_buddy_test.c | 218 +++++++++++++++++++++++++ 1 file changed, 218 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_buddy_test.c b/drivers/gpu/drm/tests/drm_buddy_test.c index edacc1adb28f..1008d5b9d61e 100644 --- a/drivers/gpu/drm/tests/drm_buddy_test.c +++ b/drivers/gpu/drm/tests/drm_buddy_test.c @@ -14,11 +14,216 @@ #include "../lib/drm_random.h" +static unsigned int random_seed; + static inline u64 get_size(int order, u64 chunk_size) { return (1 << order) * chunk_size; } +static void drm_test_buddy_alloc_range_bias(struct kunit *test) +{ + u32 mm_size, ps, bias_size, bias_start, bias_end, bias_rem; + DRM_RND_STATE(prng, random_seed); + unsigned int i, count, *order; + struct drm_buddy mm; + LIST_HEAD(allocated); + + bias_size = SZ_1M; + ps = roundup_pow_of_two(prandom_u32_state(&prng) % bias_size); + ps = max(SZ_4K, ps); + mm_size = (SZ_8M-1) & ~(ps-1); /* Multiple roots */ + + kunit_info(test, "mm_size=%u, ps=%u\n", mm_size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + count = mm_size / bias_size; + order = drm_random_order(count, &prng); + KUNIT_EXPECT_TRUE(test, order); + + /* + * Idea is to split the address space into uniform bias ranges, and then + * in some random order allocate within each bias, using various + * patterns within. This should detect if allocations leak out from a + * given bias, for example. + */ + + for (i = 0; i < count; i++) { + LIST_HEAD(tmp); + u32 size; + + bias_start = order[i] * bias_size; + bias_end = bias_start + bias_size; + bias_rem = bias_size; + + /* internal round_up too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, bias_size, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + + /* size too big */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size + ps, ps); + + /* bias range too small for size */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end, bias_size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end, bias_size, ps); + + /* bias misaligned */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start + ps, + bias_end - ps, + bias_size >> 1, bias_size >> 1, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc h didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start + ps, bias_end - ps, bias_size >> 1, bias_size >> 1); + + /* single big page */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_size, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc i failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_size, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* single page with internal round_up */ + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, ps, bias_size, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, ps, bias_size); + drm_buddy_free_list(&mm, &tmp); + + /* random size within */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + if (size) + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &tmp, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + + bias_rem -= size; + /* too big for current avail */ + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, bias_rem + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc didn't fail with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, bias_rem + ps, ps); + + if (bias_rem) { + /* random fill of the remainder */ + size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + size = max(size, ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size, ps); + /* + * Intentionally allow some space to be left + * unallocated, and ideally not always on the bias + * boundaries. + */ + drm_buddy_free_list(&mm, &tmp); + } else { + list_splice_tail(&tmp, &allocated); + } + } + + kfree(order); + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); + + /* + * Something more free-form. Idea is to pick a random starting bias + * range within the address space and then start filling it up. Also + * randomly grow the bias range in both directions as we go along. This + * should give us bias start/end which is not always uniform like above, + * and in some cases will require the allocator to jump over already + * allocated nodes in the middle of the address space. + */ + + KUNIT_ASSERT_FALSE_MSG(test, drm_buddy_init(&mm, mm_size, ps), + "buddy_init failed\n"); + + bias_start = round_up(prandom_u32_state(&prng) % (mm_size - ps), ps); + bias_end = round_up(bias_start + prandom_u32_state(&prng) % (mm_size - bias_start), ps); + bias_end = max(bias_end, bias_start + ps); + bias_rem = bias_end - bias_start; + + do { + u32 size = max(round_up(prandom_u32_state(&prng) % bias_rem, ps), ps); + + KUNIT_ASSERT_FALSE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, + bias_end, size, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc failed with bias(%x-%x), size=%u, ps=%u\n", + bias_start, bias_end, size); + bias_rem -= size; + + /* + * Try to randomly grow the bias range in both directions, or + * only one, or perhaps don't grow at all. + */ + do { + u32 old_bias_start = bias_start; + u32 old_bias_end = bias_end; + + if (bias_start) + bias_start -= round_up(prandom_u32_state(&prng) % bias_start, ps); + if (bias_end != mm_size) + bias_end += round_up(prandom_u32_state(&prng) % (mm_size - bias_end), ps); + + bias_rem += old_bias_start - bias_start; + bias_rem += bias_end - old_bias_end; + } while (!bias_rem && (bias_start || bias_end != mm_size)); + } while (bias_rem); + + KUNIT_ASSERT_EQ(test, bias_start, 0); + KUNIT_ASSERT_EQ(test, bias_end, mm_size); + KUNIT_ASSERT_TRUE_MSG(test, + drm_buddy_alloc_blocks(&mm, bias_start, bias_end, + ps, ps, + &allocated, + DRM_BUDDY_RANGE_ALLOCATION), + "buddy_alloc passed with bias(%x-%x), size=%u\n", + bias_start, bias_end, ps); + + drm_buddy_free_list(&mm, &allocated); + drm_buddy_fini(&mm); +} + static void drm_test_buddy_alloc_contiguous(struct kunit *test) { u32 mm_size, ps = SZ_4K, i, n_pages, total; @@ -363,17 +568,30 @@ static void drm_test_buddy_alloc_limit(struct kunit *test) drm_buddy_fini(&mm); } +static int drm_buddy_suite_init(struct kunit_suite *suite) +{ + while (!random_seed) + random_seed = get_random_u32(); + + kunit_info(suite, "Testing DRM buddy manager, with random_seed=0x%x\n", + random_seed); + + return 0; +} + static struct kunit_case drm_buddy_tests[] = { KUNIT_CASE(drm_test_buddy_alloc_limit), KUNIT_CASE(drm_test_buddy_alloc_optimistic), KUNIT_CASE(drm_test_buddy_alloc_pessimistic), KUNIT_CASE(drm_test_buddy_alloc_pathological), KUNIT_CASE(drm_test_buddy_alloc_contiguous), + KUNIT_CASE(drm_test_buddy_alloc_range_bias), {} }; static struct kunit_suite drm_buddy_test_suite = { .name = "drm_buddy", + .suite_init = drm_buddy_suite_init, .test_cases = drm_buddy_tests, }; From 183420038444547c149a0fc5f58e792c2752860c Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Tue, 27 Feb 2024 00:53:57 +0300 Subject: [PATCH 096/225] crypto: sun8i-ce - Fix use after free in unprepare sun8i_ce_cipher_unprepare should be called before crypto_finalize_skcipher_request, because client callbacks may immediately free memory, that isn't needed anymore. But it will be used by unprepare after free. Before removing prepare/unprepare callbacks it was handled by crypto engine in crypto_finalize_request. Usually that results in a pointer dereference problem during a in crypto selftest. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000004716d000 [0000000000000030] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP This problem is detected by KASAN as well. ================================================================== BUG: KASAN: slab-use-after-free in sun8i_ce_cipher_do_one+0x6e8/0xf80 [sun8i_ce] Read of size 8 at addr ffff00000dcdc040 by task 1c15000.crypto-/373 Hardware name: Pine64 PinePhone (1.2) (DT) Call trace: dump_backtrace+0x9c/0x128 show_stack+0x20/0x38 dump_stack_lvl+0x48/0x60 print_report+0xf8/0x5d8 kasan_report+0x90/0xd0 __asan_load8+0x9c/0xc0 sun8i_ce_cipher_do_one+0x6e8/0xf80 [sun8i_ce] crypto_pump_work+0x354/0x620 [crypto_engine] kthread_worker_fn+0x244/0x498 kthread+0x168/0x178 ret_from_fork+0x10/0x20 Allocated by task 379: kasan_save_stack+0x3c/0x68 kasan_set_track+0x2c/0x40 kasan_save_alloc_info+0x24/0x38 __kasan_kmalloc+0xd4/0xd8 __kmalloc+0x74/0x1d0 alg_test_skcipher+0x90/0x1f0 alg_test+0x24c/0x830 cryptomgr_test+0x38/0x60 kthread+0x168/0x178 ret_from_fork+0x10/0x20 Freed by task 379: kasan_save_stack+0x3c/0x68 kasan_set_track+0x2c/0x40 kasan_save_free_info+0x38/0x60 __kasan_slab_free+0x100/0x170 slab_free_freelist_hook+0xd4/0x1e8 __kmem_cache_free+0x15c/0x290 kfree+0x74/0x100 kfree_sensitive+0x80/0xb0 alg_test_skcipher+0x12c/0x1f0 alg_test+0x24c/0x830 cryptomgr_test+0x38/0x60 kthread+0x168/0x178 ret_from_fork+0x10/0x20 The buggy address belongs to the object at ffff00000dcdc000 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 64 bytes inside of freed 256-byte region [ffff00000dcdc000, ffff00000dcdc100) Signed-off-by: Andrey Skvortsov Fixes: 4136212ab18e ("crypto: sun8i-ce - Remove prepare/unprepare request") Cc: Signed-off-by: Herbert Xu --- .../allwinner/sun8i-ce/sun8i-ce-cipher.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 1262a7773ef3..de50c00ba218 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -299,22 +299,6 @@ theend: return err; } -static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) -{ - struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq); - struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); - struct sun8i_ce_dev *ce = op->ce; - struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq); - int flow, err; - - flow = rctx->flow; - err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); - local_bh_disable(); - crypto_finalize_skcipher_request(engine, breq, err); - local_bh_enable(); -} - static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_req) { @@ -360,6 +344,23 @@ static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine, dma_unmap_single(ce->dev, rctx->addr_key, op->keylen, DMA_TO_DEVICE); } +static void sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *breq = container_of(areq, struct skcipher_request, base); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(breq); + struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sun8i_ce_dev *ce = op->ce; + struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq); + int flow, err; + + flow = rctx->flow; + err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); + sun8i_ce_cipher_unprepare(engine, areq); + local_bh_disable(); + crypto_finalize_skcipher_request(engine, breq, err); + local_bh_enable(); +} + int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq) { int err = sun8i_ce_cipher_prepare(engine, areq); @@ -368,7 +369,6 @@ int sun8i_ce_cipher_do_one(struct crypto_engine *engine, void *areq) return err; sun8i_ce_cipher_run(engine, areq); - sun8i_ce_cipher_unprepare(engine, areq); return 0; } From 316a784839b21b122e1761cdca54677bb19a47fa Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Wed, 28 Feb 2024 15:39:13 +0800 Subject: [PATCH 097/225] ASoC: amd: yc: add new YC platform variant (0x63) support The Lenovo 21J2 (ThinkBook 16 G5+ APO) has this new variant, as detected with lspci: 64:00.5 Multimedia controller: Advanced Micro Devices, Inc. [AMD] ACP/ACP3X/ACP6x Audio Coprocessor (rev 63) Signed-off-by: Jiawei Wang Link: https://msgid.link/r/20240228073914.232204-1-me@jwang.link Signed-off-by: Mark Brown --- sound/soc/amd/yc/pci-acp6x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/amd/yc/pci-acp6x.c b/sound/soc/amd/yc/pci-acp6x.c index 7af6a349b1d4..694b8e313902 100644 --- a/sound/soc/amd/yc/pci-acp6x.c +++ b/sound/soc/amd/yc/pci-acp6x.c @@ -162,6 +162,7 @@ static int snd_acp6x_probe(struct pci_dev *pci, /* Yellow Carp device check */ switch (pci->revision) { case 0x60: + case 0x63: case 0x6f: break; default: From ed00a6945dc32462c2d3744a3518d2316da66fcc Mon Sep 17 00:00:00 2001 From: Jiawei Wang Date: Wed, 28 Feb 2024 15:39:14 +0800 Subject: [PATCH 098/225] ASoC: amd: yc: Fix non-functional mic on Lenovo 21J2 Like many other models, the Lenovo 21J2 (ThinkBook 16 G5+ APO) needs a quirk entry for the internal microphone to function. Signed-off-by: Jiawei Wang Link: https://msgid.link/r/20240228073914.232204-2-me@jwang.link Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 5587198751da..abb9589b8477 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -199,6 +199,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21HY"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J2"), + } + }, { .driver_data = &acp6x_card, .matches = { From 8e9f25a290ae0016353c9ea13314c95fb3207812 Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 22:09:30 +0200 Subject: [PATCH 099/225] mmc: sdhci-xenon: fix PHY init clock stability Each time SD/mmc phy is initialized, at times, in some of the attempts, phy fails to completes its initialization which results into timeout error. Per the HW spec, it is a pre-requisite to ensure a stable SD clock before a phy initialization is attempted. Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222200930.1277665-1-enachman@marvell.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon-phy.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index 8cf3a375de65..c3096230a969 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "sdhci-pltfm.h" @@ -216,6 +217,19 @@ static int xenon_alloc_emmc_phy(struct sdhci_host *host) return 0; } +static int xenon_check_stability_internal_clk(struct sdhci_host *host) +{ + u32 reg; + int err; + + err = read_poll_timeout(sdhci_readw, reg, reg & SDHCI_CLOCK_INT_STABLE, + 1100, 20000, false, host, SDHCI_CLOCK_CONTROL); + if (err) + dev_err(mmc_dev(host->mmc), "phy_init: Internal clock never stabilized.\n"); + + return err; +} + /* * eMMC 5.0/5.1 PHY init/re-init. * eMMC PHY init should be executed after: @@ -232,6 +246,11 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host); struct xenon_emmc_phy_regs *phy_regs = priv->emmc_phy_regs; + int ret = xenon_check_stability_internal_clk(host); + + if (ret) + return ret; + reg = sdhci_readl(host, phy_regs->timing_adj); reg |= XENON_PHY_INITIALIZAION; sdhci_writel(host, reg, phy_regs->timing_adj); From 09e23823ae9a3e2d5d20f2e1efe0d6e48cef9129 Mon Sep 17 00:00:00 2001 From: Elad Nachman Date: Thu, 22 Feb 2024 21:17:14 +0200 Subject: [PATCH 100/225] mmc: sdhci-xenon: add timeout for PHY init complete AC5X spec says PHY init complete bit must be polled until zero. We see cases in which timeout can take longer than the standard calculation on AC5X, which is expected following the spec comment above. According to the spec, we must wait as long as it takes for that bit to toggle on AC5X. Cap that with 100 delay loops so we won't get stuck forever. Fixes: 06c8b667ff5b ("mmc: sdhci-xenon: Add support to PHYs of Marvell Xenon SDHC") Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Elad Nachman Link: https://lore.kernel.org/r/20240222191714.1216470-3-enachman@marvell.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon-phy.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c index c3096230a969..cc9d28b75eb9 100644 --- a/drivers/mmc/host/sdhci-xenon-phy.c +++ b/drivers/mmc/host/sdhci-xenon-phy.c @@ -110,6 +110,8 @@ #define XENON_EMMC_PHY_LOGIC_TIMING_ADJUST (XENON_EMMC_PHY_REG_BASE + 0x18) #define XENON_LOGIC_TIMING_VALUE 0x00AA8977 +#define XENON_MAX_PHY_TIMEOUT_LOOPS 100 + /* * List offset of PHY registers and some special register values * in eMMC PHY 5.0 or eMMC PHY 5.1 @@ -278,18 +280,27 @@ static int xenon_emmc_phy_init(struct sdhci_host *host) /* get the wait time */ wait /= clock; wait++; - /* wait for host eMMC PHY init completes */ - udelay(wait); - reg = sdhci_readl(host, phy_regs->timing_adj); - reg &= XENON_PHY_INITIALIZAION; - if (reg) { + /* + * AC5X spec says bit must be polled until zero. + * We see cases in which timeout can take longer + * than the standard calculation on AC5X, which is + * expected following the spec comment above. + * According to the spec, we must wait as long as + * it takes for that bit to toggle on AC5X. + * Cap that with 100 delay loops so we won't get + * stuck here forever: + */ + + ret = read_poll_timeout(sdhci_readl, reg, + !(reg & XENON_PHY_INITIALIZAION), + wait, XENON_MAX_PHY_TIMEOUT_LOOPS * wait, + false, host, phy_regs->timing_adj); + if (ret) dev_err(mmc_dev(host->mmc), "eMMC PHY init cannot complete after %d us\n", - wait); - return -ETIMEDOUT; - } + wait * XENON_MAX_PHY_TIMEOUT_LOOPS); - return 0; + return ret; } #define ARMADA_3700_SOC_PAD_1_8V 0x1 From 664bad6af3cbe01d6804b7264bee674b3e7dae7e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 28 Feb 2024 00:08:08 +0200 Subject: [PATCH 101/225] Revert "drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes" This reverts commit e467e0bde881 ("drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes"). The commit changed the way how the MSM DP driver communicates HPD-related events to the userspace. The mentioned commit made some of the HPD events being reported earlier. This way userspace starts poking around. It interacts in a bad way with the dp_bridge_detect and the driver's state machine, ending up either with the very long delays during hotplug detection or even inability of the DP driver to report the display as connected. A proper fix will involve redesigning of the HPD handling in the MSM DP driver. It is underway, but it will be intrusive and can not be thought about as a simple fix for the issue. Thus, revert the offending commit. Fixes: e467e0bde881 ("drm/msm/dp: use drm_bridge_hpd_notify() to report HPD status changes") Link: https://gitlab.freedesktop.org/drm/msm/-/issues/50 Reported-by: Johan Hovold Link: https://lore.kernel.org/r/Zd3YPGmrprxv-N-O@hovoldconsulting.com/ Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Paloma Arellano Tested-by: Johan Hovold Tested-by: Neil Armstrong # on SM8650-HDK Patchwork: https://patchwork.freedesktop.org/patch/580313/ Link: https://lore.kernel.org/r/20240227220808.50146-1-dmitry.baryshkov@linaro.org --- drivers/gpu/drm/msm/dp/dp_display.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index d37d599aec27..4c72124ffb5d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -329,10 +329,26 @@ static const struct component_ops dp_display_comp_ops = { .unbind = dp_display_unbind, }; +static void dp_display_send_hpd_event(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + struct drm_connector *connector; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + connector = dp->dp_display.connector; + drm_helper_hpd_irq_event(connector->dev); +} + static int dp_display_send_hpd_notification(struct dp_display_private *dp, bool hpd) { - struct drm_bridge *bridge = dp->dp_display.bridge; + if ((hpd && dp->dp_display.link_ready) || + (!hpd && !dp->dp_display.link_ready)) { + drm_dbg_dp(dp->drm_dev, "HPD already %s\n", + (hpd ? "on" : "off")); + return 0; + } /* reset video pattern flag on disconnect */ if (!hpd) { @@ -348,7 +364,7 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp, drm_dbg_dp(dp->drm_dev, "type=%d hpd=%d\n", dp->dp_display.connector_type, hpd); - drm_bridge_hpd_notify(bridge, dp->dp_display.link_ready); + dp_display_send_hpd_event(&dp->dp_display); return 0; } From 2a93c6cbd5a703d44c414a3c3945a87ce11430ba Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 26 Feb 2024 17:49:57 -0800 Subject: [PATCH 102/225] pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation Commit 'e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain")' aimed to make sure that a power-domain that is being enabled without any particular performance-state requested will at least turn the rail on, to avoid filling DeviceTree with otherwise unnecessary required-opps properties. But in the event that aggregation happens on a disabled power-domain, with an enabled peer without performance-state, both the local and peer corner are 0. The peer's enabled_corner is not considered, with the result that the underlying (shared) resource is disabled. One case where this can be observed is when the display stack keeps mmcx enabled (but without a particular performance-state vote) in order to access registers and sync_state happens in the rpmhpd driver. As mmcx_ao is flushed the state of the peer (mmcx) is not considered and mmcx_ao ends up turning off "mmcx.lvl" underneath mmcx. This has been observed several times, but has been painted over in DeviceTree by adding an explicit vote for the lowest non-disabled performance-state. Fixes: e3e56c050ab6 ("soc: qcom: rpmhpd: Make power_on actually enable the domain") Reported-by: Johan Hovold Closes: https://lore.kernel.org/linux-arm-msm/ZdMwZa98L23mu3u6@hovoldconsulting.com/ Cc: Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Reviewed-by: Stephen Boyd Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20240226-rpmhpd-enable-corner-fix-v1-1-68c004cec48c@quicinc.com Signed-off-by: Ulf Hansson --- drivers/pmdomain/qcom/rpmhpd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pmdomain/qcom/rpmhpd.c b/drivers/pmdomain/qcom/rpmhpd.c index 3078896b1300..47df910645f6 100644 --- a/drivers/pmdomain/qcom/rpmhpd.c +++ b/drivers/pmdomain/qcom/rpmhpd.c @@ -692,6 +692,7 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) unsigned int active_corner, sleep_corner; unsigned int this_active_corner = 0, this_sleep_corner = 0; unsigned int peer_active_corner = 0, peer_sleep_corner = 0; + unsigned int peer_enabled_corner; if (pd->state_synced) { to_active_sleep(pd, corner, &this_active_corner, &this_sleep_corner); @@ -701,9 +702,11 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) this_sleep_corner = pd->level_count - 1; } - if (peer && peer->enabled) - to_active_sleep(peer, peer->corner, &peer_active_corner, + if (peer && peer->enabled) { + peer_enabled_corner = max(peer->corner, peer->enable_corner); + to_active_sleep(peer, peer_enabled_corner, &peer_active_corner, &peer_sleep_corner); + } active_corner = max(this_active_corner, peer_active_corner); From 66f40b926dd249f74334a22162c09e7ec1ec5b07 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 27 Feb 2024 19:58:01 -0500 Subject: [PATCH 103/225] cgroup/cpuset: Fix a memory leak in update_exclusive_cpumask() Fix a possible memory leak in update_exclusive_cpumask() by moving the alloc_cpumasks() down after the validate_change() check which can fail and still before the temporary cpumasks are needed. Fixes: e2ffe502ba45 ("cgroup/cpuset: Add cpuset.cpus.exclusive for v2") Reported-and-tested-by: Mirsad Todorovac Closes: https://lore.kernel.org/lkml/14915689-27a3-4cd8-80d2-9c30d0c768b6@alu.unizg.hr Signed-off-by: Waiman Long Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v6.7+ --- kernel/cgroup/cpuset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index ba36c073304a..7260f095802a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2598,9 +2598,6 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) return 0; - if (alloc_cpumasks(NULL, &tmp)) - return -ENOMEM; - if (*buf) compute_effective_exclusive_cpumask(trialcs, NULL); @@ -2615,6 +2612,9 @@ static int update_exclusive_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (retval) return retval; + if (alloc_cpumasks(NULL, &tmp)) + return -ENOMEM; + if (old_prs) { if (cpumask_empty(trialcs->effective_xcpus)) { invalidate = true; From b7cdccc6a849568775f738b1e233f751a8fed013 Mon Sep 17 00:00:00 2001 From: Ryan Lin Date: Wed, 28 Feb 2024 11:39:21 -0700 Subject: [PATCH 104/225] drm/amd/display: Add monitor patch for specific eDP [WHY] Some eDP panels' ext caps don't write initial values. The value of dpcd_addr (0x317) can be random and the backlight control interface will be incorrect. [HOW] Add new panel patches to remove sink ext caps. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org # 6.5.x Cc: Tsung-hua Lin Cc: Chris Chi Reviewed-by: Wayne Lin Acked-by: Alex Hung Signed-off-by: Ryan Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 85b7f58a7f35..c27063305a13 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -67,6 +67,8 @@ static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) /* Workaround for some monitors that do not clear DPCD 0x317 if FreeSync is unsupported */ case drm_edid_encode_panel_id('A', 'U', 'O', 0xA7AB): case drm_edid_encode_panel_id('A', 'U', 'O', 0xE69B): + case drm_edid_encode_panel_id('B', 'O', 'E', 0x092A): + case drm_edid_encode_panel_id('L', 'G', 'D', 0x06D1): DRM_DEBUG_DRIVER("Clearing DPCD 0x317 on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.remove_sink_ext_caps = true; break; @@ -120,6 +122,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( edid_caps->edid_hdmi = connector->display_info.is_hdmi; + apply_edid_quirks(edid_buf, edid_caps); + sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); if (sad_count <= 0) return result; @@ -146,8 +150,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( else edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION; - apply_edid_quirks(edid_buf, edid_caps); - kfree(sads); kfree(sadb); From 7e10d87e63f7f9c324d533bb4369e35bb19ab9a9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 21 Feb 2024 14:30:18 +0100 Subject: [PATCH 105/225] drm/xe: Add uapi for dumpable bos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the flag XE_VM_BIND_FLAG_DUMPABLE to notify devcoredump that this mapping should be dumped. This is not hooked up, but the uapi should be ready before merging. It's likely easier to dump the contents of the bo's at devcoredump readout time, so it's better if the bos will stay unmodified after a hang. The NEEDS_CPU_MAPPING flag is removed as requirement. Signed-off-by: Maarten Lankhorst Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240221133024.898315-3-maarten.lankhorst@linux.intel.com (cherry picked from commit 76a86b58d2b3de31e88acb487ebfa0c3cc7c41d2) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 3 ++- include/uapi/drm/xe_drm.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 921ca28d49dd..945c89b5e4b5 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2722,7 +2722,8 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define SUPPORTED_FLAGS \ (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL) + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ + DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 6d11ee9e571a..4f010949f812 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -931,6 +931,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) #define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ __u32 flags; From b6f4fb397db09024c189834d638abbd21bf00769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 26 Dec 2023 08:11:14 -0800 Subject: [PATCH 106/225] drm/xe/uapi: Remove DRM_XE_VM_BIND_FLAG_ASYNC comment left over MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a comment left over of commit d3d767396a02 ("drm/xe/uapi: Remove sync binds"). Fixes: d3d767396a02 ("drm/xe/uapi: Remove sync binds") Reviewed-by: Rodrigo Vivi Cc: Matthew Brost Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20231226172321.61518-1-jose.souza@intel.com (cherry picked from commit f031c3a7af8ea06790dd0a71872c4f0175084baa) Signed-off-by: Thomas Hellström --- include/uapi/drm/xe_drm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 4f010949f812..a7274a99d456 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -832,7 +832,6 @@ struct drm_xe_vm_destroy { * * and the @flags can be: * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_ASYNC * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the * MAP operation immediately rather than deferring the MAP to the page * fault handler. From eaa367a0317ea4cbc7aa60f25829c89c0e12717b Mon Sep 17 00:00:00 2001 From: Francois Dugast Date: Thu, 22 Feb 2024 18:23:56 -0500 Subject: [PATCH 107/225] drm/xe/uapi: Remove unused flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those cases missed in previous uAPI cleanups were mostly accidentally brought in from i915 or created to exercise the possibilities of gpuvm but they are not used by userspace yet, so let's remove them. They can still be brought back later if needed. v2: - Fix XE_VM_FLAG_FAULT_MODE support in xe_lrc.c (Brian Welty) - Leave DRM_XE_VM_BIND_OP_UNMAP_ALL (José Roberto de Souza) - Ensure invalid flag values are rejected (Rodrigo Vivi) v3: Rebase after removal of persistent exec_queues (Francois Dugast) v4: Rodrigo: Rebase after the new dumpable flag. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: Thomas Hellström Cc: Rodrigo Vivi Signed-off-by: Francois Dugast Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232356.175431-1-rodrigo.vivi@intel.com (cherry picked from commit 84a1ed5e67565b09b8fd22a26754d2897de55ce0) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_exec_queue.c | 88 +----------------------- drivers/gpu/drm/xe/xe_exec_queue_types.h | 10 --- drivers/gpu/drm/xe/xe_lrc.c | 10 +-- drivers/gpu/drm/xe/xe_vm.c | 12 +--- drivers/gpu/drm/xe/xe_vm_types.h | 4 -- include/uapi/drm/xe_drm.h | 19 ----- 6 files changed, 6 insertions(+), 137 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 3acfd4f07666..49223026c89f 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -309,85 +309,6 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * return q->ops->set_timeslice(q, value); } -static int exec_queue_set_preemption_timeout(struct xe_device *xe, - struct xe_exec_queue *q, u64 value, - bool create) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_preempt_timeout(q, value); -} - -static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - u32 min = 0, max = 0; - - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - return q->ops->set_job_timeout(q, value); -} - -static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_trigger = value; - - return 0; -} - -static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - q->usm.acc_notify = value; - - return 0; -} - -static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) -{ - if (XE_IOCTL_DBG(xe, !create)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !xe->info.has_usm)) - return -EINVAL; - - if (value > DRM_XE_ACC_GRANULARITY_64M) - return -EINVAL; - - q->usm.acc_granularity = value; - - return 0; -} - typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, u64 value, bool create); @@ -395,11 +316,6 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity, }; static int exec_queue_user_ext_set_property(struct xe_device *xe, @@ -418,7 +334,9 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad)) + XE_IOCTL_DBG(xe, ext.pad) || + XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) return -EINVAL; idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 947bbc4b285d..36f4901d8d7e 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -150,16 +150,6 @@ struct xe_exec_queue { spinlock_t lock; } compute; - /** @usm: unified shared memory state */ - struct { - /** @acc_trigger: access counter trigger */ - u32 acc_trigger; - /** @acc_notify: access counter notify */ - u32 acc_notify; - /** @acc_granularity: access counter granularity */ - u32 acc_granularity; - } usm; - /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 0ec5ad2539f1..b38319d2801e 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -682,8 +682,6 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) #define PVC_CTX_ASID (0x2e + 1) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) -#define ACC_GRANULARITY_S 20 -#define ACC_NOTIFY_S 16 int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size) @@ -754,13 +752,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, RING_CTL_SIZE(lrc->ring.size) | RING_VALID); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, - (q->usm.acc_granularity << - ACC_GRANULARITY_S) | vm->usm.asid); - if (xe->info.has_usm && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD, - (q->usm.acc_notify << ACC_NOTIFY_S) | - q->usm.acc_trigger); + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 945c89b5e4b5..1d82616aa935 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2117,10 +2117,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.pat_index = pat_index; } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { @@ -2313,8 +2309,6 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; @@ -2445,7 +2439,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - op->map.immediate || !xe_vm_in_fault_mode(vm), + !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2720,9 +2714,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a603cc2eb56b..0f220b5d2e7b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -288,10 +288,6 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; - /** @immediate: Immediate bind */ - bool immediate; - /** @read_only: Read only */ - bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @pat_index: The pat index to use for this operation. */ diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index a7274a99d456..bb0c8a994116 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -831,10 +831,6 @@ struct drm_xe_vm_destroy { * - %DRM_XE_VM_BIND_OP_PREFETCH * * and the @flags can be: - * - %DRM_XE_VM_BIND_FLAG_READONLY - * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - Valid on a faulting VM only, do the - * MAP operation immediately rather than deferring the MAP to the page - * fault handler. * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page * tables are setup with a special bit which indicates writes are * dropped and all reads return zero. In the future, the NULL flags @@ -927,8 +923,6 @@ struct drm_xe_vm_bind_op { /** @op: Bind operation to perform */ __u32 op; -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) #define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) /** @flags: Bind flags */ @@ -1045,19 +1039,6 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 -/* Monitor 128KB contiguous region with 4K sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_128K 0 -/* Monitor 2MB contiguous region with 64KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_2M 1 -/* Monitor 16MB contiguous region with 512KB sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_16M 2 -/* Monitor 64MB contiguous region with 2M sub-granularity */ -#define DRM_XE_ACC_GRANULARITY_64M 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; From dc15bd0aa7b5ba77bb216394b368c6f9aedbf2f4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:19 -0800 Subject: [PATCH 108/225] drm/xe: Fix execlist splat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although execlist submission is not supported it should be kept in a basic working state as it can be used for very early hardware bring up. Fix the below splat. WARNING: CPU: 3 PID: 11 at drivers/gpu/drm/xe/xe_execlist.c:217 execlist_run_job+0x1c2/0x220 [xe] Modules linked in: xe drm_kunit_helpers drm_gpuvm drm_ttm_helper ttm drm_exec drm_suballoc_helper drm_buddy gpu_sched mei_pxp mei_hdcp wmi_bmof x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul snd_hda_intel ghash_clmulni_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core video snd_pcm mei_me mei wmi fuse e1000e i2c_i801 ptp i2c_smbus pps_core intel_lpss_pci CPU: 3 PID: 11 Comm: kworker/u16:0 Tainted: G U 6.8.0-rc3-guc+ #1046 Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLSFWI1.R00.3243.A01.2006102133 06/10/2020 Workqueue: rcs0 drm_sched_run_job_work [gpu_sched] RIP: 0010:execlist_run_job+0x1c2/0x220 [xe] Code: 8b f8 03 00 00 4c 89 39 e9 e2 fe ff ff 49 8d 7d 20 be ff ff ff ff e8 ed fd a6 e1 85 c0 0f 85 e1 fe ff ff 0f 0b e9 da fe ff ff <0f> 0b 0f 0b 41 83 fc 03 0f 86 8a fe ff ff 0f 0b e9 83 fe ff ff be RSP: 0018:ffffc9000013bdb8 EFLAGS: 00010246 RAX: ffff888105021a00 RBX: ffff888105078400 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffc9000013bd14 RDI: ffffc90001609090 RBP: ffff88811e3f0040 R08: 0000000000000088 R09: 00000000ffffff81 R10: 0000000000000001 R11: ffff88810c10c000 R12: 00000000fffffffe R13: ffff888109b72c28 R14: ffff8881050784a0 R15: ffff888105078408 FS: 0000000000000000(0000) GS:ffff88849f980000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563459d130f8 CR3: 000000000563a001 CR4: 0000000000f70ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x7f/0x170 ? execlist_run_job+0x1c2/0x220 [xe] ? report_bug+0x1c7/0x1d0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? execlist_run_job+0x1c2/0x220 [xe] ? execlist_run_job+0x2c/0x220 [xe] drm_sched_run_job_work+0x246/0x3f0 [gpu_sched] ? process_one_work+0x18d/0x4e0 process_one_work+0x1f7/0x4e0 worker_thread+0x1da/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0xfe/0x130 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: 9b9529ce379a ("drm/xe: Rename engine to exec_queue") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-2-matthew.brost@intel.com (cherry picked from commit ddadc7120d4be7a40a9745924339c472c5850d14) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_execlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index 42d01bbbf7d0..acb4d9f38fd7 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -212,7 +212,7 @@ static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) { struct xe_execlist_port *port = exl->port; - enum xe_exec_queue_priority priority = exl->active_priority; + enum xe_exec_queue_priority priority = exl->q->sched_props.priority; XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); XE_WARN_ON(priority < 0); From ccff0b21ebe0cbe3f402edb27b0b1fd22a9d08aa Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 22 Feb 2024 15:20:21 -0800 Subject: [PATCH 109/225] drm/xe: Don't support execlists in xe_gt_tlb_invalidation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xe_gt_tlb_invalidation layer implements TLB invalidations for a GuC backend. Simply return if in execlists mode. A follow up may properly implement the xe_gt_tlb_invalidation layer for both GuC and execlists. Fixes: a9351846d945 ("drm/xe: Break of TLB invalidation into its own file") Cc: Rodrigo Vivi Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240222232021.3911545-4-matthew.brost@intel.com (cherry picked from commit a9e483dda3efa5b9aae5d9eef94d2c3a878d9bea) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 7eef23a00d77..f4c485289dbe 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -247,6 +247,14 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_gt_assert(gt, vma); + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) { + if (fence) + __invalidation_fence_signal(fence); + + return 0; + } + action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ if (!xe->info.has_range_tlb_invalidation) { @@ -317,6 +325,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) struct drm_printer p = drm_err_printer(__func__); int ret; + /* Execlists not supported */ + if (gt_to_xe(gt)->info.force_execlist) + return 0; + /* * XXX: See above, this algorithm only works if seqno are always in * order From a41f6b0db58fe3cc2686e4065db48ebf44effa36 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Feb 2024 07:55:54 -0800 Subject: [PATCH 110/225] drm/xe: Use vmalloc for array of bind allocation in bind IOCTL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use vmalloc in effort to allow a user pass in a large number of binds in an IOCTL (mesa use case). Also use array allocations rather open coding the size calculation. v2: Use __GFP_ACCOUNT for allocations (Thomas) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240226155554.103384-1-matthew.brost@intel.com (cherry picked from commit 35ed1d2bfff7b1969e7f99f3641a83ea54f037e2) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 1d82616aa935..041b29439c4b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2740,8 +2740,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, u64 __user *bind_user = u64_to_user_ptr(args->vector_of_binds); - *bind_ops = kmalloc(sizeof(struct drm_xe_vm_bind_op) * - args->num_binds, GFP_KERNEL); + *bind_ops = kvmalloc_array(args->num_binds, + sizeof(struct drm_xe_vm_bind_op), + GFP_KERNEL | __GFP_ACCOUNT); if (!*bind_ops) return -ENOMEM; @@ -2831,7 +2832,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, free_bind_ops: if (args->num_binds > 1) - kfree(*bind_ops); + kvfree(*bind_ops); return err; } @@ -2919,13 +2920,15 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } if (args->num_binds) { - bos = kcalloc(args->num_binds, sizeof(*bos), GFP_KERNEL); + bos = kvcalloc(args->num_binds, sizeof(*bos), + GFP_KERNEL | __GFP_ACCOUNT); if (!bos) { err = -ENOMEM; goto release_vm_lock; } - ops = kcalloc(args->num_binds, sizeof(*ops), GFP_KERNEL); + ops = kvcalloc(args->num_binds, sizeof(*ops), + GFP_KERNEL | __GFP_ACCOUNT); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -3066,10 +3069,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; bos && i < args->num_binds; ++i) xe_bo_put(bos[i]); - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; @@ -3093,10 +3096,10 @@ put_exec_queue: if (q) xe_exec_queue_put(q); free_objs: - kfree(bos); - kfree(ops); + kvfree(bos); + kvfree(ops); if (args->num_binds > 1) - kfree(bind_ops); + kvfree(bind_ops); return err; } From 14d4d0ad0ab5aa980cf71a82da1297b28b274de1 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 14 Feb 2024 16:53:53 -0800 Subject: [PATCH 111/225] drm/xe: get rid of MAX_BINDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa has been issuing a single bind operation per ioctl since xe.ko changed to GPUVA due xe.ko bug #746. If I change Mesa to try again to issue every single bind operation it can in the same ioctl, it hits the MAX_BINDS assertion when running Vulkan conformance tests. Test dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 issues 960 bind operations in a single ioctl, it's the most I could find in the conformance suite. I don't see a reason to keep the MAX_BINDS restriction: it doesn't seem to be preventing any specific issue. If the number is too big for the memory allocations, then those will fail. Nothing related to num_binds seems to be using the stack. Let's just get rid of it. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Testcase: dEQP-VK.sparse_resources.transfer_queue.3d.rgba32i.1024_128_8 References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/746 Cc: Matthew Brost Signed-off-by: Paulo Zanoni Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240215005353.1295420-1-paulo.r.zanoni@intel.com (cherry picked from commit ba6bbdc6eaef92998ec7f323c9e1211d344d2556) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 041b29439c4b..75b44777067e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2719,8 +2719,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) -#define MAX_BINDS 512 /* FIXME: Picking random upper limit */ - static int vm_bind_ioctl_check_args(struct xe_device *xe, struct drm_xe_vm_bind *args, struct drm_xe_vm_bind_op **bind_ops) @@ -2732,8 +2730,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) return -EINVAL; - if (XE_IOCTL_DBG(xe, args->extensions) || - XE_IOCTL_DBG(xe, args->num_binds > MAX_BINDS)) + if (XE_IOCTL_DBG(xe, args->extensions)) return -EINVAL; if (args->num_binds > 1) { From 12cb2b21c2d037a4299028fc56ac941185992e5e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 26 Feb 2024 13:46:37 +0100 Subject: [PATCH 112/225] drm/xe/mmio: fix build warning for BAR resize on 32-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clang complains about a nonsensical test on builds with a 32-bit phys_addr_t, which means resizing will always fail: drivers/gpu/drm/xe/xe_mmio.c:109:23: error: result of comparison of constant 4294967296 with expression of type 'resource_size_t' (aka 'unsigned int') is always false [-Werror,-Wtautological-constant-out-of-range-compare] 109 | root_res->start > 0x100000000ull) | ~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ Previously, BAR resize was always disallowed on 32-bit kernels, but this apparently changed recently. Since 32-bit machines can in theory support PAE/LPAE for large address spaces, this may end up useful, so change the driver to shut up the warning but still work when phys_addr_t/resource_size_t is 64 bit wide. Fixes: 9a6e6c14bfde ("drm/xe/mmio: Use non-atomic writeq/readq variant for 32b") Signed-off-by: Arnd Bergmann Reviewed-by: Lucas De Marchi Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240226124736.1272949-2-arnd@kernel.org Signed-off-by: Lucas De Marchi (cherry picked from commit f5d3983366c0b88ec388b3407b29c1c0862ee2b8) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 5f6b53ea5528..02f7808f28ca 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -105,7 +105,7 @@ static void xe_resize_vram_bar(struct xe_device *xe) pci_bus_for_each_resource(root, root_res, i) { if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && - root_res->start > 0x100000000ull) + (u64)root_res->start > 0x100000000ul) break; } From a09946a9a903e809abab9e0fb813dbf5a32084f5 Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Tue, 20 Feb 2024 10:17:48 +0530 Subject: [PATCH 113/225] drm/xe/xe_bo_move: Enhance xe_bo_move trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhanced xe_bo_move trace to be more readable. It will help to show the migration details. Src and dst details. v2: Modify trace_xe_bo_move(), it takes the integer mem_type rather than a string. Make mem_type_to_name() extern, it will be used by trace.(Thomas) v3: Move mem_type_to_name() to xe_bo.[ch] (Thomas, Matt) v4: Add device details to reduce ambiquity related to vram0/vram1. (Oak) v5: Rename mem_type_to_name to xe_mem_type_to_name. (Thomas) v6: Optimised code to use xe_bo_device(__entry->bo). (Thomas) Cc: Thomas Hellström Cc: Oak Zeng Cc: Kempczynski Zbigniew Cc: Matthew Brost Cc: Brian Welty Signed-off-by: Priyanka Dandamudi Reviewed-by: Oak Zeng Reviewed-by: Thomas Hellström Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240220044748.948496-1-priyanka.dandamudi@intel.com (cherry picked from commit a0df2cc858c309a8bc2e87b4274772587aa25e05) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 11 +++++++++-- drivers/gpu/drm/xe/xe_bo.h | 1 + drivers/gpu/drm/xe/xe_drm_client.c | 12 ++---------- drivers/gpu/drm/xe/xe_trace.h | 25 ++++++++++++++++++++++--- 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 0b0e262e2166..f2ea188663ac 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -28,6 +28,14 @@ #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" +const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = { + [XE_PL_SYSTEM] = "system", + [XE_PL_TT] = "gtt", + [XE_PL_VRAM0] = "vram0", + [XE_PL_VRAM1] = "vram1", + [XE_PL_STOLEN] = "stolen" +}; + static const struct ttm_place sys_placement_flags = { .fpfn = 0, .lpfn = 0, @@ -713,8 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - - trace_xe_bo_move(bo); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 9b1279aca127..8be42ac6cd07 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -243,6 +243,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo); int xe_bo_restore_pinned(struct xe_bo *bo); extern struct ttm_device_funcs xe_ttm_funcs; +extern const char *const xe_mem_type_to_name[]; int xe_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 82d1305e831f..6040e4d22b28 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -131,14 +131,6 @@ static void bo_meminfo(struct xe_bo *bo, static void show_meminfo(struct drm_printer *p, struct drm_file *file) { - static const char *const mem_type_to_name[TTM_NUM_MEM_TYPES] = { - [XE_PL_SYSTEM] = "system", - [XE_PL_TT] = "gtt", - [XE_PL_VRAM0] = "vram0", - [XE_PL_VRAM1] = "vram1", - [4 ... 6] = NULL, - [XE_PL_STOLEN] = "stolen" - }; struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; struct xe_file *xef = file->driver_priv; struct ttm_device *bdev = &xef->xe->ttm; @@ -171,7 +163,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) spin_unlock(&client->bos_lock); for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { - if (!mem_type_to_name[mem_type]) + if (!xe_mem_type_to_name[mem_type]) continue; man = ttm_manager_type(bdev, mem_type); @@ -182,7 +174,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) DRM_GEM_OBJECT_RESIDENT | (mem_type != XE_PL_SYSTEM ? 0 : DRM_GEM_OBJECT_PURGEABLE), - mem_type_to_name[mem_type]); + xe_mem_type_to_name[mem_type]); } } } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 95163c303f3e..0cce98a6b14b 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -12,6 +12,7 @@ #include #include +#include "xe_bo.h" #include "xe_bo_types.h" #include "xe_exec_queue_types.h" #include "xe_gpu_scheduler_types.h" @@ -100,9 +101,27 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, TP_ARGS(bo) ); -DEFINE_EVENT(xe_bo, xe_bo_move, - TP_PROTO(struct xe_bo *bo), - TP_ARGS(bo) +TRACE_EVENT(xe_bo_move, + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement), + TP_ARGS(bo, new_placement, old_placement), + TP_STRUCT__entry( + __field(struct xe_bo *, bo) + __field(size_t, size) + __field(u32, new_placement) + __field(u32, old_placement) + __array(char, device_id, 12) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->size = bo->size; + __entry->new_placement = new_placement; + __entry->old_placement = old_placement; + strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + ), + TP_printk("migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], + xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); DECLARE_EVENT_CLASS(xe_exec_queue, From 4ca5c82988e73f51587e2d7564d44f99429c111a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 22 Feb 2024 06:41:24 -0800 Subject: [PATCH 114/225] drm/xe: Use pointers in trace events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a0df2cc858c3 ("drm/xe/xe_bo_move: Enhance xe_bo_move trace") inadvertently reverted commit 8d038f49c1f3 ("drm/xe: Fix cast on trace variable"), breaking the build on 32bits. As noted by Ville, there's no point in converting the pointers to u64 and add casts everywhere. In fact, it's better to just use %p and let the address be hashed. Convert all the cases in xe_trace.h to use pointers. Cc: Ville Syrjälä Cc: Matt Roper Cc: Priyanka Dandamudi Cc: Oak Zeng Cc: Thomas Hellström Signed-off-by: Lucas De Marchi Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240222144125.2862546-1-lucas.demarchi@intel.com (cherry picked from commit 7a975748d4dc0a524c99a390c6f74b7097ef8cf7) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_trace.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 0cce98a6b14b..3b97633d81d8 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -27,16 +27,16 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, TP_ARGS(fence), TP_STRUCT__entry( - __field(u64, fence) + __field(struct xe_gt_tlb_invalidation_fence *, fence) __field(int, seqno) ), TP_fast_assign( - __entry->fence = (u64)fence; + __entry->fence = fence; __entry->seqno = fence->seqno; ), - TP_printk("fence=0x%016llx, seqno=%d", + TP_printk("fence=%p, seqno=%d", __entry->fence, __entry->seqno) ); @@ -83,16 +83,16 @@ DECLARE_EVENT_CLASS(xe_bo, TP_STRUCT__entry( __field(size_t, size) __field(u32, flags) - __field(u64, vm) + __field(struct xe_vm *, vm) ), TP_fast_assign( __entry->size = bo->size; __entry->flags = bo->flags; - __entry->vm = (unsigned long)bo->vm; + __entry->vm = bo->vm; ), - TP_printk("size=%zu, flags=0x%02x, vm=0x%016llx", + TP_printk("size=%zu, flags=0x%02x, vm=%p", __entry->size, __entry->flags, __entry->vm) ); @@ -346,16 +346,16 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_STRUCT__entry( __field(u64, ctx) __field(u32, seqno) - __field(u64, fence) + __field(struct xe_hw_fence *, fence) ), TP_fast_assign( __entry->ctx = fence->dma.context; __entry->seqno = fence->dma.seqno; - __entry->fence = (unsigned long)fence; + __entry->fence = fence; ), - TP_printk("ctx=0x%016llx, fence=0x%016llx, seqno=%u", + TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", __entry->ctx, __entry->fence, __entry->seqno) ); @@ -384,7 +384,7 @@ DECLARE_EVENT_CLASS(xe_vma, TP_ARGS(vma), TP_STRUCT__entry( - __field(u64, vma) + __field(struct xe_vma *, vma) __field(u32, asid) __field(u64, start) __field(u64, end) @@ -392,14 +392,14 @@ DECLARE_EVENT_CLASS(xe_vma, ), TP_fast_assign( - __entry->vma = (unsigned long)vma; + __entry->vma = vma; __entry->asid = xe_vma_vm(vma)->usm.asid; __entry->start = xe_vma_start(vma); __entry->end = xe_vma_end(vma) - 1; __entry->ptr = xe_vma_userptr(vma); ), - TP_printk("vma=0x%016llx, asid=0x%05x, start=0x%012llx, end=0x%012llx, ptr=0x%012llx,", + TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", __entry->vma, __entry->asid, __entry->start, __entry->end, __entry->ptr) ) @@ -484,16 +484,16 @@ DECLARE_EVENT_CLASS(xe_vm, TP_ARGS(vm), TP_STRUCT__entry( - __field(u64, vm) + __field(struct xe_vm *, vm) __field(u32, asid) ), TP_fast_assign( - __entry->vm = (unsigned long)vm; + __entry->vm = vm; __entry->asid = vm->usm.asid; ), - TP_printk("vm=0x%016llx, asid=0x%05x", __entry->vm, + TP_printk("vm=%p, asid=0x%05x", __entry->vm, __entry->asid) ); From 86b3cd6d0713b3b1cb4e17dbddd4d4a2bff98d60 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:51 +0200 Subject: [PATCH 115/225] drm/xe: Expose user fence from xe_sync_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By allowing getting reference to user fence, we can control the lifetime outside of sync entries. This is needed to allow vma to track the associated user fence that was provided with bind ioctl. v2: xe_user_fence can be kept opaque (Jani, Matt) v3: indent fix (Matt) Cc: Thomas Hellström Cc: Matthew Brost Cc: Jani Nikula Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-2-mika.kuoppala@linux.intel.com (cherry picked from commit 977e5b82e0901480bc201342d39f855fc0a2ef47) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_sync.c | 58 ++++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_sync.h | 4 +++ drivers/gpu/drm/xe/xe_sync_types.h | 2 +- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index aab92bee1d7c..02c9577fe418 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -19,7 +19,7 @@ #include "xe_macros.h" #include "xe_sched_job_types.h" -struct user_fence { +struct xe_user_fence { struct xe_device *xe; struct kref refcount; struct dma_fence_cb cb; @@ -27,31 +27,32 @@ struct user_fence { struct mm_struct *mm; u64 __user *addr; u64 value; + int signalled; }; static void user_fence_destroy(struct kref *kref) { - struct user_fence *ufence = container_of(kref, struct user_fence, + struct xe_user_fence *ufence = container_of(kref, struct xe_user_fence, refcount); mmdrop(ufence->mm); kfree(ufence); } -static void user_fence_get(struct user_fence *ufence) +static void user_fence_get(struct xe_user_fence *ufence) { kref_get(&ufence->refcount); } -static void user_fence_put(struct user_fence *ufence) +static void user_fence_put(struct xe_user_fence *ufence) { kref_put(&ufence->refcount, user_fence_destroy); } -static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, - u64 value) +static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, + u64 value) { - struct user_fence *ufence; + struct xe_user_fence *ufence; ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) @@ -69,7 +70,7 @@ static struct user_fence *user_fence_create(struct xe_device *xe, u64 addr, static void user_fence_worker(struct work_struct *w) { - struct user_fence *ufence = container_of(w, struct user_fence, worker); + struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker); if (mmget_not_zero(ufence->mm)) { kthread_use_mm(ufence->mm); @@ -80,10 +81,11 @@ static void user_fence_worker(struct work_struct *w) } wake_up_all(&ufence->xe->ufence_wq); + WRITE_ONCE(ufence->signalled, 1); user_fence_put(ufence); } -static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) +static void kick_ufence(struct xe_user_fence *ufence, struct dma_fence *fence) { INIT_WORK(&ufence->worker, user_fence_worker); queue_work(ufence->xe->ordered_wq, &ufence->worker); @@ -92,7 +94,7 @@ static void kick_ufence(struct user_fence *ufence, struct dma_fence *fence) static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct user_fence *ufence = container_of(cb, struct user_fence, cb); + struct xe_user_fence *ufence = container_of(cb, struct xe_user_fence, cb); kick_ufence(ufence, fence); } @@ -340,3 +342,39 @@ err_out: return ERR_PTR(-ENOMEM); } + +/** + * xe_sync_ufence_get() - Get user fence from sync + * @sync: input sync + * + * Get a user fence reference from sync. + * + * Return: xe_user_fence pointer with reference + */ +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync) +{ + user_fence_get(sync->ufence); + + return sync->ufence; +} + +/** + * xe_sync_ufence_put() - Put user fence reference + * @ufence: user fence reference + * + */ +void xe_sync_ufence_put(struct xe_user_fence *ufence) +{ + user_fence_put(ufence); +} + +/** + * xe_sync_ufence_get_status() - Get user fence status + * @ufence: user fence + * + * Return: 1 if signalled, 0 not signalled, <0 on error + */ +int xe_sync_ufence_get_status(struct xe_user_fence *ufence) +{ + return READ_ONCE(ufence->signalled); +} diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index f43cdcaca6c5..0fd0d51208e6 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -38,4 +38,8 @@ static inline bool xe_sync_is_ufence(struct xe_sync_entry *sync) return !!sync->ufence; } +struct xe_user_fence *xe_sync_ufence_get(struct xe_sync_entry *sync); +void xe_sync_ufence_put(struct xe_user_fence *ufence); +int xe_sync_ufence_get_status(struct xe_user_fence *ufence); + #endif diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 852db5e7884f..30ac3f51993b 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,7 +18,7 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; - struct user_fence *ufence; + struct xe_user_fence *ufence; u64 addr; u64 timeline_value; u32 type; From 785f4cc0689f32ab615f043d7889d17eb4f37061 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 15 Feb 2024 20:11:52 +0200 Subject: [PATCH 116/225] drm/xe: Deny unbinds if uapi ufence pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If user fence was provided for MAP in vm_bind_ioctl and it has still not been signalled, deny UNMAP of said vma with EBUSY as long as unsignalled fence exists. This guarantees that MAP vs UNMAP sequences won't escape under the radar if we ever want to track the client's state wrt to completed and accessible MAPs. By means of intercepting the ufence release signalling. v2: find ufence with num_fences > 1 (Matt) v3: careful on clearing vma ufence (Matt) Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1159 Cc: Thomas Hellström Cc: Matthew Brost Cc: Joonas Lahtinen Signed-off-by: Mika Kuoppala Reviewed-by: Matthew Brost Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240215181152.450082-3-mika.kuoppala@linux.intel.com (cherry picked from commit 158900ade92cce5ab85a06d618eb51e6c7ffb28a) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_vm.c | 37 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm_types.h | 7 ++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 75b44777067e..3b21afe5b488 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -897,6 +897,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) struct xe_device *xe = vm->xe; bool read_only = xe_vma_read_only(vma); + if (vma->ufence) { + xe_sync_ufence_put(vma->ufence); + vma->ufence = NULL; + } + if (xe_vma_is_userptr(vma)) { struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; @@ -1608,6 +1613,16 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q, trace_xe_vma_unbind(vma); + if (vma->ufence) { + struct xe_user_fence * const f = vma->ufence; + + if (!xe_sync_ufence_get_status(f)) + return ERR_PTR(-EBUSY); + + vma->ufence = NULL; + xe_sync_ufence_put(f); + } + if (number_tiles > 1) { fences = kmalloc_array(number_tiles, sizeof(*fences), GFP_KERNEL); @@ -1741,6 +1756,21 @@ err_fences: return ERR_PTR(err); } +static struct xe_user_fence * +find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) +{ + unsigned int i; + + for (i = 0; i < num_syncs; i++) { + struct xe_sync_entry *e = &syncs[i]; + + if (xe_sync_is_ufence(e)) + return xe_sync_ufence_get(e); + } + + return NULL; +} + static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, bool immediate, bool first_op, @@ -1748,9 +1778,16 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, { struct dma_fence *fence; struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); + struct xe_user_fence *ufence; xe_vm_assert_held(vm); + ufence = find_ufence_get(syncs, num_syncs); + if (vma->ufence && ufence) + xe_sync_ufence_put(vma->ufence); + + vma->ufence = ufence ?: vma->ufence; + if (immediate) { fence = xe_vm_bind_vma(vma, q, syncs, num_syncs, first_op, last_op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 0f220b5d2e7b..7300eea5394b 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -19,6 +19,7 @@ struct xe_bo; struct xe_sync_entry; +struct xe_user_fence; struct xe_vm; #define XE_VMA_READ_ONLY DRM_GPUVA_USERBITS @@ -104,6 +105,12 @@ struct xe_vma { * @pat_index: The pat index to use when encoding the PTEs for this vma. */ u16 pat_index; + + /** + * @ufence: The user fence that was provided with MAP. + * Needs to be signalled before UNMAP can be processed. + */ + struct xe_user_fence *ufence; }; /** From 8188cae3cc3d8018ec97ca9ab8caa3acc69a056d Mon Sep 17 00:00:00 2001 From: Priyanka Dandamudi Date: Wed, 21 Feb 2024 15:49:50 +0530 Subject: [PATCH 117/225] drm/xe/xe_trace: Add move_lacks_source detail to xe_bo_move trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add move_lacks_source detail to xe_bo_move trace to make it readable that is to check if it is migrate clear or migrate copy. Cc: Thomas Hellström Signed-off-by: Priyanka Dandamudi Reviewed-by: Thomas Hellström Fixes: a09946a9a903 ("drm/xe/xe_bo_move: Enhance xe_bo_move trace") Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240221101950.1019312-1-priyanka.dandamudi@intel.com (cherry picked from commit 8034f6b070cc3716e81b1846f8a4ca5339c3f29b) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.c | 2 +- drivers/gpu/drm/xe/xe_trace.h | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index f2ea188663ac..4d3b80ec906d 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -721,7 +721,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, migrate = xe->tiles[0].migrate; xe_assert(xe, migrate); - trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type); + trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); xe_device_mem_access_get(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 3b97633d81d8..4ddc55527f9a 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -102,14 +102,16 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, ); TRACE_EVENT(xe_bo_move, - TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement), - TP_ARGS(bo, new_placement, old_placement), + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, + bool move_lacks_source), + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), TP_STRUCT__entry( __field(struct xe_bo *, bo) __field(size_t, size) __field(u32, new_placement) __field(u32, old_placement) __array(char, device_id, 12) + __field(bool, move_lacks_source) ), TP_fast_assign( @@ -118,9 +120,11 @@ TRACE_EVENT(xe_bo_move, __entry->new_placement = new_placement; __entry->old_placement = old_placement; strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); + __entry->move_lacks_source = move_lacks_source; ), - TP_printk("migrate object %p [size %zu] from %s to %s device_id:%s", - __entry->bo, __entry->size, xe_mem_type_to_name[__entry->old_placement], + TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", + __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, + xe_mem_type_to_name[__entry->old_placement], xe_mem_type_to_name[__entry->new_placement], __entry->device_id) ); From d0b06dc48fb15902d7da09c5c0861e7f042a9381 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 29 Feb 2024 22:17:37 +0900 Subject: [PATCH 118/225] firewire: core: use long bus reset on gap count error When resetting the bus after a gap count error, use a long rather than short bus reset. IEEE 1394-1995 uses only long bus resets. IEEE 1394a adds the option of short bus resets. When video or audio transmission is in progress and a device is hot-plugged elsewhere on the bus, the resulting bus reset can cause video frame drops or audio dropouts. Short bus resets reduce or eliminate this problem. Accordingly, short bus resets are almost always preferred. However, on a mixed 1394/1394a bus, a short bus reset can trigger an immediate additional bus reset. This double bus reset can be interpreted differently by different nodes on the bus, resulting in an inconsistent gap count after the bus reset. An inconsistent gap count will cause another bus reset, leading to a neverending bus reset loop. This only happens for some bus topologies, not for all mixed 1394/1394a buses. By instead sending a long bus reset after a gap count inconsistency, we avoid the doubled bus reset, restoring the bus to normal operation. Signed-off-by: Adam Goldman Link: https://sourceforge.net/p/linux1394/mailman/message/58741624/ Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 8aaa7fcb2630..401a77e3b5fa 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -500,7 +500,19 @@ static void bm_work(struct work_struct *work) fw_notice(card, "phy config: new root=%x, gap_count=%d\n", new_root_id, gap_count); fw_send_phy_config(card, new_root_id, generation, gap_count); - reset_bus(card, true); + /* + * Where possible, use a short bus reset to minimize + * disruption to isochronous transfers. But in the event + * of a gap count inconsistency, use a long bus reset. + * + * As noted in 1394a 8.4.6.2, nodes on a mixed 1394/1394a bus + * may set different gap counts after a bus reset. On a mixed + * 1394/1394a bus, a short bus reset can get doubled. Some + * nodes may treat the double reset as one bus reset and others + * may treat it as two, causing a gap count inconsistency + * again. Using a long bus reset prevents this. + */ + reset_bus(card, card->gap_count != 0); /* Will allocate broadcast channel after the reset. */ goto out; } From 680945f0aa5084d49bbfb705ca162fc00cc146ae Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Thu, 15 Feb 2024 15:49:11 -0800 Subject: [PATCH 119/225] MAINTAINERS: Update SiFive driver maintainers Add myself as a maintainer for the various SiFive drivers, since I have been performing cleanup activity on these drivers and reviewing patches to them for a while now. Remove Palmer as a maintainer, as he is focused on overall RISC-V architecture support. Collapse some duplicate entries into the main SiFive drivers entry: - Conor is already maintainer of standalone cache drivers as a whole, and these files are also covered by the "sifive" file name regex. - Paul's git tree has not been updated since 2018, and all file names matching the "fu540" pattern also match the "sifive" pattern. - Green has not been active on the LKML for a couple of years. Signed-off-by: Samuel Holland Acked-by: Paul Walmsley Acked-by: Conor Dooley Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20240215234941.1663791-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- MAINTAINERS | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..7a0139a9b1bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1395,6 +1395,7 @@ F: drivers/hwmon/max31760.c ANALOGBITS PLL LIBRARIES M: Paul Walmsley +M: Samuel Holland S: Supported F: drivers/clk/analogbits/* F: include/linux/clk/analogbits* @@ -16720,6 +16721,7 @@ F: drivers/pci/controller/dwc/*layerscape* PCI DRIVER FOR FU740 M: Paul Walmsley M: Greentime Hu +M: Samuel Holland L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/sifive,fu740-pcie.yaml @@ -19965,35 +19967,14 @@ S: Maintained F: drivers/watchdog/simatic-ipc-wdt.c SIFIVE DRIVERS -M: Palmer Dabbelt M: Paul Walmsley +M: Samuel Holland L: linux-riscv@lists.infradead.org S: Supported -N: sifive -K: [^@]sifive - -SIFIVE CACHE DRIVER -M: Conor Dooley -L: linux-riscv@lists.infradead.org -S: Maintained -F: Documentation/devicetree/bindings/cache/sifive,ccache0.yaml -F: drivers/cache/sifive_ccache.c - -SIFIVE FU540 SYSTEM-ON-CHIP -M: Paul Walmsley -M: Palmer Dabbelt -L: linux-riscv@lists.infradead.org -S: Supported -T: git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git -N: fu540 -K: fu540 - -SIFIVE PDMA DRIVER -M: Green Wan -S: Maintained -F: Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml F: drivers/dma/sf-pdma/ - +N: sifive +K: fu[57]40 +K: [^@]sifive SILEAD TOUCHSCREEN DRIVER M: Hans de Goede From 34b567868777e9fd39ec5333969728a7f0cf179c Mon Sep 17 00:00:00 2001 From: Fei Wu Date: Wed, 28 Feb 2024 19:54:25 +0800 Subject: [PATCH 120/225] perf: RISCV: Fix panic on pmu overflow handler (1 << idx) of int is not desired when setting bits in unsigned long overflowed_ctrs, use BIT() instead. This panic happens when running 'perf record -e branches' on sophgo sg2042. [ 273.311852] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 [ 273.320851] Oops [#1] [ 273.323179] Modules linked in: [ 273.326303] CPU: 0 PID: 1475 Comm: perf Not tainted 6.6.0-rc3+ #9 [ 273.332521] Hardware name: Sophgo Mango (DT) [ 273.336878] epc : riscv_pmu_ctr_get_width_mask+0x8/0x62 [ 273.342291] ra : pmu_sbi_ovf_handler+0x2e0/0x34e [ 273.347091] epc : ffffffff80aecd98 ra : ffffffff80aee056 sp : fffffff6e36928b0 [ 273.354454] gp : ffffffff821f82d0 tp : ffffffd90c353200 t0 : 0000002ade4f9978 [ 273.361815] t1 : 0000000000504d55 t2 : ffffffff8016cd8c s0 : fffffff6e3692a70 [ 273.369180] s1 : 0000000000000020 a0 : 0000000000000000 a1 : 00001a8e81800000 [ 273.376540] a2 : 0000003c00070198 a3 : 0000003c00db75a4 a4 : 0000000000000015 [ 273.383901] a5 : ffffffd7ff8804b0 a6 : 0000000000000015 a7 : 000000000000002a [ 273.391327] s2 : 000000000000ffff s3 : 0000000000000000 s4 : ffffffd7ff8803b0 [ 273.398773] s5 : 0000000000504d55 s6 : ffffffd905069800 s7 : ffffffff821fe210 [ 273.406139] s8 : 000000007fffffff s9 : ffffffd7ff8803b0 s10: ffffffd903f29098 [ 273.413660] s11: 0000000080000000 t3 : 0000000000000003 t4 : ffffffff8017a0ca [ 273.421022] t5 : ffffffff8023cfc2 t6 : ffffffd9040780e8 [ 273.426437] status: 0000000200000100 badaddr: 0000000000000098 cause: 000000000000000d [ 273.434512] [] riscv_pmu_ctr_get_width_mask+0x8/0x62 [ 273.441169] [] handle_percpu_devid_irq+0x98/0x1ee [ 273.447562] [] generic_handle_domain_irq+0x28/0x36 [ 273.454151] [] riscv_intc_irq+0x36/0x4e [ 273.459659] [] handle_riscv_irq+0x4a/0x74 [ 273.465442] [] do_irq+0x62/0x92 [ 273.470360] Code: 0420 60a2 6402 5529 0141 8082 0013 0000 0013 0000 (6d5c) b783 [ 273.477921] ---[ end trace 0000000000000000 ]--- [ 273.482630] Kernel panic - not syncing: Fatal exception in interrupt Reviewed-by: Alexandre Ghiti Reviewed-by: Atish Patra Signed-off-by: Fei Wu Link: https://lore.kernel.org/r/20240228115425.2613856-1-fei2.wu@intel.com Signed-off-by: Palmer Dabbelt --- drivers/perf/riscv_pmu_sbi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 16acd4dcdb96..452aab49db1e 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -512,7 +512,7 @@ static void pmu_sbi_set_scounteren(void *arg) if (event->hw.idx != -1) csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event))); + csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event))); } static void pmu_sbi_reset_scounteren(void *arg) @@ -521,7 +521,7 @@ static void pmu_sbi_reset_scounteren(void *arg) if (event->hw.idx != -1) csr_write(CSR_SCOUNTEREN, - csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event))); + csr_read(CSR_SCOUNTEREN) & ~BIT(pmu_sbi_csr_index(event))); } static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) @@ -731,14 +731,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) /* compute hardware counter index */ hidx = info->csr - CSR_CYCLE; /* check if the corresponding bit is set in sscountovf */ - if (!(overflow & (1 << hidx))) + if (!(overflow & BIT(hidx))) continue; /* * Keep a track of overflowed counters so that they can be started * with updated initial value. */ - overflowed_ctrs |= 1 << lidx; + overflowed_ctrs |= BIT(lidx); hw_evt = &event->hw; riscv_pmu_event_update(event); perf_sample_data_init(&data, 0, hw_evt->last_period); From 3fb3f7164edc467450e650dca51dbe4823315a56 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 27 Feb 2024 22:55:33 -0800 Subject: [PATCH 121/225] riscv: Fix enabling cbo.zero when running in M-mode When the kernel is running in M-mode, the CBZE bit must be set in the menvcfg CSR, not in senvcfg. Cc: Fixes: 43c16d51a19b ("RISC-V: Enable cbo.zero in usermode") Reviewed-by: Andrew Jones Signed-off-by: Samuel Holland Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240228065559.3434837-2-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/csr.h | 2 ++ arch/riscv/kernel/cpufeature.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 510014051f5d..2468c55933cd 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -424,6 +424,7 @@ # define CSR_STATUS CSR_MSTATUS # define CSR_IE CSR_MIE # define CSR_TVEC CSR_MTVEC +# define CSR_ENVCFG CSR_MENVCFG # define CSR_SCRATCH CSR_MSCRATCH # define CSR_EPC CSR_MEPC # define CSR_CAUSE CSR_MCAUSE @@ -448,6 +449,7 @@ # define CSR_STATUS CSR_SSTATUS # define CSR_IE CSR_SIE # define CSR_TVEC CSR_STVEC +# define CSR_ENVCFG CSR_SENVCFG # define CSR_SCRATCH CSR_SSCRATCH # define CSR_EPC CSR_SEPC # define CSR_CAUSE CSR_SCAUSE diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 89920f84d0a3..c5b13f7dd482 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -950,7 +950,7 @@ arch_initcall(check_unaligned_access_all_cpus); void riscv_user_isa_enable(void) { if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_SENVCFG, ENVCFG_CBZE); + csr_set(CSR_ENVCFG, ENVCFG_CBZE); } #ifdef CONFIG_RISCV_ALTERNATIVE From 4774848fef6041716a4883217eb75f6b10eb183b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 27 Feb 2024 22:55:34 -0800 Subject: [PATCH 122/225] riscv: Add a custom ISA extension for the [ms]envcfg CSR The [ms]envcfg CSR was added in version 1.12 of the RISC-V privileged ISA (aka S[ms]1p12). However, bits in this CSR are defined by several other extensions which may be implemented separately from any particular version of the privileged ISA (for example, some unrelated errata may prevent an implementation from claiming conformance with Ss1p12). As a result, Linux cannot simply use the privileged ISA version to determine if the CSR is present. It must also check if any of these other extensions are implemented. It also cannot probe the existence of the CSR at runtime, because Linux does not require Sstrict, so (in the absence of additional information) it cannot know if a CSR at that address is [ms]envcfg or part of some non-conforming vendor extension. Since there are several standard extensions that imply the existence of the [ms]envcfg CSR, it becomes unwieldy to check for all of them wherever the CSR is accessed. Instead, define a custom Xlinuxenvcfg ISA extension bit that is implied by the other extensions and denotes that the CSR exists as defined in the privileged ISA, containing at least one of the fields common between menvcfg and senvcfg. This extension does not need to be parsed from the devicetree or ISA string because it can only be implemented as a subset of some other standard extension. Cc: # v6.7+ Signed-off-by: Samuel Holland Reviewed-by: Conor Dooley Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20240228065559.3434837-3-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 2 ++ arch/riscv/kernel/cpufeature.c | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 5340f818746b..1f2d2599c655 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -81,6 +81,8 @@ #define RISCV_ISA_EXT_ZTSO 72 #define RISCV_ISA_EXT_ZACAS 73 +#define RISCV_ISA_EXT_XLINUXENVCFG 127 + #define RISCV_ISA_EXT_MAX 128 #define RISCV_ISA_EXT_INVALID U32_MAX diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c5b13f7dd482..dacffef68ce2 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -201,6 +201,16 @@ static const unsigned int riscv_zvbb_exts[] = { RISCV_ISA_EXT_ZVKB }; +/* + * While the [ms]envcfg CSRs were not defined until version 1.12 of the RISC-V + * privileged ISA, the existence of the CSRs is implied by any extension which + * specifies [ms]envcfg bit(s). Hence, we define a custom ISA extension for the + * existence of the CSR, and treat it as a subset of those other extensions. + */ +static const unsigned int riscv_xlinuxenvcfg_exts[] = { + RISCV_ISA_EXT_XLINUXENVCFG +}; + /* * The canonical order of ISA extension names in the ISA string is defined in * chapter 27 of the unprivileged specification. @@ -250,8 +260,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(c, RISCV_ISA_EXT_c), __RISCV_ISA_EXT_DATA(v, RISCV_ISA_EXT_v), __RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h), - __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), - __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), + __RISCV_ISA_EXT_SUPERSET(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts), + __RISCV_ISA_EXT_SUPERSET(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), From 05ab803d1ad8ac505ade77c6bd3f86b1b4ea0dc4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 27 Feb 2024 22:55:35 -0800 Subject: [PATCH 123/225] riscv: Save/restore envcfg CSR during CPU suspend The value of the [ms]envcfg CSR is lost when entering a nonretentive idle state, so the CSR must be rewritten when resuming the CPU. Cc: # v6.7+ Fixes: 43c16d51a19b ("RISC-V: Enable cbo.zero in usermode") Signed-off-by: Samuel Holland Reviewed-by: Conor Dooley Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20240228065559.3434837-4-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/suspend.h | 1 + arch/riscv/kernel/suspend.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h index 02f87867389a..491296a335d0 100644 --- a/arch/riscv/include/asm/suspend.h +++ b/arch/riscv/include/asm/suspend.h @@ -14,6 +14,7 @@ struct suspend_context { struct pt_regs regs; /* Saved and restored by high-level functions */ unsigned long scratch; + unsigned long envcfg; unsigned long tvec; unsigned long ie; #ifdef CONFIG_MMU diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c index 239509367e42..299795341e8a 100644 --- a/arch/riscv/kernel/suspend.c +++ b/arch/riscv/kernel/suspend.c @@ -15,6 +15,8 @@ void suspend_save_csrs(struct suspend_context *context) { context->scratch = csr_read(CSR_SCRATCH); + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + context->envcfg = csr_read(CSR_ENVCFG); context->tvec = csr_read(CSR_TVEC); context->ie = csr_read(CSR_IE); @@ -36,6 +38,8 @@ void suspend_save_csrs(struct suspend_context *context) void suspend_restore_csrs(struct suspend_context *context) { csr_write(CSR_SCRATCH, context->scratch); + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + csr_write(CSR_ENVCFG, context->envcfg); csr_write(CSR_TVEC, context->tvec); csr_write(CSR_IE, context->ie); From 16ab4646c9057e0528b985ad772e3cb88c613db2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 27 Feb 2024 21:50:15 +0100 Subject: [PATCH 124/225] Revert "riscv: mm: support Svnapot in huge vmap" This reverts commit ce173474cf19fe7fbe8f0fc74e3c81ec9c3d9807. We cannot correctly deal with NAPOT mappings in vmalloc/vmap because if some part of a NAPOT mapping is unmapped, the remaining mapping is not updated accordingly. For example: ptr = vmalloc_huge(64 * 1024, GFP_KERNEL); vunmap_range((unsigned long)(ptr + PAGE_SIZE), (unsigned long)(ptr + 64 * 1024)); leads to the following kernel page table dump: 0xffff8f8000ef0000-0xffff8f8000ef1000 0x00000001033c0000 4K PTE N .. .. D A G . . W R V Meaning the first entry which was not unmapped still has the N bit set, which, if accessed first and cached in the TLB, could allow access to the unmapped range. That's because the logic to break the NAPOT mapping does not exist and likely won't. Indeed, to break a NAPOT mapping, we first have to clear the whole mapping, flush the TLB and then set the new mapping ("break- before-make" equivalent). That works fine in userspace since we can handle any pagefault occurring on the remaining mapping but we can't handle a kernel pagefault on such mapping. So fix this by reverting the commit that introduced the vmap/vmalloc support. Fixes: ce173474cf19 ("riscv: mm: support Svnapot in huge vmap") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240227205016.121901-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vmalloc.h | 61 +------------------------------- 1 file changed, 1 insertion(+), 60 deletions(-) diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index 924d01b56c9a..51f6dfe19745 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -19,65 +19,6 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot) return true; } -#ifdef CONFIG_RISCV_ISA_SVNAPOT -#include +#endif -#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size -static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, - u64 pfn, unsigned int max_page_shift) -{ - unsigned long map_size = PAGE_SIZE; - unsigned long size, order; - - if (!has_svnapot()) - return map_size; - - for_each_napot_order_rev(order) { - if (napot_cont_shift(order) > max_page_shift) - continue; - - size = napot_cont_size(order); - if (end - addr < size) - continue; - - if (!IS_ALIGNED(addr, size)) - continue; - - if (!IS_ALIGNED(PFN_PHYS(pfn), size)) - continue; - - map_size = size; - break; - } - - return map_size; -} - -#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift -static inline int arch_vmap_pte_supported_shift(unsigned long size) -{ - int shift = PAGE_SHIFT; - unsigned long order; - - if (!has_svnapot()) - return shift; - - WARN_ON_ONCE(size >= PMD_SIZE); - - for_each_napot_order_rev(order) { - if (napot_cont_size(order) > size) - continue; - - if (!IS_ALIGNED(size, napot_cont_size(order))) - continue; - - shift = napot_cont_shift(order); - break; - } - - return shift; -} - -#endif /* CONFIG_RISCV_ISA_SVNAPOT */ -#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ #endif /* _ASM_RISCV_VMALLOC_H */ From e0fe5ab4192c171c111976dbe90bbd37d3976be0 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 27 Feb 2024 21:50:16 +0100 Subject: [PATCH 125/225] riscv: Fix pte_leaf_size() for NAPOT pte_leaf_size() must be reimplemented to add support for NAPOT mappings. Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240227205016.121901-3-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 0c94260b5d0c..89f5f1bd6e46 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -439,6 +439,10 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } +#define pte_leaf_size(pte) (pte_napot(pte) ? \ + napot_cont_size(napot_cont_order(pte)) :\ + PAGE_SIZE) + #ifdef CONFIG_NUMA_BALANCING /* * See the comment in include/asm-generic/pgtable.h From a11dd49dcb9376776193e15641f84fcc1e5980c9 Mon Sep 17 00:00:00 2001 From: Dimitris Vlachos Date: Thu, 29 Feb 2024 21:17:23 +0200 Subject: [PATCH 126/225] riscv: Sparse-Memory/vmemmap out-of-bounds fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Offset vmemmap so that the first page of vmemmap will be mapped to the first page of physical memory in order to ensure that vmemmap’s bounds will be respected during pfn_to_page()/page_to_pfn() operations. The conversion macros will produce correct SV39/48/57 addresses for every possible/valid DRAM_BASE inside the physical memory limits. v2:Address Alex's comments Suggested-by: Alexandre Ghiti Signed-off-by: Dimitris Vlachos Reported-by: Dimitris Vlachos Closes: https://lore.kernel.org/linux-riscv/20240202135030.42265-1-csd4492@csd.uoc.gr Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240229191723.32779-1-dvlachos@ics.forth.gr Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 89f5f1bd6e46..6066822e7396 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -84,7 +84,7 @@ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. */ -#define vmemmap ((struct page *)VMEMMAP_START) +#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT)) #define PCI_IO_SIZE SZ_16M #define PCI_IO_END VMEMMAP_START From 25125a4762835d62ba1e540c1351d447fc1f6c7c Mon Sep 17 00:00:00 2001 From: Kamalesh Babulal Date: Thu, 29 Feb 2024 15:41:14 +0530 Subject: [PATCH 127/225] cgroup/cpuset: Fix retval in update_cpumask() The update_cpumask(), checks for newly requested cpumask by calling validate_change(), which returns an error on passing an invalid set of cpu(s). Independent of the error returned, update_cpumask() always returns zero, suppressing the error and returning success to the user on writing an invalid cpu range for a cpuset. Fix it by returning retval instead, which is returned by validate_change(). Fixes: 99fe36ba6fc1 ("cgroup/cpuset: Improve temporary cpumasks handling") Signed-off-by: Kamalesh Babulal Reviewed-by: Waiman Long Cc: stable@vger.kernel.org # v6.6+ Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 7260f095802a..927bef3a598a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2562,7 +2562,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, update_partition_sd_lb(cs, old_prs); out_free: free_cpumasks(NULL, &tmp); - return 0; + return retval; } /** From a1a4a9ca77f143c00fce69c1239887ff8b813bec Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Feb 2024 12:29:26 +0000 Subject: [PATCH 128/225] btrfs: fix race between ordered extent completion and fiemap For fiemap we recently stopped locking the target extent range for the whole duration of the fiemap call, in order to avoid a deadlock in a scenario where the fiemap buffer happens to be a memory mapped range of the same file. This use case is very unlikely to be useful in practice but it may be triggered by fuzz testing (syzbot, etc). However by not locking the target extent range for the whole duration of the fiemap call we can race with an ordered extent. This happens like this: 1) The fiemap task finishes processing a file extent item that covers the file range [512K, 1M[, and that file extent item is the last item in the leaf currently being processed; 2) And ordered extent for the file range [768K, 2M[, in COW mode, completes (btrfs_finish_one_ordered()) and the file extent item covering the range [512K, 1M[ is trimmed to cover the range [512K, 768K[ and then a new file extent item for the range [768K, 2M[ is inserted in the inode's subvolume tree; 3) The fiemap task calls fiemap_next_leaf_item(), which then calls btrfs_next_leaf() to find the next leaf / item. This finds that the the next key following the one we previously processed (its type is BTRFS_EXTENT_DATA_KEY and its offset is 512K), is the key corresponding to the new file extent item inserted by the ordered extent, which has a type of BTRFS_EXTENT_DATA_KEY and an offset of 768K; 4) Later the fiemap code ends up at emit_fiemap_extent() and triggers the warning: if (cache->offset + cache->len > offset) { WARN_ON(1); return -EINVAL; } Since we get 1M > 768K, because the previously emitted entry for the old extent covering the file range [512K, 1M[ ends at an offset that is greater than the new extent's start offset (768K). This makes fiemap fail with -EINVAL besides triggering the warning that produces a stack trace like the following: [1621.677651] ------------[ cut here ]------------ [1621.677656] WARNING: CPU: 1 PID: 204366 at fs/btrfs/extent_io.c:2492 emit_fiemap_extent+0x84/0x90 [btrfs] [1621.677899] Modules linked in: btrfs blake2b_generic (...) [1621.677951] CPU: 1 PID: 204366 Comm: pool Not tainted 6.8.0-rc5-btrfs-next-151+ #1 [1621.677954] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [1621.677956] RIP: 0010:emit_fiemap_extent+0x84/0x90 [btrfs] [1621.678033] Code: 2b 4c 89 63 (...) [1621.678035] RSP: 0018:ffffab16089ffd20 EFLAGS: 00010206 [1621.678037] RAX: 00000000004fa000 RBX: ffffab16089ffe08 RCX: 0000000000009000 [1621.678039] RDX: 00000000004f9000 RSI: 00000000004f1000 RDI: ffffab16089ffe90 [1621.678040] RBP: 00000000004f9000 R08: 0000000000001000 R09: 0000000000000000 [1621.678041] R10: 0000000000000000 R11: 0000000000001000 R12: 0000000041d78000 [1621.678043] R13: 0000000000001000 R14: 0000000000000000 R15: ffff9434f0b17850 [1621.678044] FS: 00007fa6e20006c0(0000) GS:ffff943bdfa40000(0000) knlGS:0000000000000000 [1621.678046] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1621.678048] CR2: 00007fa6b0801000 CR3: 000000012d404002 CR4: 0000000000370ef0 [1621.678053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [1621.678055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [1621.678056] Call Trace: [1621.678074] [1621.678076] ? __warn+0x80/0x130 [1621.678082] ? emit_fiemap_extent+0x84/0x90 [btrfs] [1621.678159] ? report_bug+0x1f4/0x200 [1621.678164] ? handle_bug+0x42/0x70 [1621.678167] ? exc_invalid_op+0x14/0x70 [1621.678170] ? asm_exc_invalid_op+0x16/0x20 [1621.678178] ? emit_fiemap_extent+0x84/0x90 [btrfs] [1621.678253] extent_fiemap+0x766/0xa30 [btrfs] [1621.678339] btrfs_fiemap+0x45/0x80 [btrfs] [1621.678420] do_vfs_ioctl+0x1e4/0x870 [1621.678431] __x64_sys_ioctl+0x6a/0xc0 [1621.678434] do_syscall_64+0x52/0x120 [1621.678445] entry_SYSCALL_64_after_hwframe+0x6e/0x76 There's also another case where before calling btrfs_next_leaf() we are processing a hole or a prealloc extent and we had several delalloc ranges within that hole or prealloc extent. In that case if the ordered extents complete before we find the next key, we may end up finding an extent item with an offset smaller than (or equals to) the offset in cache->offset. So fix this by changing emit_fiemap_extent() to address these three scenarios like this: 1) For the first case, steps listed above, adjust the length of the previously cached extent so that it does not overlap with the current extent, emit the previous one and cache the current file extent item; 2) For the second case where he had a hole or prealloc extent with multiple delalloc ranges inside the hole or prealloc extent's range, and the current file extent item has an offset that matches the offset in the fiemap cache, just discard what we have in the fiemap cache and assign the current file extent item to the cache, since it's more up to date; 3) For the third case where he had a hole or prealloc extent with multiple delalloc ranges inside the hole or prealloc extent's range and the offset of the file extent item we just found is smaller than what we have in the cache, just skip the current file extent item if its range end at or behind the cached extent's end, because we may have emitted (to the fiemap user space buffer) delalloc ranges that overlap with the current file extent item's range. If the file extent item's range goes beyond the end offset of the cached extent, just emit the cached extent and cache a subrange of the file extent item, that goes from the end offset of the cached extent to the end offset of the file extent item. Dealing with those cases in those ways makes everything consistent by reflecting the current state of file extent items in the btree and without emitting extents that have overlapping ranges (which would be confusing and violating expectations). This issue could be triggered often with test case generic/561, and was also hit and reported by Wang Yugui. Reported-by: Wang Yugui Link: https://lore.kernel.org/linux-btrfs/20240223104619.701F.409509F4@e16-tech.com/ Fixes: b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 103 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 96 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 61d961a30dee..e6abfc29f20e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2480,6 +2480,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache, u64 offset, u64 phys, u64 len, u32 flags) { + u64 cache_end; int ret = 0; /* Set at the end of extent_fiemap(). */ @@ -2489,15 +2490,102 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, goto assign; /* - * Sanity check, extent_fiemap() should have ensured that new - * fiemap extent won't overlap with cached one. - * Not recoverable. + * When iterating the extents of the inode, at extent_fiemap(), we may + * find an extent that starts at an offset behind the end offset of the + * previous extent we processed. This happens if fiemap is called + * without FIEMAP_FLAG_SYNC and there are ordered extents completing + * while we call btrfs_next_leaf() (through fiemap_next_leaf_item()). * - * NOTE: Physical address can overlap, due to compression + * For example we are in leaf X processing its last item, which is the + * file extent item for file range [512K, 1M[, and after + * btrfs_next_leaf() releases the path, there's an ordered extent that + * completes for the file range [768K, 2M[, and that results in trimming + * the file extent item so that it now corresponds to the file range + * [512K, 768K[ and a new file extent item is inserted for the file + * range [768K, 2M[, which may end up as the last item of leaf X or as + * the first item of the next leaf - in either case btrfs_next_leaf() + * will leave us with a path pointing to the new extent item, for the + * file range [768K, 2M[, since that's the first key that follows the + * last one we processed. So in order not to report overlapping extents + * to user space, we trim the length of the previously cached extent and + * emit it. + * + * Upon calling btrfs_next_leaf() we may also find an extent with an + * offset smaller than or equals to cache->offset, and this happens + * when we had a hole or prealloc extent with several delalloc ranges in + * it, but after btrfs_next_leaf() released the path, delalloc was + * flushed and the resulting ordered extents were completed, so we can + * now have found a file extent item for an offset that is smaller than + * or equals to what we have in cache->offset. We deal with this as + * described below. */ - if (cache->offset + cache->len > offset) { - WARN_ON(1); - return -EINVAL; + cache_end = cache->offset + cache->len; + if (cache_end > offset) { + if (offset == cache->offset) { + /* + * We cached a dealloc range (found in the io tree) for + * a hole or prealloc extent and we have now found a + * file extent item for the same offset. What we have + * now is more recent and up to date, so discard what + * we had in the cache and use what we have just found. + */ + goto assign; + } else if (offset > cache->offset) { + /* + * The extent range we previously found ends after the + * offset of the file extent item we found and that + * offset falls somewhere in the middle of that previous + * extent range. So adjust the range we previously found + * to end at the offset of the file extent item we have + * just found, since this extent is more up to date. + * Emit that adjusted range and cache the file extent + * item we have just found. This corresponds to the case + * where a previously found file extent item was split + * due to an ordered extent completing. + */ + cache->len = offset - cache->offset; + goto emit; + } else { + const u64 range_end = offset + len; + + /* + * The offset of the file extent item we have just found + * is behind the cached offset. This means we were + * processing a hole or prealloc extent for which we + * have found delalloc ranges (in the io tree), so what + * we have in the cache is the last delalloc range we + * found while the file extent item we found can be + * either for a whole delalloc range we previously + * emmitted or only a part of that range. + * + * We have two cases here: + * + * 1) The file extent item's range ends at or behind the + * cached extent's end. In this case just ignore the + * current file extent item because we don't want to + * overlap with previous ranges that may have been + * emmitted already; + * + * 2) The file extent item starts behind the currently + * cached extent but its end offset goes beyond the + * end offset of the cached extent. We don't want to + * overlap with a previous range that may have been + * emmitted already, so we emit the currently cached + * extent and then partially store the current file + * extent item's range in the cache, for the subrange + * going the cached extent's end to the end of the + * file extent item. + */ + if (range_end <= cache_end) + return 0; + + if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC))) + phys += cache_end - offset; + + offset = cache_end; + len = range_end - cache_end; + goto emit; + } } /* @@ -2517,6 +2605,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, return 0; } +emit: /* Not mergeable, need to submit cached one */ ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, cache->len, cache->flags); From 418b09027743d9a9fb39116bed46a192f868a3c3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Feb 2024 12:29:34 +0000 Subject: [PATCH 129/225] btrfs: ensure fiemap doesn't race with writes when FIEMAP_FLAG_SYNC is given When FIEMAP_FLAG_SYNC is given to fiemap the expectation is that that are no concurrent writes and we get a stable view of the inode's extent layout. When the flag is given we flush all IO (and wait for ordered extents to complete) and then lock the inode in shared mode, however that leaves open the possibility that a write might happen right after the flushing and before locking the inode. So fix this by flushing again after locking the inode - we leave the initial flushing before locking the inode to avoid holding the lock and blocking other RO operations while waiting for IO and ordered extents to complete. The second flushing while holding the inode's lock will most of the time do nothing or very little since the time window for new writes to have happened is small. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 21 ++++++++------------- fs/btrfs/inode.c | 22 +++++++++++++++++++++- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e6abfc29f20e..b2eeee236e80 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2996,17 +2996,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, range_end = round_up(start + len, sectorsize); prev_extent_end = range_start; - btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) - goto out_unlock; + goto out; btrfs_release_path(path); path->reada = READA_FORWARD; ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* * No file extent item found, but we may have delalloc between @@ -3053,7 +3051,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, backref_ctx, 0, 0, 0, prev_extent_end, hole_end); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3109,7 +3107,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, extent_gen, backref_ctx); if (ret < 0) - goto out_unlock; + goto out; else if (ret > 0) flags |= FIEMAP_EXTENT_SHARED; } @@ -3120,7 +3118,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; @@ -3131,12 +3129,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, next_item: if (fatal_signal_pending(current)) { ret = -EINTR; - goto out_unlock; + goto out; } ret = fiemap_next_leaf_item(inode, path); if (ret < 0) { - goto out_unlock; + goto out; } else if (ret > 0) { /* No more file extent items for this inode. */ break; @@ -3160,7 +3158,7 @@ check_eof_delalloc: &delalloc_cached_state, backref_ctx, 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) - goto out_unlock; + goto out; prev_extent_end = range_end; } @@ -3198,9 +3196,6 @@ check_eof_delalloc: } ret = emit_last_fiemap_cache(fieinfo, &cache); - -out_unlock: - btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); btrfs_free_backref_share_ctx(backref_ctx); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f39462dceb0..a3248c47012f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7835,6 +7835,7 @@ struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { + struct btrfs_inode *btrfs_inode = BTRFS_I(inode); int ret; ret = fiemap_prep(inode, fieinfo, start, &len, 0); @@ -7860,7 +7861,26 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, return ret; } - return extent_fiemap(BTRFS_I(inode), fieinfo, start, len); + btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); + + /* + * We did an initial flush to avoid holding the inode's lock while + * triggering writeback and waiting for the completion of IO and ordered + * extents. Now after we locked the inode we do it again, because it's + * possible a new write may have happened in between those two steps. + */ + if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { + ret = btrfs_wait_ordered_range(inode, 0, LLONG_MAX); + if (ret) { + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + return ret; + } + } + + ret = extent_fiemap(btrfs_inode, fieinfo, start, len); + btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); + + return ret; } static int btrfs_writepages(struct address_space *mapping, From e2b54eaf28df0c978626c9736b94f003b523b451 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 23 Feb 2024 16:38:43 +0000 Subject: [PATCH 130/225] btrfs: fix double free of anonymous device after snapshot creation failure When creating a snapshot we may do a double free of an anonymous device in case there's an error committing the transaction. The second free may result in freeing an anonymous device number that was allocated by some other subsystem in the kernel or another btrfs filesystem. The steps that lead to this: 1) At ioctl.c:create_snapshot() we allocate an anonymous device number and assign it to pending_snapshot->anon_dev; 2) Then we call btrfs_commit_transaction() and end up at transaction.c:create_pending_snapshot(); 3) There we call btrfs_get_new_fs_root() and pass it the anonymous device number stored in pending_snapshot->anon_dev; 4) btrfs_get_new_fs_root() frees that anonymous device number because btrfs_lookup_fs_root() returned a root - someone else did a lookup of the new root already, which could some task doing backref walking; 5) After that some error happens in the transaction commit path, and at ioctl.c:create_snapshot() we jump to the 'fail' label, and after that we free again the same anonymous device number, which in the meanwhile may have been reallocated somewhere else, because pending_snapshot->anon_dev still has the same value as in step 1. Recently syzbot ran into this and reported the following trace: ------------[ cut here ]------------ ida_free called for id=51 which is not allocated. WARNING: CPU: 1 PID: 31038 at lib/idr.c:525 ida_free+0x370/0x420 lib/idr.c:525 Modules linked in: CPU: 1 PID: 31038 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00410-gc02197fc9076 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 RIP: 0010:ida_free+0x370/0x420 lib/idr.c:525 Code: 10 42 80 3c 28 (...) RSP: 0018:ffffc90015a67300 EFLAGS: 00010246 RAX: be5130472f5dd000 RBX: 0000000000000033 RCX: 0000000000040000 RDX: ffffc90009a7a000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: ffffc90015a673f0 R08: ffffffff81577992 R09: 1ffff92002b4cdb4 R10: dffffc0000000000 R11: fffff52002b4cdb5 R12: 0000000000000246 R13: dffffc0000000000 R14: ffffffff8e256b80 R15: 0000000000000246 FS: 00007fca3f4b46c0(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f167a17b978 CR3: 000000001ed26000 CR4: 0000000000350ef0 Call Trace: btrfs_get_root_ref+0xa48/0xaf0 fs/btrfs/disk-io.c:1346 create_pending_snapshot+0xff2/0x2bc0 fs/btrfs/transaction.c:1837 create_pending_snapshots+0x195/0x1d0 fs/btrfs/transaction.c:1931 btrfs_commit_transaction+0xf1c/0x3740 fs/btrfs/transaction.c:2404 create_snapshot+0x507/0x880 fs/btrfs/ioctl.c:848 btrfs_mksubvol+0x5d0/0x750 fs/btrfs/ioctl.c:998 btrfs_mksnapshot+0xb5/0xf0 fs/btrfs/ioctl.c:1044 __btrfs_ioctl_snap_create+0x387/0x4b0 fs/btrfs/ioctl.c:1306 btrfs_ioctl_snap_create_v2+0x1ca/0x400 fs/btrfs/ioctl.c:1393 btrfs_ioctl+0xa74/0xd40 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl+0xfe/0x170 fs/ioctl.c:857 do_syscall_64+0xfb/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7fca3e67dda9 Code: 28 00 00 00 (...) RSP: 002b:00007fca3f4b40c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007fca3e7abf80 RCX: 00007fca3e67dda9 RDX: 00000000200005c0 RSI: 0000000050009417 RDI: 0000000000000003 RBP: 00007fca3e6ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fca3e7abf80 R15: 00007fff6bf95658 Where we get an explicit message where we attempt to free an anonymous device number that is not currently allocated. It happens in a different code path from the example below, at btrfs_get_root_ref(), so this change may not fix the case triggered by syzbot. To fix at least the code path from the example above, change btrfs_get_root_ref() and its callers to receive a dev_t pointer argument for the anonymous device number, so that in case it frees the number, it also resets it to 0, so that up in the call chain we don't attempt to do the double free. CC: stable@vger.kernel.org # 5.10+ Link: https://lore.kernel.org/linux-btrfs/000000000000f673a1061202f630@google.com/ Fixes: e03ee2fe873e ("btrfs: do not ASSERT() if the newly created subvolume already got read") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 22 +++++++++++----------- fs/btrfs/disk-io.h | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/transaction.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e71ef97d0a7c..c843563914ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1307,12 +1307,12 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, - * pass 0 for new allocation. + * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev, + u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; @@ -1342,9 +1342,9 @@ again: * that common but still possible. In that case, we just need * to free the anon_dev. */ - if (unlikely(anon_dev)) { - free_anon_bdev(anon_dev); - anon_dev = 0; + if (unlikely(anon_dev && *anon_dev)) { + free_anon_bdev(*anon_dev); + *anon_dev = 0; } if (check_ref && btrfs_root_refs(&root->root_item) == 0) { @@ -1366,7 +1366,7 @@ again: goto fail; } - ret = btrfs_init_fs_root(root, anon_dev); + ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; @@ -1402,7 +1402,7 @@ fail: * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ - if (anon_dev) + if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); @@ -1418,7 +1418,7 @@ fail: struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { - return btrfs_get_root_ref(fs_info, objectid, 0, check_ref); + return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* @@ -1426,11 +1426,11 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, * the anonymous block device id * * @objectid: tree objectid - * @anon_dev: if zero, allocate a new anonymous block device or use the - * parameter value + * @anon_dev: if NULL, allocate a new anonymous block device or use the + * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev) + u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 9413726b329b..eb3473d1c1ac 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,7 +61,7 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, - u64 objectid, dev_t anon_dev); + u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fb2323b323bf..b004e3b75311 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -721,7 +721,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap, free_extent_buffer(leaf); leaf = NULL; - new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev); + new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c52807d97efa..bf8e64c766b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1834,7 +1834,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, } key.offset = (u64)-1; - pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); + pending->snap = btrfs_get_new_fs_root(fs_info, objectid, &pending->anon_dev); if (IS_ERR(pending->snap)) { ret = PTR_ERR(pending->snap); pending->snap = NULL; From 6572786006fa96ad2c35bb31757f1f861298093b Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 1 Mar 2024 09:18:24 +0900 Subject: [PATCH 131/225] fprobe: Fix to allocate entry_data_size buffer with rethook instances Fix to allocate fprobe::entry_data_size buffer with rethook instances. If fprobe doesn't allocate entry_data_size buffer for each rethook instance, fprobe entry handler can cause a buffer overrun when storing entry data in entry handler. Link: https://lore.kernel.org/all/170920576727.107552.638161246679734051.stgit@devnote2/ Reported-by: Jiri Olsa Closes: https://lore.kernel.org/all/Zd9eBn2FTQzYyg7L@krava/ Fixes: 4bbd93455659 ("kprobes: kretprobe scalability improvement") Cc: stable@vger.kernel.org Tested-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 6cd2a4e3afb8..9ff018245840 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -189,9 +189,6 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) { int size; - if (num <= 0) - return -EINVAL; - if (!fp->exit_handler) { fp->rethook = NULL; return 0; @@ -199,15 +196,16 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) /* Initialize rethook if needed */ if (fp->nr_maxactive) - size = fp->nr_maxactive; + num = fp->nr_maxactive; else - size = num * num_possible_cpus() * 2; - if (size <= 0) + num *= num_possible_cpus() * 2; + if (num <= 0) return -EINVAL; + size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size; + /* Initialize rethook */ - fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, - sizeof(struct fprobe_rethook_node), size); + fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num); if (IS_ERR(fp->rethook)) return PTR_ERR(fp->rethook); From f7916c47f66d778817068d86e5c9b5e511e23c86 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 26 Feb 2024 17:16:10 +1000 Subject: [PATCH 132/225] nouveau: report byte usage in VRAM usage. Turns out usage is always in bytes not shifted. Fixes: 72fa02fdf833 ("nouveau: add an ioctl to report vram usage") Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_abi16.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index cd14f993bdd1..80f74ee0fc78 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -269,7 +269,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) break; case NOUVEAU_GETPARAM_VRAM_USED: { struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM); - getparam->value = (u64)ttm_resource_manager_usage(vram_mgr) << PAGE_SHIFT; + getparam->value = (u64)ttm_resource_manager_usage(vram_mgr); break; } default: From f6ecfdad359a01c7fd8a3bcfde3ef0acdf107e6e Mon Sep 17 00:00:00 2001 From: Sid Pranjale Date: Thu, 29 Feb 2024 21:52:05 +0530 Subject: [PATCH 133/225] drm/nouveau: keep DMA buffers required for suspend/resume Nouveau deallocates a few buffers post GPU init which are required for GPU suspend/resume to function correctly. This is likely not as big an issue on systems where the NVGPU is the only GPU, but on multi-GPU set ups it leads to a regression where the kernel module errors and results in a system-wide rendering freeze. This commit addresses that regression by moving the two buffers required for suspend and resume to be deallocated at driver unload instead of post init. Fixes: 042b5f83841fb ("drm/nouveau: fix several DMA buffer leaks") Signed-off-by: Sid Pranjale Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index a64c81385682..a73a5b589790 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -1054,8 +1054,6 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) /* Release the DMA buffers that were needed only for boot and init */ nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); nvkm_gsp_mem_dtor(gsp, &gsp->libos); - nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); - nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); return ret; } @@ -2163,6 +2161,8 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) r535_gsp_dtor_fws(gsp); + nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); + nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); nvkm_gsp_mem_dtor(gsp, &gsp->loginit); nvkm_gsp_mem_dtor(gsp, &gsp->logintr); From 530b1dbd97846b110ea8a94c7cc903eca21786e5 Mon Sep 17 00:00:00 2001 From: Arturas Moskvinas Date: Fri, 1 Mar 2024 09:12:04 +0200 Subject: [PATCH 134/225] gpio: 74x164: Enable output pins after registers are reset Chip outputs are enabled[1] before actual reset is performed[2] which might cause pin output value to flip flop if previous pin value was set to 1. Fix that behavior by making sure chip is fully reset before all outputs are enabled. Flip-flop can be noticed when module is removed and inserted again and one of the pins was changed to 1 before removal. 100 microsecond flipping is noticeable on oscilloscope (100khz SPI bus). For a properly reset chip - output is enabled around 100 microseconds (on 100khz SPI bus) later during probing process hence should be irrelevant behavioral change. Fixes: 7ebc194d0fd4 (gpio: 74x164: Introduce 'enable-gpios' property) Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L130 [1] Link: https://elixir.bootlin.com/linux/v6.7.4/source/drivers/gpio/gpio-74x164.c#L150 [2] Signed-off-by: Arturas Moskvinas Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-74x164.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index e00c33310517..753e7be039e4 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -127,8 +127,6 @@ static int gen_74x164_probe(struct spi_device *spi) if (IS_ERR(chip->gpiod_oe)) return PTR_ERR(chip->gpiod_oe); - gpiod_set_value_cansleep(chip->gpiod_oe, 1); - spi_set_drvdata(spi, chip); chip->gpio_chip.label = spi->modalias; @@ -153,6 +151,8 @@ static int gen_74x164_probe(struct spi_device *spi) goto exit_destroy; } + gpiod_set_value_cansleep(chip->gpiod_oe, 1); + ret = gpiochip_add_data(&chip->gpio_chip, chip); if (!ret) return 0; From e4aec4daa8c009057b5e063db1b7322252c92dc8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Feb 2024 21:28:46 +0200 Subject: [PATCH 135/225] gpiolib: Fix the error path order in gpiochip_add_data_with_key() After shuffling the code, error path wasn't updated correctly. Fix it here. Fixes: 2f4133bb5f14 ("gpiolib: No need to call gpiochip_remove_pin_ranges() twice") Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e434e8cc1229..58a839c1ae48 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -999,11 +999,11 @@ err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); err_remove_acpi_chip: acpi_gpiochip_remove(gc); + gpiochip_remove_pin_ranges(gc); err_remove_of_chip: gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: - gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); From adf47524b56a791734ae24da8412c6579e2fab4f Mon Sep 17 00:00:00 2001 From: Peter Martincic Date: Mon, 27 Nov 2023 13:35:24 -0800 Subject: [PATCH 136/225] hv_utils: Allow implicit ICTIMESYNCFLAG_SYNC Hyper-V hosts can omit the _SYNC flag to due a bug on resume from modern suspend. In such a case, the guest may fail to update its time-of-day to account for the period when it was suspended, and could proceed with a significantly wrong time-of-day. In such a case when the guest is significantly behind, fix it by treating a _SAMPLE the same as if _SYNC was received so that the guest time-of-day is updated. This is hidden behind param hv_utils.timesync_implicit. Signed-off-by: Peter Martincic Acked-by: Boqun Feng Link: https://lore.kernel.org/r/20231127213524.52783-1-pmartincic@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <20231127213524.52783-1-pmartincic@linux.microsoft.com> --- drivers/hv/hv_util.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 42aec2c5606a..9c97c4065fe7 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -296,6 +296,11 @@ static struct { spinlock_t lock; } host_ts; +static bool timesync_implicit; + +module_param(timesync_implicit, bool, 0644); +MODULE_PARM_DESC(timesync_implicit, "If set treat SAMPLE as SYNC when clock is behind"); + static inline u64 reftime_to_ns(u64 reftime) { return (reftime - WLTIMEDELTA) * 100; @@ -344,6 +349,29 @@ static void hv_set_host_time(struct work_struct *work) do_settimeofday64(&ts); } +/* + * Due to a bug on Hyper-V hosts, the sync flag may not always be sent on resume. + * Force a sync if the guest is behind. + */ +static inline bool hv_implicit_sync(u64 host_time) +{ + struct timespec64 new_ts; + struct timespec64 threshold_ts; + + new_ts = ns_to_timespec64(reftime_to_ns(host_time)); + ktime_get_real_ts64(&threshold_ts); + + threshold_ts.tv_sec += 5; + + /* + * If guest behind the host by 5 or more seconds. + */ + if (timespec64_compare(&new_ts, &threshold_ts) >= 0) + return true; + + return false; +} + /* * Synchronize time with host after reboot, restore, etc. * @@ -384,7 +412,8 @@ static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags) spin_unlock_irqrestore(&host_ts.lock, flags); /* Schedule work to do do_settimeofday64() */ - if (adj_flags & ICTIMESYNCFLAG_SYNC) + if ((adj_flags & ICTIMESYNCFLAG_SYNC) || + (timesync_implicit && hv_implicit_sync(host_ts.host_time))) schedule_work(&adj_time_work); } From b8209544296edbd1af186e2ea9c648642c37b18c Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 28 Feb 2024 16:45:33 -0800 Subject: [PATCH 137/225] Drivers: hv: vmbus: Calculate ring buffer size for more efficient use of memory The VMBUS_RING_SIZE macro adds space for a ring buffer header to the requested ring buffer size. The header size is always 1 page, and so its size varies based on the PAGE_SIZE for which the kernel is built. If the requested ring buffer size is a large power-of-2 size and the header size is small, the resulting size is inefficient in its use of memory. For example, a 512 Kbyte ring buffer with a 4 Kbyte page size results in a 516 Kbyte allocation, which is rounded to up 1 Mbyte by the memory allocator, and wastes 508 Kbytes of memory. In such situations, the exact size of the ring buffer isn't that important, and it's OK to allocate the 4 Kbyte header at the beginning of the 512 Kbytes, leaving the ring buffer itself with just 508 Kbytes. The memory allocation can be 512 Kbytes instead of 1 Mbyte and nothing is wasted. Update VMBUS_RING_SIZE to implement this approach for "large" ring buffer sizes. "Large" is somewhat arbitrarily defined as 8 times the size of the ring buffer header (which is of size PAGE_SIZE). For example, for 4 Kbyte PAGE_SIZE, ring buffers of 32 Kbytes and larger use the first 4 Kbytes as the ring buffer header. For 64 Kbyte PAGE_SIZE, ring buffers of 512 Kbytes and larger use the first 64 Kbytes as the ring buffer header. In both cases, smaller sizes add space for the header so the ring size isn't reduced too much by using part of the space for the header. For example, with a 64 Kbyte page size, we don't want a 128 Kbyte ring buffer to be reduced to 64 Kbytes by allocating half of the space for the header. In such a case, the memory allocation is less efficient, but it's the best that can be done. While the new algorithm slightly changes the amount of space allocated for ring buffers by drivers that use VMBUS_RING_SIZE, the devices aren't known to be sensitive to small changes in ring buffer size, so there shouldn't be any effect. Fixes: c1135c7fd0e9 ("Drivers: hv: vmbus: Introduce types of GPADL") Fixes: 6941f67ad37d ("hv_netvsc: Calculate correct ring size when PAGE_SIZE is not 4 Kbytes") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218502 Cc: stable@vger.kernel.org Signed-off-by: Michael Kelley Reviewed-by: Saurabh Sengar Reviewed-by: Dexuan Cui Tested-by: Souradeep Chakrabarti Link: https://lore.kernel.org/r/20240229004533.313662-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240229004533.313662-1-mhklinux@outlook.com> --- include/linux/hyperv.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2b00faf98017..6ef0557b4bff 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -164,8 +164,28 @@ struct hv_ring_buffer { u8 buffer[]; } __packed; + +/* + * If the requested ring buffer size is at least 8 times the size of the + * header, steal space from the ring buffer for the header. Otherwise, add + * space for the header so that is doesn't take too much of the ring buffer + * space. + * + * The factor of 8 is somewhat arbitrary. The goal is to prevent adding a + * relatively small header (4 Kbytes on x86) to a large-ish power-of-2 ring + * buffer size (such as 128 Kbytes) and so end up making a nearly twice as + * large allocation that will be almost half wasted. As a contrasting example, + * on ARM64 with 64 Kbyte page size, we don't want to take 64 Kbytes for the + * header from a 128 Kbyte allocation, leaving only 64 Kbytes for the ring. + * In this latter case, we must add 64 Kbytes for the header and not worry + * about what's wasted. + */ +#define VMBUS_HEADER_ADJ(payload_sz) \ + ((payload_sz) >= 8 * sizeof(struct hv_ring_buffer) ? \ + 0 : sizeof(struct hv_ring_buffer)) + /* Calculate the proper size of a ringbuffer, it must be page-aligned */ -#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(sizeof(struct hv_ring_buffer) + \ +#define VMBUS_RING_SIZE(payload_sz) PAGE_ALIGN(VMBUS_HEADER_ADJ(payload_sz) + \ (payload_sz)) struct hv_ring_buffer_info { From 20ee2ae8c58990ca9e98954b7ac2b66c53a0310e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 31 Jan 2024 22:00:22 -0800 Subject: [PATCH 138/225] fbdev/hyperv_fb: Fix logic error for Gen2 VMs in hvfb_getmem() A recent commit removing the use of screen_info introduced a logic error. The error causes hvfb_getmem() to always return -ENOMEM for Generation 2 VMs. As a result, the Hyper-V frame buffer device fails to initialize. The error was introduced by removing an "else if" clause, leaving Gen2 VMs to always take the -ENOMEM error path. Fix the problem by removing the error path "else" clause. Gen 2 VMs now always proceed through the MMIO memory allocation code, but with "base" and "size" defaulting to 0. Fixes: 0aa0838c84da ("fbdev/hyperv_fb: Remove firmware framebuffers with aperture helpers") Signed-off-by: Michael Kelley Reviewed-by: Thomas Zimmermann Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20240201060022.233666-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240201060022.233666-1-mhklinux@outlook.com> --- drivers/video/fbdev/hyperv_fb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index c26ee6fd73c9..8fdccf033b2d 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1010,8 +1010,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto getmem_done; } pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n"); - } else { - goto err1; } /* From 8db0edc4acb1c654e4c115a3978fb2681c5bfb74 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 11 Jan 2024 08:54:50 -0800 Subject: [PATCH 139/225] Drivers: hv: vmbus: Remove duplication and cleanup code in create_gpadl_header() create_gpadl_header() creates a message header, and one or more message bodies if the number of GPADL entries exceeds what fits in the header. Currently the code for creating the message header is duplicated in the two halves of the main "if" statement governing whether message bodies are created. Eliminate the duplication by making minor tweaks to the logic and associated comments. While here, simplify the handling of memory allocation errors, and use umin() instead of open coding it. For ease of review, the indentation of sizable chunks of code is *not* changed. A follow-on patch updates only the indentation. No functional change. Suggested-by: Dan Carpenter Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240111165451.269418-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240111165451.269418-1-mhklinux@outlook.com> --- drivers/hv/channel.c | 54 ++++++++------------------------------------ 1 file changed, 10 insertions(+), 44 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 56f7e06c673e..604f5aff8502 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -322,21 +322,17 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT; - /* do we need a gpadl body msg */ pfnsize = MAX_SIZE_CHANNEL_MESSAGE - sizeof(struct vmbus_channel_gpadl_header) - sizeof(struct gpa_range); - pfncount = pfnsize / sizeof(u64); + pfncount = umin(pagecount, pfnsize / sizeof(u64)); - if (pagecount > pfncount) { - /* we need a gpadl body */ - /* fill in the header */ msgsize = sizeof(struct vmbus_channel_msginfo) + sizeof(struct vmbus_channel_gpadl_header) + sizeof(struct gpa_range) + pfncount * sizeof(u64); msgheader = kzalloc(msgsize, GFP_KERNEL); if (!msgheader) - goto nomem; + return -ENOMEM; INIT_LIST_HEAD(&msgheader->submsglist); msgheader->msgsize = msgsize; @@ -356,18 +352,17 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, pfnsum = pfncount; pfnleft = pagecount - pfncount; - /* how many pfns can we fit */ + /* how many pfns can we fit in a body message */ pfnsize = MAX_SIZE_CHANNEL_MESSAGE - sizeof(struct vmbus_channel_gpadl_body); pfncount = pfnsize / sizeof(u64); - /* fill in the body */ + /* + * If pfnleft is zero, everything fits in the header and no body + * messages are needed + */ while (pfnleft) { - if (pfnleft > pfncount) - pfncurr = pfncount; - else - pfncurr = pfnleft; - + pfncurr = umin(pfncount, pfnleft); msgsize = sizeof(struct vmbus_channel_msginfo) + sizeof(struct vmbus_channel_gpadl_body) + pfncurr * sizeof(u64); @@ -386,8 +381,8 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, list_del(&pos->msglistentry); kfree(pos); } - - goto nomem; + kfree(msgheader); + return -ENOMEM; } msgbody->msgsize = msgsize; @@ -410,37 +405,8 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, pfnsum += pfncurr; pfnleft -= pfncurr; } - } else { - /* everything fits in a header */ - msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_header) + - sizeof(struct gpa_range) + pagecount * sizeof(u64); - msgheader = kzalloc(msgsize, GFP_KERNEL); - if (msgheader == NULL) - goto nomem; - - INIT_LIST_HEAD(&msgheader->submsglist); - msgheader->msgsize = msgsize; - - gpadl_header = (struct vmbus_channel_gpadl_header *) - msgheader->msg; - gpadl_header->rangecount = 1; - gpadl_header->range_buflen = sizeof(struct gpa_range) + - pagecount * sizeof(u64); - gpadl_header->range[0].byte_offset = 0; - gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); - for (i = 0; i < pagecount; i++) - gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( - type, kbuffer, size, send_offset, i); - - *msginfo = msgheader; - } return 0; -nomem: - kfree(msgheader); - kfree(msgbody); - return -ENOMEM; } /* From 9645e74414fb725b0305f4f03035b68207659007 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 11 Jan 2024 08:54:51 -0800 Subject: [PATCH 140/225] Drivers: hv: vmbus: Update indentation in create_gpadl_header() A previous commit left the indentation in create_gpadl_header() unchanged for ease of review. Update the indentation and remove line wrap in two places where it is no longer necessary. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/20240111165451.269418-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240111165451.269418-2-mhklinux@outlook.com> --- drivers/hv/channel.c | 140 +++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 71 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 604f5aff8502..adbf674355b2 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -327,84 +327,82 @@ static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, sizeof(struct gpa_range); pfncount = umin(pagecount, pfnsize / sizeof(u64)); + msgsize = sizeof(struct vmbus_channel_msginfo) + + sizeof(struct vmbus_channel_gpadl_header) + + sizeof(struct gpa_range) + pfncount * sizeof(u64); + msgheader = kzalloc(msgsize, GFP_KERNEL); + if (!msgheader) + return -ENOMEM; + + INIT_LIST_HEAD(&msgheader->submsglist); + msgheader->msgsize = msgsize; + + gpadl_header = (struct vmbus_channel_gpadl_header *) + msgheader->msg; + gpadl_header->rangecount = 1; + gpadl_header->range_buflen = sizeof(struct gpa_range) + + pagecount * sizeof(u64); + gpadl_header->range[0].byte_offset = 0; + gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); + for (i = 0; i < pfncount; i++) + gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( + type, kbuffer, size, send_offset, i); + *msginfo = msgheader; + + pfnsum = pfncount; + pfnleft = pagecount - pfncount; + + /* how many pfns can we fit in a body message */ + pfnsize = MAX_SIZE_CHANNEL_MESSAGE - + sizeof(struct vmbus_channel_gpadl_body); + pfncount = pfnsize / sizeof(u64); + + /* + * If pfnleft is zero, everything fits in the header and no body + * messages are needed + */ + while (pfnleft) { + pfncurr = umin(pfncount, pfnleft); msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_header) + - sizeof(struct gpa_range) + pfncount * sizeof(u64); - msgheader = kzalloc(msgsize, GFP_KERNEL); - if (!msgheader) + sizeof(struct vmbus_channel_gpadl_body) + + pfncurr * sizeof(u64); + msgbody = kzalloc(msgsize, GFP_KERNEL); + + if (!msgbody) { + struct vmbus_channel_msginfo *pos = NULL; + struct vmbus_channel_msginfo *tmp = NULL; + /* + * Free up all the allocated messages. + */ + list_for_each_entry_safe(pos, tmp, + &msgheader->submsglist, + msglistentry) { + + list_del(&pos->msglistentry); + kfree(pos); + } + kfree(msgheader); return -ENOMEM; + } - INIT_LIST_HEAD(&msgheader->submsglist); - msgheader->msgsize = msgsize; - - gpadl_header = (struct vmbus_channel_gpadl_header *) - msgheader->msg; - gpadl_header->rangecount = 1; - gpadl_header->range_buflen = sizeof(struct gpa_range) + - pagecount * sizeof(u64); - gpadl_header->range[0].byte_offset = 0; - gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); - for (i = 0; i < pfncount; i++) - gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( - type, kbuffer, size, send_offset, i); - *msginfo = msgheader; - - pfnsum = pfncount; - pfnleft = pagecount - pfncount; - - /* how many pfns can we fit in a body message */ - pfnsize = MAX_SIZE_CHANNEL_MESSAGE - - sizeof(struct vmbus_channel_gpadl_body); - pfncount = pfnsize / sizeof(u64); + msgbody->msgsize = msgsize; + gpadl_body = (struct vmbus_channel_gpadl_body *)msgbody->msg; /* - * If pfnleft is zero, everything fits in the header and no body - * messages are needed + * Gpadl is u32 and we are using a pointer which could + * be 64-bit + * This is governed by the guest/host protocol and + * so the hypervisor guarantees that this is ok. */ - while (pfnleft) { - pfncurr = umin(pfncount, pfnleft); - msgsize = sizeof(struct vmbus_channel_msginfo) + - sizeof(struct vmbus_channel_gpadl_body) + - pfncurr * sizeof(u64); - msgbody = kzalloc(msgsize, GFP_KERNEL); + for (i = 0; i < pfncurr; i++) + gpadl_body->pfn[i] = hv_gpadl_hvpfn(type, + kbuffer, size, send_offset, pfnsum + i); - if (!msgbody) { - struct vmbus_channel_msginfo *pos = NULL; - struct vmbus_channel_msginfo *tmp = NULL; - /* - * Free up all the allocated messages. - */ - list_for_each_entry_safe(pos, tmp, - &msgheader->submsglist, - msglistentry) { - - list_del(&pos->msglistentry); - kfree(pos); - } - kfree(msgheader); - return -ENOMEM; - } - - msgbody->msgsize = msgsize; - gpadl_body = - (struct vmbus_channel_gpadl_body *)msgbody->msg; - - /* - * Gpadl is u32 and we are using a pointer which could - * be 64-bit - * This is governed by the guest/host protocol and - * so the hypervisor guarantees that this is ok. - */ - for (i = 0; i < pfncurr; i++) - gpadl_body->pfn[i] = hv_gpadl_hvpfn(type, - kbuffer, size, send_offset, pfnsum + i); - - /* add to msg header */ - list_add_tail(&msgbody->msglistentry, - &msgheader->submsglist); - pfnsum += pfncurr; - pfnleft -= pfncurr; - } + /* add to msg header */ + list_add_tail(&msgbody->msglistentry, &msgheader->submsglist); + pfnsum += pfncurr; + pfnleft -= pfncurr; + } return 0; } From 04ed680e76b0d320612601cef46cb7092f860b31 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 22 Feb 2024 12:07:10 -0800 Subject: [PATCH 141/225] Documentation: hyperv: Add overview of PCI pass-thru device support Add documentation topic for PCI pass-thru devices in Linux guests on Hyper-V and for the associated PCI controller driver (pci-hyperv.c). Signed-off-by: Michael Kelley Reviewed-by: Easwar Hariharan Link: https://lore.kernel.org/r/20240222200710.305259-1-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240222200710.305259-1-mhklinux@outlook.com> --- Documentation/virt/hyperv/index.rst | 1 + Documentation/virt/hyperv/vpci.rst | 316 ++++++++++++++++++++++++++++ 2 files changed, 317 insertions(+) create mode 100644 Documentation/virt/hyperv/vpci.rst diff --git a/Documentation/virt/hyperv/index.rst b/Documentation/virt/hyperv/index.rst index 4a7a1b738bbe..de447e11b4a5 100644 --- a/Documentation/virt/hyperv/index.rst +++ b/Documentation/virt/hyperv/index.rst @@ -10,3 +10,4 @@ Hyper-V Enlightenments overview vmbus clocks + vpci diff --git a/Documentation/virt/hyperv/vpci.rst b/Documentation/virt/hyperv/vpci.rst new file mode 100644 index 000000000000..b65b2126ede3 --- /dev/null +++ b/Documentation/virt/hyperv/vpci.rst @@ -0,0 +1,316 @@ +.. SPDX-License-Identifier: GPL-2.0 + +PCI pass-thru devices +========================= +In a Hyper-V guest VM, PCI pass-thru devices (also called +virtual PCI devices, or vPCI devices) are physical PCI devices +that are mapped directly into the VM's physical address space. +Guest device drivers can interact directly with the hardware +without intermediation by the host hypervisor. This approach +provides higher bandwidth access to the device with lower +latency, compared with devices that are virtualized by the +hypervisor. The device should appear to the guest just as it +would when running on bare metal, so no changes are required +to the Linux device drivers for the device. + +Hyper-V terminology for vPCI devices is "Discrete Device +Assignment" (DDA). Public documentation for Hyper-V DDA is +available here: `DDA`_ + +.. _DDA: https://learn.microsoft.com/en-us/windows-server/virtualization/hyper-v/plan/plan-for-deploying-devices-using-discrete-device-assignment + +DDA is typically used for storage controllers, such as NVMe, +and for GPUs. A similar mechanism for NICs is called SR-IOV +and produces the same benefits by allowing a guest device +driver to interact directly with the hardware. See Hyper-V +public documentation here: `SR-IOV`_ + +.. _SR-IOV: https://learn.microsoft.com/en-us/windows-hardware/drivers/network/overview-of-single-root-i-o-virtualization--sr-iov- + +This discussion of vPCI devices includes DDA and SR-IOV +devices. + +Device Presentation +------------------- +Hyper-V provides full PCI functionality for a vPCI device when +it is operating, so the Linux device driver for the device can +be used unchanged, provided it uses the correct Linux kernel +APIs for accessing PCI config space and for other integration +with Linux. But the initial detection of the PCI device and +its integration with the Linux PCI subsystem must use Hyper-V +specific mechanisms. Consequently, vPCI devices on Hyper-V +have a dual identity. They are initially presented to Linux +guests as VMBus devices via the standard VMBus "offer" +mechanism, so they have a VMBus identity and appear under +/sys/bus/vmbus/devices. The VMBus vPCI driver in Linux at +drivers/pci/controller/pci-hyperv.c handles a newly introduced +vPCI device by fabricating a PCI bus topology and creating all +the normal PCI device data structures in Linux that would +exist if the PCI device were discovered via ACPI on a bare- +metal system. Once those data structures are set up, the +device also has a normal PCI identity in Linux, and the normal +Linux device driver for the vPCI device can function as if it +were running in Linux on bare-metal. Because vPCI devices are +presented dynamically through the VMBus offer mechanism, they +do not appear in the Linux guest's ACPI tables. vPCI devices +may be added to a VM or removed from a VM at any time during +the life of the VM, and not just during initial boot. + +With this approach, the vPCI device is a VMBus device and a +PCI device at the same time. In response to the VMBus offer +message, the hv_pci_probe() function runs and establishes a +VMBus connection to the vPCI VSP on the Hyper-V host. That +connection has a single VMBus channel. The channel is used to +exchange messages with the vPCI VSP for the purpose of setting +up and configuring the vPCI device in Linux. Once the device +is fully configured in Linux as a PCI device, the VMBus +channel is used only if Linux changes the vCPU to be interrupted +in the guest, or if the vPCI device is removed from +the VM while the VM is running. The ongoing operation of the +device happens directly between the Linux device driver for +the device and the hardware, with VMBus and the VMBus channel +playing no role. + +PCI Device Setup +---------------- +PCI device setup follows a sequence that Hyper-V originally +created for Windows guests, and that can be ill-suited for +Linux guests due to differences in the overall structure of +the Linux PCI subsystem compared with Windows. Nonetheless, +with a bit of hackery in the Hyper-V virtual PCI driver for +Linux, the virtual PCI device is setup in Linux so that +generic Linux PCI subsystem code and the Linux driver for the +device "just work". + +Each vPCI device is set up in Linux to be in its own PCI +domain with a host bridge. The PCI domainID is derived from +bytes 4 and 5 of the instance GUID assigned to the VMBus vPCI +device. The Hyper-V host does not guarantee that these bytes +are unique, so hv_pci_probe() has an algorithm to resolve +collisions. The collision resolution is intended to be stable +across reboots of the same VM so that the PCI domainIDs don't +change, as the domainID appears in the user space +configuration of some devices. + +hv_pci_probe() allocates a guest MMIO range to be used as PCI +config space for the device. This MMIO range is communicated +to the Hyper-V host over the VMBus channel as part of telling +the host that the device is ready to enter d0. See +hv_pci_enter_d0(). When the guest subsequently accesses this +MMIO range, the Hyper-V host intercepts the accesses and maps +them to the physical device PCI config space. + +hv_pci_probe() also gets BAR information for the device from +the Hyper-V host, and uses this information to allocate MMIO +space for the BARs. That MMIO space is then setup to be +associated with the host bridge so that it works when generic +PCI subsystem code in Linux processes the BARs. + +Finally, hv_pci_probe() creates the root PCI bus. At this +point the Hyper-V virtual PCI driver hackery is done, and the +normal Linux PCI machinery for scanning the root bus works to +detect the device, to perform driver matching, and to +initialize the driver and device. + +PCI Device Removal +------------------ +A Hyper-V host may initiate removal of a vPCI device from a +guest VM at any time during the life of the VM. The removal +is instigated by an admin action taken on the Hyper-V host and +is not under the control of the guest OS. + +A guest VM is notified of the removal by an unsolicited +"Eject" message sent from the host to the guest over the VMBus +channel associated with the vPCI device. Upon receipt of such +a message, the Hyper-V virtual PCI driver in Linux +asynchronously invokes Linux kernel PCI subsystem calls to +shutdown and remove the device. When those calls are +complete, an "Ejection Complete" message is sent back to +Hyper-V over the VMBus channel indicating that the device has +been removed. At this point, Hyper-V sends a VMBus rescind +message to the Linux guest, which the VMBus driver in Linux +processes by removing the VMBus identity for the device. Once +that processing is complete, all vestiges of the device having +been present are gone from the Linux kernel. The rescind +message also indicates to the guest that Hyper-V has stopped +providing support for the vPCI device in the guest. If the +guest were to attempt to access that device's MMIO space, it +would be an invalid reference. Hypercalls affecting the device +return errors, and any further messages sent in the VMBus +channel are ignored. + +After sending the Eject message, Hyper-V allows the guest VM +60 seconds to cleanly shutdown the device and respond with +Ejection Complete before sending the VMBus rescind +message. If for any reason the Eject steps don't complete +within the allowed 60 seconds, the Hyper-V host forcibly +performs the rescind steps, which will likely result in +cascading errors in the guest because the device is now no +longer present from the guest standpoint and accessing the +device MMIO space will fail. + +Because ejection is asynchronous and can happen at any point +during the guest VM lifecycle, proper synchronization in the +Hyper-V virtual PCI driver is very tricky. Ejection has been +observed even before a newly offered vPCI device has been +fully setup. The Hyper-V virtual PCI driver has been updated +several times over the years to fix race conditions when +ejections happen at inopportune times. Care must be taken when +modifying this code to prevent re-introducing such problems. +See comments in the code. + +Interrupt Assignment +-------------------- +The Hyper-V virtual PCI driver supports vPCI devices using +MSI, multi-MSI, or MSI-X. Assigning the guest vCPU that will +receive the interrupt for a particular MSI or MSI-X message is +complex because of the way the Linux setup of IRQs maps onto +the Hyper-V interfaces. For the single-MSI and MSI-X cases, +Linux calls hv_compse_msi_msg() twice, with the first call +containing a dummy vCPU and the second call containing the +real vCPU. Furthermore, hv_irq_unmask() is finally called +(on x86) or the GICD registers are set (on arm64) to specify +the real vCPU again. Each of these three calls interact +with Hyper-V, which must decide which physical CPU should +receive the interrupt before it is forwarded to the guest VM. +Unfortunately, the Hyper-V decision-making process is a bit +limited, and can result in concentrating the physical +interrupts on a single CPU, causing a performance bottleneck. +See details about how this is resolved in the extensive +comment above the function hv_compose_msi_req_get_cpu(). + +The Hyper-V virtual PCI driver implements the +irq_chip.irq_compose_msi_msg function as hv_compose_msi_msg(). +Unfortunately, on Hyper-V the implementation requires sending +a VMBus message to the Hyper-V host and awaiting an interrupt +indicating receipt of a reply message. Since +irq_chip.irq_compose_msi_msg can be called with IRQ locks +held, it doesn't work to do the normal sleep until awakened by +the interrupt. Instead hv_compose_msi_msg() must send the +VMBus message, and then poll for the completion message. As +further complexity, the vPCI device could be ejected/rescinded +while the polling is in progress, so this scenario must be +detected as well. See comments in the code regarding this +very tricky area. + +Most of the code in the Hyper-V virtual PCI driver (pci- +hyperv.c) applies to Hyper-V and Linux guests running on x86 +and on arm64 architectures. But there are differences in how +interrupt assignments are managed. On x86, the Hyper-V +virtual PCI driver in the guest must make a hypercall to tell +Hyper-V which guest vCPU should be interrupted by each +MSI/MSI-X interrupt, and the x86 interrupt vector number that +the x86_vector IRQ domain has picked for the interrupt. This +hypercall is made by hv_arch_irq_unmask(). On arm64, the +Hyper-V virtual PCI driver manages the allocation of an SPI +for each MSI/MSI-X interrupt. The Hyper-V virtual PCI driver +stores the allocated SPI in the architectural GICD registers, +which Hyper-V emulates, so no hypercall is necessary as with +x86. Hyper-V does not support using LPIs for vPCI devices in +arm64 guest VMs because it does not emulate a GICv3 ITS. + +The Hyper-V virtual PCI driver in Linux supports vPCI devices +whose drivers create managed or unmanaged Linux IRQs. If the +smp_affinity for an unmanaged IRQ is updated via the /proc/irq +interface, the Hyper-V virtual PCI driver is called to tell +the Hyper-V host to change the interrupt targeting and +everything works properly. However, on x86 if the x86_vector +IRQ domain needs to reassign an interrupt vector due to +running out of vectors on a CPU, there's no path to inform the +Hyper-V host of the change, and things break. Fortunately, +guest VMs operate in a constrained device environment where +using all the vectors on a CPU doesn't happen. Since such a +problem is only a theoretical concern rather than a practical +concern, it has been left unaddressed. + +DMA +--- +By default, Hyper-V pins all guest VM memory in the host +when the VM is created, and programs the physical IOMMU to +allow the VM to have DMA access to all its memory. Hence +it is safe to assign PCI devices to the VM, and allow the +guest operating system to program the DMA transfers. The +physical IOMMU prevents a malicious guest from initiating +DMA to memory belonging to the host or to other VMs on the +host. From the Linux guest standpoint, such DMA transfers +are in "direct" mode since Hyper-V does not provide a virtual +IOMMU in the guest. + +Hyper-V assumes that physical PCI devices always perform +cache-coherent DMA. When running on x86, this behavior is +required by the architecture. When running on arm64, the +architecture allows for both cache-coherent and +non-cache-coherent devices, with the behavior of each device +specified in the ACPI DSDT. But when a PCI device is assigned +to a guest VM, that device does not appear in the DSDT, so the +Hyper-V VMBus driver propagates cache-coherency information +from the VMBus node in the ACPI DSDT to all VMBus devices, +including vPCI devices (since they have a dual identity as a VMBus +device and as a PCI device). See vmbus_dma_configure(). +Current Hyper-V versions always indicate that the VMBus is +cache coherent, so vPCI devices on arm64 always get marked as +cache coherent and the CPU does not perform any sync +operations as part of dma_map/unmap_*() calls. + +vPCI protocol versions +---------------------- +As previously described, during vPCI device setup and teardown +messages are passed over a VMBus channel between the Hyper-V +host and the Hyper-v vPCI driver in the Linux guest. Some +messages have been revised in newer versions of Hyper-V, so +the guest and host must agree on the vPCI protocol version to +be used. The version is negotiated when communication over +the VMBus channel is first established. See +hv_pci_protocol_negotiation(). Newer versions of the protocol +extend support to VMs with more than 64 vCPUs, and provide +additional information about the vPCI device, such as the +guest virtual NUMA node to which it is most closely affined in +the underlying hardware. + +Guest NUMA node affinity +------------------------ +When the vPCI protocol version provides it, the guest NUMA +node affinity of the vPCI device is stored as part of the Linux +device information for subsequent use by the Linux driver. See +hv_pci_assign_numa_node(). If the negotiated protocol version +does not support the host providing NUMA affinity information, +the Linux guest defaults the device NUMA node to 0. But even +when the negotiated protocol version includes NUMA affinity +information, the ability of the host to provide such +information depends on certain host configuration options. If +the guest receives NUMA node value "0", it could mean NUMA +node 0, or it could mean "no information is available". +Unfortunately it is not possible to distinguish the two cases +from the guest side. + +PCI config space access in a CoCo VM +------------------------------------ +Linux PCI device drivers access PCI config space using a +standard set of functions provided by the Linux PCI subsystem. +In Hyper-V guests these standard functions map to functions +hv_pcifront_read_config() and hv_pcifront_write_config() +in the Hyper-V virtual PCI driver. In normal VMs, +these hv_pcifront_*() functions directly access the PCI config +space, and the accesses trap to Hyper-V to be handled. +But in CoCo VMs, memory encryption prevents Hyper-V +from reading the guest instruction stream to emulate the +access, so the hv_pcifront_*() functions must invoke +hypercalls with explicit arguments describing the access to be +made. + +Config Block back-channel +------------------------- +The Hyper-V host and Hyper-V virtual PCI driver in Linux +together implement a non-standard back-channel communication +path between the host and guest. The back-channel path uses +messages sent over the VMBus channel associated with the vPCI +device. The functions hyperv_read_cfg_blk() and +hyperv_write_cfg_blk() are the primary interfaces provided to +other parts of the Linux kernel. As of this writing, these +interfaces are used only by the Mellanox mlx5 driver to pass +diagnostic data to a Hyper-V host running in the Azure public +cloud. The functions hyperv_read_cfg_blk() and +hyperv_write_cfg_blk() are implemented in a separate module +(pci-hyperv-intf.c, under CONFIG_PCI_HYPERV_INTERFACE) that +effectively stubs them out when running in non-Hyper-V +environments. From 9fef276f9f416a1e85eb48d3bd38e6018a220bf5 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 15 Jan 2024 18:20:06 -0800 Subject: [PATCH 142/225] x86/hyperv: Use slow_virt_to_phys() in page transition hypervisor callback In preparation for temporarily marking pages not present during a transition between encrypted and decrypted, use slow_virt_to_phys() in the hypervisor callback. As long as the PFN is correct, slow_virt_to_phys() works even if the leaf PTE is not present. The existing functions that depend on vmalloc_to_page() all require that the leaf PTE be marked present, so they don't work. Update the comments for slow_virt_to_phys() to note this broader usage and the requirement to work even if the PTE is not marked present. Signed-off-by: Michael Kelley Acked-by: Kirill A. Shutemov Reviewed-by: Rick Edgecombe Link: https://lore.kernel.org/r/20240116022008.1023398-2-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240116022008.1023398-2-mhklinux@outlook.com> --- arch/x86/hyperv/ivm.c | 12 +++++++++++- arch/x86/mm/pat/set_memory.c | 12 ++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 7dcbf153ad72..f38cdcb130ed 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -515,6 +515,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo enum hv_mem_host_visibility visibility = enc ? VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; u64 *pfn_array; + phys_addr_t paddr; + void *vaddr; int ret = 0; bool result = true; int i, pfn; @@ -524,7 +526,15 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo return false; for (i = 0, pfn = 0; i < pagecount; i++) { - pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE); + /* + * Use slow_virt_to_phys() because the PRESENT bit has been + * temporarily cleared in the PTEs. slow_virt_to_phys() works + * without the PRESENT bit while virt_to_hvpfn() or similar + * does not. + */ + vaddr = (void *)kbuffer + (i * HV_HYP_PAGE_SIZE); + paddr = slow_virt_to_phys(vaddr); + pfn_array[pfn] = paddr >> HV_HYP_PAGE_SHIFT; pfn++; if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index e9b448d1b1b7..b28b6f8afb0e 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -755,10 +755,14 @@ pmd_t *lookup_pmd_address(unsigned long address) * areas on 32-bit NUMA systems. The percpu areas can * end up in this kind of memory, for instance. * - * This could be optimized, but it is only intended to be - * used at initialization time, and keeping it - * unoptimized should increase the testing coverage for - * the more obscure platforms. + * Note that as long as the PTEs are well-formed with correct PFNs, this + * works without checking the PRESENT bit in the leaf PTE. This is unlike + * the similar vmalloc_to_page() and derivatives. Callers may depend on + * this behavior. + * + * This could be optimized, but it is only used in paths that are not perf + * sensitive, and keeping it unoptimized should increase the testing coverage + * for the more obscure platforms. */ phys_addr_t slow_virt_to_phys(void *__virt_addr) { From 030ad7af94371f1faeecfc12dda296d8b5a17ef8 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 15 Jan 2024 18:20:07 -0800 Subject: [PATCH 143/225] x86/mm: Regularize set_memory_p() parameters and make non-static set_memory_p() is currently static. It has parameters that don't match set_memory_p() under arch/powerpc and that aren't congruent with the other set_memory_* functions. There's no good reason for the difference. Fix this by making the parameters consistent, and update the one existing call site. Make the function non-static and add it to include/asm/set_memory.h so that it is completely parallel to set_memory_np() and is usable in other modules. No functional change. Signed-off-by: Michael Kelley Reviewed-by: Rick Edgecombe Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20240116022008.1023398-3-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240116022008.1023398-3-mhklinux@outlook.com> --- arch/x86/include/asm/set_memory.h | 1 + arch/x86/mm/pat/set_memory.c | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index a5e89641bd2d..9aee31862b4a 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -47,6 +47,7 @@ int set_memory_uc(unsigned long addr, int numpages); int set_memory_wc(unsigned long addr, int numpages); int set_memory_wb(unsigned long addr, int numpages); int set_memory_np(unsigned long addr, int numpages); +int set_memory_p(unsigned long addr, int numpages); int set_memory_4k(unsigned long addr, int numpages); int set_memory_encrypted(unsigned long addr, int numpages); int set_memory_decrypted(unsigned long addr, int numpages); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index b28b6f8afb0e..102880404046 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2045,17 +2045,12 @@ int set_mce_nospec(unsigned long pfn) return rc; } -static int set_memory_p(unsigned long *addr, int numpages) -{ - return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0); -} - /* Restore full speculative operation to the pfn. */ int clear_mce_nospec(unsigned long pfn) { unsigned long addr = (unsigned long) pfn_to_kaddr(pfn); - return set_memory_p(&addr, 1); + return set_memory_p(addr, 1); } EXPORT_SYMBOL_GPL(clear_mce_nospec); #endif /* CONFIG_X86_64 */ @@ -2108,6 +2103,11 @@ int set_memory_np_noalias(unsigned long addr, int numpages) CPA_NO_CHECK_ALIAS, NULL); } +int set_memory_p(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); +} + int set_memory_4k(unsigned long addr, int numpages) { return change_page_attr_set_clr(&addr, numpages, __pgprot(0), From 0f34d11234868dc979730a905717c15067a7d205 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 15 Jan 2024 18:20:08 -0800 Subject: [PATCH 144/225] x86/hyperv: Make encrypted/decrypted changes safe for load_unaligned_zeropad() In a CoCo VM, when transitioning memory from encrypted to decrypted, or vice versa, the caller of set_memory_encrypted() or set_memory_decrypted() is responsible for ensuring the memory isn't in use and isn't referenced while the transition is in progress. The transition has multiple steps, and the memory is in an inconsistent state until all steps are complete. A reference while the state is inconsistent could result in an exception that can't be cleanly fixed up. However, the kernel load_unaligned_zeropad() mechanism could cause a stray reference that can't be prevented by the caller of set_memory_encrypted() or set_memory_decrypted(), so there's specific code to handle this case. But a CoCo VM running on Hyper-V may be configured to run with a paravisor, with the #VC or #VE exception routed to the paravisor. There's no architectural way to forward the exceptions back to the guest kernel, and in such a case, the load_unaligned_zeropad() specific code doesn't work. To avoid this problem, mark pages as "not present" while a transition is in progress. If load_unaligned_zeropad() causes a stray reference, a normal page fault is generated instead of #VC or #VE, and the page-fault-based fixup handlers for load_unaligned_zeropad() resolve the reference. When the encrypted/decrypted transition is complete, mark the pages as "present" again. Signed-off-by: Michael Kelley Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20240116022008.1023398-4-mhklinux@outlook.com Signed-off-by: Wei Liu Message-ID: <20240116022008.1023398-4-mhklinux@outlook.com> --- arch/x86/hyperv/ivm.c | 53 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index f38cdcb130ed..768d73de0d09 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -502,6 +503,31 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], return -EFAULT; } +/* + * When transitioning memory between encrypted and decrypted, the caller + * of set_memory_encrypted() or set_memory_decrypted() is responsible for + * ensuring that the memory isn't in use and isn't referenced while the + * transition is in progress. The transition has multiple steps, and the + * memory is in an inconsistent state until all steps are complete. A + * reference while the state is inconsistent could result in an exception + * that can't be cleanly fixed up. + * + * But the Linux kernel load_unaligned_zeropad() mechanism could cause a + * stray reference that can't be prevented by the caller, so Linux has + * specific code to handle this case. But when the #VC and #VE exceptions + * routed to a paravisor, the specific code doesn't work. To avoid this + * problem, mark the pages as "not present" while the transition is in + * progress. If load_unaligned_zeropad() causes a stray reference, a normal + * page fault is generated instead of #VC or #VE, and the page-fault-based + * handlers for load_unaligned_zeropad() resolve the reference. When the + * transition is complete, hv_vtom_set_host_visibility() marks the pages + * as "present" again. + */ +static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc) +{ + return !set_memory_np(kbuffer, pagecount); +} + /* * hv_vtom_set_host_visibility - Set specified memory visible to host. * @@ -522,8 +548,10 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo int i, pfn; pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); - if (!pfn_array) - return false; + if (!pfn_array) { + result = false; + goto err_set_memory_p; + } for (i = 0, pfn = 0; i < pagecount; i++) { /* @@ -548,14 +576,30 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo } } - err_free_pfn_array: +err_free_pfn_array: kfree(pfn_array); + +err_set_memory_p: + /* + * Set the PTE PRESENT bits again to revert what hv_vtom_clear_present() + * did. Do this even if there is an error earlier in this function in + * order to avoid leaving the memory range in a "broken" state. Setting + * the PRESENT bits shouldn't fail, but return an error if it does. + */ + if (set_memory_p(kbuffer, pagecount)) + result = false; + return result; } static bool hv_vtom_tlb_flush_required(bool private) { - return true; + /* + * Since hv_vtom_clear_present() marks the PTEs as "not present" + * and flushes the TLB, they can't be in the TLB. That makes the + * flush controlled by this function redundant, so return "false". + */ + return false; } static bool hv_vtom_cache_flush_required(void) @@ -618,6 +662,7 @@ void __init hv_vtom_init(void) x86_platform.hyper.is_private_mmio = hv_is_private_mmio; x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required; x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; + x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present; x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; /* Set WB as the default cache mode. */ From 7be40883b1cb734a31a3cbfd5f8f64a97965d26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Fri, 23 Feb 2024 20:55:26 +0100 Subject: [PATCH 145/225] dt-bindings: net: renesas,ethertsn: Document default for delays MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal delay properties are not mandatory and should have a documented default value. The device only supports either no delay or a fixed delay and the device reset default is no delay, document the default as no delay. Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ethertsn.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml index 475aff7714d6..ea35d19be829 100644 --- a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml +++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml @@ -65,9 +65,11 @@ properties: rx-internal-delay-ps: enum: [0, 1800] + default: 0 tx-internal-delay-ps: enum: [0, 2000] + default: 0 '#address-cells': const: 1 From ec5c54a9d3c4f9c15e647b049fea401ee5258696 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 29 Feb 2024 18:25:49 +0100 Subject: [PATCH 146/225] gpio: fix resource unwinding order in error path Hogs are added *after* ACPI so should be removed *before* in error path. Fixes: a411e81e61df ("gpiolib: add hogs support for machine code") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- drivers/gpio/gpiolib.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 58a839c1ae48..75be4a3ca7f8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -968,11 +968,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) - goto err_remove_acpi_chip; + goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) - goto err_remove_acpi_chip; + goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) @@ -997,11 +997,11 @@ err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); -err_remove_acpi_chip: +err_free_hogs: + gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); err_remove_of_chip: - gpiochip_free_hogs(gc); of_gpiochip_remove(gc); err_free_gpiochip_mask: gpiochip_free_valid_mask(gc); From 0d63e4c0ebc2b5c329babde44fd61d3f08db814d Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Mon, 15 Jan 2024 09:57:40 -0800 Subject: [PATCH 147/225] x86/hyperv: Allow 15-bit APIC IDs for VTL platforms The current method for signaling the compatibility of a Hyper-V host with MSIs featuring 15-bit APIC IDs relies on a synthetic cpuid leaf. However, for higher VTLs, this leaf is not reported, due to the absence of an IO-APIC. As an alternative, assume that when running at a high VTL, the host supports 15-bit APIC IDs. This assumption is safe, as Hyper-V does not employ any architectural MSIs at higher VTLs This unblocks startup of VTL2 environments with more than 256 CPUs. Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1705341460-18394-1-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu Message-ID: <1705341460-18394-1-git-send-email-ssengar@linux.microsoft.com> --- arch/x86/hyperv/hv_vtl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 96e6c51515f5..cf1b78cb2d04 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -16,6 +16,11 @@ extern struct boot_params boot_params; static struct real_mode_header hv_vtl_real_mode_header; +static bool __init hv_vtl_msi_ext_dest_id(void) +{ + return true; +} + void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); @@ -38,6 +43,8 @@ void __init hv_vtl_init_platform(void) x86_platform.legacy.warm_reset = 0; x86_platform.legacy.reserve_bios_regions = 0; x86_platform.legacy.devices.pnpbios = 0; + + x86_init.hyper.msi_ext_dest_id = hv_vtl_msi_ext_dest_id; } static inline u64 hv_vtl_system_desc_base(struct ldttss_desc *desc) From 1eecc7ab82c42133b748e1895275942a054a7f67 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 28 Feb 2024 13:45:17 +0100 Subject: [PATCH 148/225] net: lan78xx: fix runtime PM count underflow on link stop Current driver has some asymmetry in the runtime PM calls. On lan78xx_open() it will call usb_autopm_get() and unconditionally usb_autopm_put(). And on lan78xx_stop() it will call only usb_autopm_put(). So far, it was working only because this driver do not activate autosuspend by default, so it was visible only by warning "Runtime PM usage count underflow!". Since, with current driver, we can't use runtime PM with active link, execute lan78xx_open()->usb_autopm_put() only in error case. Otherwise, keep ref counting high as long as interface is open. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Oleksij Rempel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ba6c8ac2a736..d2aa2c5b1989 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3135,7 +3135,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } From eb2c11b27c58a62b5027b77f702c15cd0ca38f7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Feb 2024 17:06:56 +0100 Subject: [PATCH 149/225] net: bql: fix building with BQL disabled It is now possible to disable BQL, but that causes the cpsw driver to break: drivers/net/ethernet/ti/am65-cpsw-nuss.c:297:28: error: no member named 'dql' in 'struct netdev_queue' 297 | dql_avail(&netif_txq->dql), There is already a helper function in net/sch_generic.h that could be used to help here. Move its implementation into the common linux/netdevice.h along with the other bql interfaces and change both users over to the new interface. Fixes: ea7f3cfaa588 ("net: bql: allow the config to be disabled") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- include/linux/netdevice.h | 10 ++++++++++ include/net/sch_generic.h | 7 +------ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 9d2f4ac783e4..2939a21ca74f 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -294,7 +294,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev, txqueue, netif_tx_queue_stopped(netif_txq), jiffies_to_msecs(jiffies - trans_start), - dql_avail(&netif_txq->dql), + netdev_queue_dql_avail(netif_txq), k3_cppi_desc_pool_avail(tx_chn->desc_pool)); if (netif_tx_queue_stopped(netif_txq)) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a9c973b92294..735a9386fcf8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3499,6 +3499,16 @@ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue #endif } +static inline int netdev_queue_dql_avail(const struct netdev_queue *txq) +{ +#ifdef CONFIG_BQL + /* Non-BQL migrated drivers will return 0, too. */ + return dql_avail(&txq->dql); +#else + return 0; +#endif +} + /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 934fdb977551..cefe0c4bdae3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -238,12 +238,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) { -#ifdef CONFIG_BQL - /* Non-BQL migrated drivers will return 0, too. */ - return dql_avail(&txq->dql); -#else - return 0; -#endif + return netdev_queue_dql_avail(txq); } struct Qdisc_class_ops { From 1c61728be22c1cb49c1be88693e72d8c06b1c81e Mon Sep 17 00:00:00 2001 From: Masahisa Kojima Date: Fri, 1 Mar 2024 15:32:14 +0900 Subject: [PATCH 150/225] MAINTAINERS: net: netsec: add myself as co-maintainer Add myself as co-maintainer for Socionext netsec driver. This commit also removes Jassi from maintainer since he no longer has a Developerbox. Cc: Jassi Brar Cc: Ilias Apalodimas Signed-off-by: Masahisa Kojima Acked-by: Ilias Apalodimas Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5144282a2578..132b2917b9f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20226,8 +20226,8 @@ F: Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml F: drivers/net/ethernet/socionext/sni_ave.c SOCIONEXT (SNI) NETSEC NETWORK DRIVER -M: Jassi Brar M: Ilias Apalodimas +M: Masahisa Kojima L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/socionext,synquacer-netsec.yaml From c0afb6b88fbbc177fa322a835f874be217bffe45 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 Feb 2024 17:13:16 +0800 Subject: [PATCH 151/225] crypto: rk3288 - Fix use after free in unprepare The unprepare call must be carried out before the finalize call as the latter can free the request. Fixes: c66c17a0f69b ("crypto: rk3288 - Remove prepare/unprepare request") Reported-by: Andrey Skvortsov Cc: Signed-off-by: Herbert Xu Reviewed-by: Andrey Skvortsov Signed-off-by: Herbert Xu --- drivers/crypto/rockchip/rk3288_crypto_ahash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index 1b13b4aa16ec..a235e6c300f1 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -332,12 +332,12 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq) theend: pm_runtime_put_autosuspend(rkc->dev); + rk_hash_unprepare(engine, breq); + local_bh_disable(); crypto_finalize_hash_request(engine, breq, err); local_bh_enable(); - rk_hash_unprepare(engine, breq); - return 0; } From 380cb2f4df78433f64847cbc655fad2650e4769c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 1 Mar 2024 21:10:35 +1100 Subject: [PATCH 152/225] selftests/powerpc: Fix fpu_signal failures My recent commit e5d00aaac651 ("selftests/powerpc: Check all FPRs in fpu_preempt") inadvertently broke the fpu_signal test. It needs to take into account that fpu_preempt now loads 32 FPRs, so enlarge darray. Also use the newly added randomise_darray() to properly randomise darray. Finally the checking done in signal_fpu_sig() needs to skip checking f30/f31, because they are used as scratch registers in check_all_fprs(), called by preempt_fpu(), and so could hold other values when the signal is taken. Fixes: e5d00aaac651 ("selftests/powerpc: Check all FPRs in fpu_preempt") Reported-by: Spoorthy Depends-on: 2ba107f6795d ("selftests/powerpc: Generate better bit patterns for FPU tests") Signed-off-by: Michael Ellerman Link: https://msgid.link/20240301101035.1230024-1-mpe@ellerman.id.au --- .../testing/selftests/powerpc/math/fpu_signal.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c index 7b1addd50420..8a64f63e37ce 100644 --- a/tools/testing/selftests/powerpc/math/fpu_signal.c +++ b/tools/testing/selftests/powerpc/math/fpu_signal.c @@ -18,6 +18,7 @@ #include #include "utils.h" +#include "fpu.h" /* Number of times each thread should receive the signal */ #define ITERATIONS 10 @@ -27,9 +28,7 @@ */ #define THREAD_FACTOR 8 -__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +__thread double darray[32]; bool bad_context; int threads_starting; @@ -43,9 +42,9 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context) ucontext_t *uc = context; mcontext_t *mc = &uc->uc_mcontext; - /* Only the non volatiles were loaded up */ - for (i = 14; i < 32; i++) { - if (mc->fp_regs[i] != darray[i - 14]) { + // Don't check f30/f31, they're used as scratches in check_all_fprs() + for (i = 0; i < 30; i++) { + if (mc->fp_regs[i] != darray[i]) { bad_context = true; break; } @@ -54,7 +53,6 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context) void *signal_fpu_c(void *p) { - int i; long rc; struct sigaction act; act.sa_sigaction = signal_fpu_sig; @@ -64,9 +62,7 @@ void *signal_fpu_c(void *p) return p; srand(pthread_self()); - for (i = 0; i < 21; i++) - darray[i] = rand(); - + randomise_darray(darray, ARRAY_SIZE(darray)); rc = preempt_fpu(darray, &threads_starting, &running); return (void *) rc; From 6384c56c99d98c84afea5b9ec7029a6e153ae431 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 27 Feb 2024 06:48:21 +0000 Subject: [PATCH 153/225] iommu/sva: Fix SVA handle sharing in multi device case iommu_sva_bind_device will directly goto out in multi-device case when found existing domain, ignoring list_add handle, which causes the handle to fail to be shared. Fixes: 65d4418c5002 ("iommu/sva: Restore SVA handle sharing") Signed-off-by: Zhangfei Gao Reviewed-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240227064821.128-1-zhangfei.gao@linaro.org Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 7f91c8d0064b..65814cbc8402 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -117,11 +117,11 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm if (ret) goto out_free_domain; domain->users = 1; - refcount_set(&handle->users, 1); list_add(&domain->next, &mm->iommu_mm->sva_domains); - list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); out: + refcount_set(&handle->users, 1); + list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); mutex_unlock(&iommu_sva_lock); handle->dev = dev; handle->domain = domain; From db8138845cebcdd0c709570b8217bd052757b8df Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 Feb 2024 16:21:18 +0100 Subject: [PATCH 154/225] arm64: dts: qcom: sc8280xp-crd: limit pcie4 link speed Limit the WiFi PCIe link speed to Gen2 speed (500 MB/s), which is the speed that Windows uses. Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240223152124.20042-7-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc8280xp-crd.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index ffc4406422ae..41215567b3ae 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -563,6 +563,8 @@ }; &pcie4 { + max-link-speed = <2>; + perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>; wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>; From 7a1c6a8bf47b0b290c79b9cc3ba6ee68be5522e8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 23 Feb 2024 16:21:19 +0100 Subject: [PATCH 155/225] arm64: dts: qcom: sc8280xp-x13s: limit pcie4 link speed Limit the WiFi PCIe link speed to Gen2 speed (500 MB/s), which is the speed that the boot firmware has brought up the link at (and that Windows uses). This is specifically needed to avoid a large amount of link errors when restarting the link during boot (but which are currently not reported). This also appears to fix intermittent failures to download the ath11k firmware during boot which can be seen when there is a longer delay between restarting the link and loading the WiFi driver (e.g. when using full disk encryption). Fixes: 123b30a75623 ("arm64: dts: qcom: sc8280xp-x13s: enable WiFi controller") Cc: stable@vger.kernel.org # 6.2 Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240223152124.20042-8-johan+linaro@kernel.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts index def3976bd5bb..eb657e544961 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts @@ -722,6 +722,8 @@ }; &pcie4 { + max-link-speed = <2>; + perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>; wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>; From cbf996f52c4e658b3fb4349a869a62fd2d4c3c1c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:51 +0100 Subject: [PATCH 156/225] ixgbe: {dis, en}able irqs in ixgbe_txrx_ring_{dis, en}able Currently routines that are supposed to toggle state of ring pair do not take care of associated interrupt with queue vector that these rings belong to. This causes funky issues such as dead interface due to irq misconfiguration, as per Pavel's report from Closes: tag. Add a function responsible for disabling single IRQ in EIMC register and call this as a very first thing when disabling ring pair during xsk_pool setup. For enable let's reuse ixgbe_irq_enable_queues(). Besides this, disable/enable NAPI as first/last thing when dealing with closing or opening ring pair that xsk_pool is being configured on. Reported-by: Pavel Vazharov Closes: https://lore.kernel.org/netdev/CAJEV1ijxNyPTwASJER1bcZzS9nMoZJqfR86nu_3jFFVXzZQ4NA@mail.gmail.com/ Fixes: 024aa5800f32 ("ixgbe: added Rx/Tx ring disable/enable functions") Signed-off-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index bd541527c8c7..99876b765b08 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2939,8 +2939,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -10524,6 +10524,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats)); } +/** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + /** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure @@ -10540,6 +10578,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10548,9 +10591,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10578,9 +10618,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10589,6 +10626,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** From d562b11c1eac7d73f4c778b4cbe5468f86b1f20d Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:52 +0100 Subject: [PATCH 157/225] i40e: disable NAPI right after disabling irqs when handling xsk_pool Disable NAPI before shutting down queues that this particular NAPI contains so that the order of actions in i40e_queue_pair_disable() mirrors what we do in i40e_queue_pair_enable(). Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 54eb55464e31..89a3401d20ab 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13560,9 +13560,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); From 99099c6bc75a30b76bb5d6774a0509ab6f06af05 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:53 +0100 Subject: [PATCH 158/225] ice: reorder disabling IRQ and NAPI in ice_qp_dis ice_qp_dis() currently does things in very mixed way. Tx is stopped before disabling IRQ on related queue vector, then it takes care of disabling Rx and finally NAPI is disabled. Let us start with disabling IRQs in the first place followed by turning off NAPI. Then it is safe to handle queues. One subtle change on top of that is that even though ice_qp_ena() looks more sane, clear ICE_CFG_BUSY as the last thing there. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_xsk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 8b81a1677045..2eecd0f39aa6 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -179,6 +179,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) return -EBUSY; usleep_range(1000, 2000); } + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); ice_fill_txq_meta(vsi, tx_ring, &txq_meta); @@ -195,13 +199,10 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; } - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); if (err) return err; - ice_qvec_toggle_napi(vsi, q_vector, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); @@ -259,11 +260,11 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) return err; - clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + clear_bit(ICE_CFG_BUSY, vsi->state); return 0; } From 4035c72dc1ba81a96f94de84dfd5409056c1d9c9 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 23 Feb 2024 07:40:24 +0100 Subject: [PATCH 159/225] ice: reconfig host after changing MSI-X on VF During VSI reconfiguration filters and VSI config which is set in ice_vf_init_host_cfg() are lost. Recall the host configuration function to restore them. Without this config VF on which MSI-X amount was changed might had a connection problems. Fixes: 4d38cb44bd32 ("ice: manage VFs MSI-X using resource tracking") Reviewed-by: Jacob Keller Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index a94a1c48c3de..b0f78c2f2790 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1068,6 +1068,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) struct ice_pf *pf = pci_get_drvdata(pdev); u16 prev_msix, prev_queues, queues; bool needs_rebuild = false; + struct ice_vsi *vsi; struct ice_vf *vf; int id; @@ -1102,6 +1103,10 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (!vf) return -ENOENT; + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -ENOENT; + prev_msix = vf->num_msix; prev_queues = vf->num_vf_qs; @@ -1122,7 +1127,7 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count) if (vf->first_vector_idx < 0) goto unroll; - if (ice_vf_reconfig_vsi(vf)) { + if (ice_vf_reconfig_vsi(vf) || ice_vf_init_host_cfg(vf, vsi)) { /* Try to rebuild with previous values */ needs_rebuild = true; goto unroll; @@ -1148,8 +1153,10 @@ unroll: if (vf->first_vector_idx < 0) return -EINVAL; - if (needs_rebuild) + if (needs_rebuild) { ice_vf_reconfig_vsi(vf); + ice_vf_init_host_cfg(vf, vsi); + } ice_ena_vf_mappings(vf); ice_put_vf(vf); From 7fd817c906503b6813ea3b41f5fdf4192449a707 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 31 Jan 2024 01:04:28 +0100 Subject: [PATCH 160/225] x86/e820: Don't reserve SETUP_RNG_SEED in e820 SETUP_RNG_SEED in setup_data is supplied by kexec and should not be reserved in the e820 map. Doing so reserves 16 bytes of RAM when booting with kexec. (16 bytes because data->len is zeroed by parse_setup_data so only sizeof(setup_data) is reserved.) When kexec is used repeatedly, each boot adds two entries in the kexec-provided e820 map as the 16-byte range splits a larger range of usable memory. Eventually all of the 128 available entries get used up. The next split will result in losing usable memory as the new entries cannot be added to the e820 map. Fixes: 68b8e9713c8e ("x86/setup: Use rng seeds from setup_data") Signed-off-by: Jiri Bohac Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Dave Hansen Cc: Link: https://lore.kernel.org/r/ZbmOjKnARGiaYBd5@dwarf.suse.cz --- arch/x86/kernel/e820.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index fb8cf953380d..b66f540de054 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* - * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need - * to be reserved. + * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by + * kexec and do not need to be reserved. */ - if (data->type != SETUP_EFI && data->type != SETUP_IMA) + if (data->type != SETUP_EFI && + data->type != SETUP_IMA && + data->type != SETUP_RNG_SEED) e820__range_update_kexec(pa_data, sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); From 2f03fc340cac9ea1dc63cbf8c93dd2eb0f227815 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 1 Mar 2024 22:04:06 +0900 Subject: [PATCH 161/225] tomoyo: fix UAF write bug in tomoyo_write_control() Since tomoyo_write_control() updates head->write_buf when write() of long lines is requested, we need to fetch head->write_buf after head->io_sem is held. Otherwise, concurrent write() requests can cause use-after-free-write and double-free problems. Reported-by: Sam Sun Closes: https://lkml.kernel.org/r/CAEkJfYNDspuGxYx5kym8Lvp--D36CMDUErg4rxfWFJuPbbji8g@mail.gmail.com Fixes: bd03a3e4c9a9 ("TOMOYO: Add policy namespace support.") Cc: # Linux 3.1+ Signed-off-by: Tetsuo Handa Signed-off-by: Linus Torvalds --- security/tomoyo/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 57ee70ae50f2..ea3140d510ec 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2649,13 +2649,14 @@ ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, { int error = buffer_len; size_t avail_len = buffer_len; - char *cp0 = head->write_buf; + char *cp0; int idx; if (!head->write) return -EINVAL; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + cp0 = head->write_buf; head->read_user_buf_avail = 0; idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ From 7cb50f6c9fbaa1c0b80100b8971bf13db5d75d06 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 23 Feb 2024 21:24:35 -0800 Subject: [PATCH 162/225] of: property: fw_devlink: Fix stupid bug in remote-endpoint parsing Introduced a stupid bug in commit 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") due to a last minute incorrect edit of "index !=0" into "!index". This patch fixes it to be "index > 0" to match the comment right next to it. Reported-by: Luca Ceresoli Link: https://lore.kernel.org/lkml/20240223171849.10f9901d@booty/ Fixes: 782bfd03c3ae ("of: property: Improve finding the supplier of a remote-endpoint property") Signed-off-by: Saravana Kannan Reviewed-by: Herve Codina Reviewed-by: Luca Ceresoli Tested-by: Luca Ceresoli Link: https://lore.kernel.org/r/20240224052436.3552333-1-saravanak@google.com Signed-off-by: Rob Herring --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index b71267c6667c..fa8cd33be131 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1304,7 +1304,7 @@ static struct device_node *parse_remote_endpoint(struct device_node *np, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ - if (!index || strcmp(prop_name, "remote-endpoint")) + if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); From 8deeefb24786ea7950b37bde4516b286c877db00 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Thu, 19 Oct 2023 04:49:54 +0300 Subject: [PATCH 163/225] Revert "net/mlx5: Block entering switchdev mode with ns inconsistency" This reverts commit 662404b24a4c4d839839ed25e3097571f5938b9b. The revert is required due to the suspicion it is not good for anything and cause crash. Fixes: 662404b24a4c ("net/mlx5e: Block entering switchdev mode with ns inconsistency") Signed-off-by: Gavin Li Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0455134c98e..14b3bd3c5e2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3658,22 +3658,6 @@ static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode) return 0; } -static bool esw_offloads_devlink_ns_eq_netdev_ns(struct devlink *devlink) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - struct net *devl_net, *netdev_net; - bool ret = false; - - mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); - if (dev->mlx5e_res.uplink_netdev) { - netdev_net = dev_net(dev->mlx5e_res.uplink_netdev); - devl_net = devlink_net(devlink); - ret = net_eq(devl_net, netdev_net); - } - mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); - return ret; -} - int mlx5_eswitch_block_mode(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -3718,13 +3702,6 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && - !esw_offloads_devlink_ns_eq_netdev_ns(devlink)) { - NL_SET_ERR_MSG_MOD(extack, - "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); - return -EPERM; - } - mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { From b7bbd698c90591546d22093181e266785f08c18b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 13 Dec 2023 17:07:08 -0800 Subject: [PATCH 164/225] Revert "net/mlx5e: Check the number of elements before walk TC rhashtable" This reverts commit 4e25b661f484df54b6751b65f9ea2434a3b67539. This Commit was mistakenly applied by pulling the wrong tag, remove it. Fixes: 4e25b661f484 ("net/mlx5e: Check the number of elements before walk TC rhashtable") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 190f10aba170..5a0047bdcb51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -152,7 +152,7 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) xa_for_each(&esw->offloads.vport_reps, i, rep) { rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev || !atomic_read(&rpriv->tc_ht.nelems)) + if (!rpriv || !rpriv->netdev) continue; rhashtable_walk_enter(&rpriv->tc_ht, &iter); From 85ea2c5c5ef5f24fe6e6e7028ddd90be1cb5d27e Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 11 Jan 2024 01:27:47 +0000 Subject: [PATCH 165/225] net/mlx5: E-switch, Change flow rule destination checking The checking in the cited commit is not accurate. In the common case, VF destination is internal, and uplink destination is external. However, uplink destination with packet reformat is considered as internal because firmware uses LB+hairpin to support it. Update the checking so header rewrite rules with both internal and external destinations are not allowed. Fixes: e0e22d59b47a ("net/mlx5: E-switch, Add checking for flow rule destinations") Signed-off-by: Jianbo Liu Reviewed-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 14b3bd3c5e2f..baaae628b0a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -535,21 +535,26 @@ esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) } static bool -esw_dests_to_vf_pf_vports(struct mlx5_flow_destination *dests, int max_dest) +esw_dests_to_int_external(struct mlx5_flow_destination *dests, int max_dest) { - bool vf_dest = false, pf_dest = false; + bool internal_dest = false, external_dest = false; int i; for (i = 0; i < max_dest; i++) { - if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + if (dests[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT && + dests[i].type != MLX5_FLOW_DESTINATION_TYPE_UPLINK) continue; - if (dests[i].vport.num == MLX5_VPORT_UPLINK) - pf_dest = true; + /* Uplink dest is external, but considered as internal + * if there is reformat because firmware uses LB+hairpin to support it. + */ + if (dests[i].vport.num == MLX5_VPORT_UPLINK && + !(dests[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) + external_dest = true; else - vf_dest = true; + internal_dest = true; - if (vf_dest && pf_dest) + if (internal_dest && external_dest) return true; } @@ -695,9 +700,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, /* Header rewrite with combined wire+loopback in FDB is not allowed */ if ((flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) && - esw_dests_to_vf_pf_vports(dest, i)) { + esw_dests_to_int_external(dest, i)) { esw_warn(esw->dev, - "FDB: Header rewrite with forwarding to both PF and VF is not allowed\n"); + "FDB: Header rewrite with forwarding to both internal and external dests is not allowed\n"); rule = ERR_PTR(-EINVAL); goto err_esw_get; } From ac8082a3c7a158640a2c493ec437dd9da881a6a7 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 16 Jan 2024 20:13:34 +0200 Subject: [PATCH 166/225] net/mlx5: Fix fw reporter diagnose output Restore fw reporter diagnose to print the syndrome even if it is zero. Following the cited commit, in this case (syndrome == 0) command returns no output at all. This fix restores command output in case syndrome is cleared: $ devlink health diagnose pci/0000:82:00.0 reporter fw Syndrome: 0 Fixes: d17f98bf7cc9 ("net/mlx5: devlink health: use retained error fmsg API") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8ff6dc9bc803..b5c709bba155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -452,10 +452,10 @@ mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct health_buffer __iomem *h = health->health; u8 synd = ioread8(&h->synd); + devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); if (!synd) return 0; - devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); return 0; From 5e6107b499f3fc4748109e1d87fd9603b34f1e0d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 28 Jan 2024 20:43:58 +0200 Subject: [PATCH 167/225] net/mlx5: Check capability for fw_reset Functions which can't access MFRL (Management Firmware Reset Level) register, have no use of fw_reset structures or events. Remove fw_reset structures allocation and registration for fw reset events notifications for these functions. Having the devlink param enable_remote_dev_reset on functions that don't have this capability is misleading as these functions are not allowed to influence the reset flow. Hence, this patch removes this parameter for such functions. In addition, return not supported on devlink reload action fw_activate for these functions. Fixes: 38b9f903f22b ("net/mlx5: Handle sync reset request event") Signed-off-by: Moshe Shemesh Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 6 +++++ .../ethernet/mellanox/mlx5/core/fw_reset.c | 22 +++++++++++++++++-- include/linux/mlx5/mlx5_ifc.h | 4 +++- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3e064234f6fe..98d4306929f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -157,6 +157,12 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } + if (action == DEVLINK_RELOAD_ACTION_FW_ACTIVATE && + !dev->priv.fw_reset) { + NL_SET_ERR_MSG_MOD(extack, "FW activate is unsupported for this function"); + return -EOPNOTSUPP; + } + if (mlx5_core_is_pf(dev) && pci_num_vf(pdev)) NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index f27eab6e4929..2911aa34a5be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -703,19 +703,30 @@ void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + MLX5_NB_INIT(&fw_reset->nb, fw_reset_event_notifier, GENERAL_EVENT); mlx5_eq_notifier_register(dev, &fw_reset->nb); } void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev) { - mlx5_eq_notifier_unregister(dev, &dev->priv.fw_reset->nb); + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + + if (!fw_reset) + return; + + mlx5_eq_notifier_unregister(dev, &fw_reset->nb); } void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); cancel_work_sync(&fw_reset->fw_live_patch_work); cancel_work_sync(&fw_reset->reset_request_work); @@ -733,9 +744,13 @@ static const struct devlink_param mlx5_fw_reset_devlink_params[] = { int mlx5_fw_reset_init(struct mlx5_core_dev *dev) { - struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + struct mlx5_fw_reset *fw_reset; int err; + if (!MLX5_CAP_MCAM_REG(dev, mfrl)) + return 0; + + fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); if (!fw_reset) return -ENOMEM; fw_reset->wq = create_singlethread_workqueue("mlx5_fw_reset_events"); @@ -771,6 +786,9 @@ void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!fw_reset) + return; + devl_params_unregister(priv_to_devlink(dev), mlx5_fw_reset_devlink_params, ARRAY_SIZE(mlx5_fw_reset_devlink_params)); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 3fd6310b6da6..486b7492050c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10261,7 +10261,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 regs_63_to_46[0x12]; u8 mrtc[0x1]; - u8 regs_44_to_32[0xd]; + u8 regs_44_to_41[0x4]; + u8 mfrl[0x1]; + u8 regs_39_to_32[0x8]; u8 regs_31_to_10[0x16]; u8 mtmp[0x1]; From dd238b702064b21d25b4fc39a19699319746d655 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 25 Dec 2023 01:47:05 +0000 Subject: [PATCH 168/225] net/mlx5e: Change the warning when ignore_flow_level is not supported Downgrade the print from mlx5_core_warn() to mlx5_core_dbg(), as it is just a statement of fact that firmware doesn't support ignore flow level. And change the wording to "firmware flow level support is missing", to make it more accurate. Fixes: ae2ee3be99a8 ("net/mlx5: CT: Remove warning of ignore_flow_level support for VFs") Signed-off-by: Jianbo Liu Suggested-by: Elliott, Robert (Servers) Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 86bf007fd05b..b500cc2c9689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -37,7 +37,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) { if (priv->mdev->coredev_type == MLX5_COREDEV_PF) - mlx5_core_warn(priv->mdev, "firmware level support is missing\n"); + mlx5_core_dbg(priv->mdev, "firmware flow level support is missing\n"); err = -EOPNOTSUPP; goto err_check; } From a71f2147b64941efee156bfda54fd6461d0f95df Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 13 Mar 2023 17:03:03 +0200 Subject: [PATCH 169/225] net/mlx5e: Fix MACsec state loss upon state update in offload path The packet number attribute of the SA is incremented by the device rather than the software stack when enabling hardware offload. Because the packet number attribute is managed by the hardware, the software has no insight into the value of the packet number attribute actually written by the device. Previously when MACsec offload was enabled, the hardware object for handling the offload was destroyed when the SA was disabled. Re-enabling the SA would lead to a new hardware object being instantiated. This new hardware object would not have any recollection of the correct packet number for the SA. Instead, destroy the flow steering rule when deactivating the SA and recreate it upon reactivation, preserving the original hardware object. Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Signed-off-by: Emeel Hakim Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/macsec.c | 82 ++++++++++++------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index d4ebd8743114..b2cabd6ab86c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -310,9 +310,9 @@ static void mlx5e_macsec_destroy_object(struct mlx5_core_dev *mdev, u32 macsec_o mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } -static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *sa, - bool is_tx, struct net_device *netdev, u32 fs_id) +static void mlx5e_macsec_cleanup_sa_fs(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) { int action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : MLX5_ACCEL_MACSEC_ACTION_DECRYPT; @@ -322,20 +322,49 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, mlx5_macsec_fs_del_rule(macsec->mdev->macsec_fs, sa->macsec_rule, action, netdev, fs_id); - mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); sa->macsec_rule = NULL; } +static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, + struct mlx5e_macsec_sa *sa, bool is_tx, + struct net_device *netdev, u32 fs_id) +{ + mlx5e_macsec_cleanup_sa_fs(macsec, sa, is_tx, netdev, fs_id); + mlx5e_macsec_destroy_object(macsec->mdev, sa->macsec_obj_id); +} + +static int mlx5e_macsec_init_sa_fs(struct macsec_context *ctx, + struct mlx5e_macsec_sa *sa, bool encrypt, + bool is_tx, u32 *fs_id) +{ + struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); + struct mlx5_macsec_fs *macsec_fs = priv->mdev->macsec_fs; + struct mlx5_macsec_rule_attrs rule_attrs; + union mlx5_macsec_rule *macsec_rule; + + rule_attrs.macsec_obj_id = sa->macsec_obj_id; + rule_attrs.sci = sa->sci; + rule_attrs.assoc_num = sa->assoc_num; + rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : + MLX5_ACCEL_MACSEC_ACTION_DECRYPT; + + macsec_rule = mlx5_macsec_fs_add_rule(macsec_fs, ctx, &rule_attrs, fs_id); + if (!macsec_rule) + return -ENOMEM; + + sa->macsec_rule = macsec_rule; + + return 0; +} + static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5e_macsec_sa *sa, bool encrypt, bool is_tx, u32 *fs_id) { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5e_macsec *macsec = priv->macsec; - struct mlx5_macsec_rule_attrs rule_attrs; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; - union mlx5_macsec_rule *macsec_rule; int err; obj_attrs.next_pn = sa->next_pn; @@ -357,20 +386,12 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, if (err) return err; - rule_attrs.macsec_obj_id = sa->macsec_obj_id; - rule_attrs.sci = sa->sci; - rule_attrs.assoc_num = sa->assoc_num; - rule_attrs.action = (is_tx) ? MLX5_ACCEL_MACSEC_ACTION_ENCRYPT : - MLX5_ACCEL_MACSEC_ACTION_DECRYPT; - - macsec_rule = mlx5_macsec_fs_add_rule(mdev->macsec_fs, ctx, &rule_attrs, fs_id); - if (!macsec_rule) { - err = -ENOMEM; - goto destroy_macsec_object; + if (sa->active) { + err = mlx5e_macsec_init_sa_fs(ctx, sa, encrypt, is_tx, fs_id); + if (err) + goto destroy_macsec_object; } - sa->macsec_rule = macsec_rule; - return 0; destroy_macsec_object: @@ -526,9 +547,7 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) goto destroy_sa; macsec_device->tx_sa[assoc_num] = tx_sa; - if (!secy->operational || - assoc_num != tx_sc->encoding_sa || - !tx_sa->active) + if (!secy->operational) goto out; err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); @@ -595,7 +614,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; if (ctx_tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } else { @@ -604,7 +623,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } out: mutex_unlock(&macsec->lock); @@ -1030,8 +1049,9 @@ static int mlx5e_macsec_del_rxsa(struct macsec_context *ctx) goto out; } - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + if (rx_sa->active) + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); kfree(rx_sa); rx_sc->rx_sa[assoc_num] = NULL; @@ -1112,8 +1132,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, if (!rx_sa || !rx_sa->macsec_rule) continue; - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false, ctx->secy->netdev, - rx_sc->sc_xarray_element->fs_id); + mlx5e_macsec_cleanup_sa_fs(macsec, rx_sa, false, ctx->secy->netdev, + rx_sc->sc_xarray_element->fs_id); } } @@ -1124,8 +1144,8 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false, - &rx_sc->sc_xarray_element->fs_id); + err = mlx5e_macsec_init_sa_fs(ctx, rx_sa, true, false, + &rx_sc->sc_xarray_element->fs_id); if (err) goto out; } @@ -1178,7 +1198,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) if (!tx_sa) continue; - mlx5e_macsec_cleanup_sa(macsec, tx_sa, true, ctx->secy->netdev, 0); + mlx5e_macsec_cleanup_sa_fs(macsec, tx_sa, true, ctx->secy->netdev, 0); } for (i = 0; i < MACSEC_NUM_AN; ++i) { @@ -1187,7 +1207,7 @@ static int mlx5e_macsec_upd_secy(struct macsec_context *ctx) continue; if (tx_sa->assoc_num == tx_sc->encoding_sa && tx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, tx_sa, tx_sc->encrypt, true, NULL); + err = mlx5e_macsec_init_sa_fs(ctx, tx_sa, tx_sc->encrypt, true, NULL); if (err) goto out; } From b7cf07586c40f926063d4d09f7de28ff82f62b2a Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 5 Feb 2024 13:12:28 -0800 Subject: [PATCH 170/225] net/mlx5e: Use a memory barrier to enforce PTP WQ xmit submission tracking occurs after populating the metadata_map Just simply reordering the functions mlx5e_ptp_metadata_map_put and mlx5e_ptpsq_track_metadata in the mlx5e_txwqe_complete context is not good enough since both the compiler and CPU are free to reorder these two functions. If reordering does occur, the issue that was supposedly fixed by 7e3f3ba97e6c ("net/mlx5e: Track xmit submission to PTP WQ after populating metadata map") will be seen. This will lead to NULL pointer dereferences in mlx5e_ptpsq_mark_ts_cqes_undelivered in the NAPI polling context due to the tracking list being populated before the metadata map. Fixes: 7e3f3ba97e6c ("net/mlx5e: Track xmit submission to PTP WQ after populating metadata map") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed CC: Vadim Fedorenko --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5c166d9d2dca..2fa076b23fbe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -401,6 +401,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5e_skb_cb_hwtstamp_init(skb); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); + /* ensure skb is put on metadata_map before tracking the index */ + wmb(); mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index); if (!netif_tx_queue_stopped(sq->txq) && mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) { From 90502d433c0e7e5483745a574cb719dd5d05b10c Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Thu, 8 Feb 2024 15:09:34 -0800 Subject: [PATCH 171/225] net/mlx5e: Switch to using _bh variant of of spinlock API in port timestamping NAPI poll context The NAPI poll context is a softirq context. Do not use normal spinlock API in this context to prevent concurrency issues. Fixes: 3178308ad4ca ("net/mlx5e: Make tx_port_ts logic resilient to out-of-order CQEs") Signed-off-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed CC: Vadim Fedorenko --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 078f56a3cbb2..ca05b3252a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -42,9 +42,9 @@ mlx5e_ptp_port_ts_cqe_list_add(struct mlx5e_ptp_port_ts_cqe_list *list, u8 metad WARN_ON_ONCE(tracker->inuse); tracker->inuse = true; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_add_tail(&tracker->entry, &list->tracker_list_head); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } static void @@ -54,9 +54,9 @@ mlx5e_ptp_port_ts_cqe_list_remove(struct mlx5e_ptp_port_ts_cqe_list *list, u8 me WARN_ON_ONCE(!tracker->inuse); tracker->inuse = false; - spin_lock(&list->tracker_list_lock); + spin_lock_bh(&list->tracker_list_lock); list_del(&tracker->entry); - spin_unlock(&list->tracker_list_lock); + spin_unlock_bh(&list->tracker_list_lock); } void mlx5e_ptpsq_track_metadata(struct mlx5e_ptpsq *ptpsq, u8 metadata) @@ -155,7 +155,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, struct mlx5e_ptp_metadata_map *metadata_map = &ptpsq->metadata_map; struct mlx5e_ptp_port_ts_cqe_tracker *pos, *n; - spin_lock(&cqe_list->tracker_list_lock); + spin_lock_bh(&cqe_list->tracker_list_lock); list_for_each_entry_safe(pos, n, &cqe_list->tracker_list_head, entry) { struct sk_buff *skb = mlx5e_ptp_metadata_map_lookup(metadata_map, pos->metadata_id); @@ -170,7 +170,7 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq, pos->inuse = false; list_del(&pos->entry); } - spin_unlock(&cqe_list->tracker_list_lock); + spin_unlock_bh(&cqe_list->tracker_list_lock); } #define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask) From 7838b4656110d950afdd92a081cc0f33e23e0ea8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 25 Feb 2024 11:01:41 +0800 Subject: [PATCH 172/225] block: define bvec_iter as __packed __aligned(4) In commit 19416123ab3e ("block: define 'struct bvec_iter' as packed"), what we need is to save the 4byte padding, and avoid `bio` to spread on one extra cache line. It is enough to define it as '__packed __aligned(4)', as '__packed' alone means byte aligned, and can cause compiler to generate horrible code on architectures that don't support unaligned access in case that bvec_iter is embedded in other structures. Cc: Mikulas Patocka Suggested-by: Linus Torvalds Fixes: 19416123ab3e ("block: define 'struct bvec_iter' as packed") Signed-off-by: Ming Lei Signed-off-by: Linus Torvalds --- include/linux/bvec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 555aae5448ae..bd1e361b351c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -83,7 +83,7 @@ struct bvec_iter { unsigned int bi_bvec_done; /* number of bytes completed in current bvec */ -} __packed; +} __packed __aligned(4); struct bvec_iter_all { struct bio_vec bv; From aa707b615ce1551c25c5a3500cca2cf620e36b12 Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sun, 4 Feb 2024 13:38:02 -0300 Subject: [PATCH 173/225] Drivers: hv: vmbus: make hv_bus const Now that the driver core can properly handle constant struct bus_type, move the hv_bus variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Greg Kroah-Hartman Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20240204-bus_cleanup-hv-v1-1-521bd4140673@marliere.net Signed-off-by: Wei Liu Message-ID: <20240204-bus_cleanup-hv-v1-1-521bd4140673@marliere.net> --- drivers/hv/vmbus_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index b33d5abd9beb..7f7965f3d187 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -988,7 +988,7 @@ static const struct dev_pm_ops vmbus_pm = { }; /* The one and only one */ -static struct bus_type hv_bus = { +static const struct bus_type hv_bus = { .name = "vmbus", .match = vmbus_match, .shutdown = vmbus_shutdown, From 90d35da658da8cff0d4ecbb5113f5fac9d00eb72 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Mar 2024 13:02:52 -0800 Subject: [PATCH 174/225] Linux 6.8-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6cdb5717bfe0..0e36eff14608 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 4f423c4cbe26d79d8974936eb01e0d6574c5d2ac Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 21 Feb 2024 01:07:21 +0200 Subject: [PATCH 175/225] Revert "arm64: dts: qcom: msm8996: Hook up MPM" Commit 09896da07315 ("arm64: dts: qcom: msm8996: Hook up MPM") has hooked up the MPM irq chip on the MSM8996 platform. However this causes my Dragonboard 820c crash during bootup (usually when probing IOMMUs). Revert the offending commit for now. Quick debug shows that making tlmm's wakeup-parent point to the MPM is enough to trigger the crash. Fixes: 09896da07315 ("arm64: dts: qcom: msm8996: Hook up MPM") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240221-msm8996-revert-mpm-v1-1-cdca9e30c9b4@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 39 +++++---------------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 8d41ed261adf..ee6f87c828ae 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -457,25 +457,6 @@ }; }; - mpm: interrupt-controller { - compatible = "qcom,mpm"; - qcom,rpm-msg-ram = <&apss_mpm>; - interrupts = ; - mboxes = <&apcs_glb 1>; - interrupt-controller; - #interrupt-cells = <2>; - #power-domain-cells = <0>; - interrupt-parent = <&intc>; - qcom,mpm-pin-count = <96>; - qcom,mpm-pin-map = <2 184>, /* TSENS1 upper_lower_int */ - <52 243>, /* DWC3_PRI ss_phy_irq */ - <79 347>, /* DWC3_PRI hs_phy_irq */ - <80 352>, /* DWC3_SEC hs_phy_irq */ - <81 347>, /* QUSB2_PHY_PRI DP+DM */ - <82 352>, /* QUSB2_PHY_SEC DP+DM */ - <87 326>; /* SPMI */ - }; - psci { compatible = "arm,psci-1.0"; method = "smc"; @@ -765,15 +746,8 @@ }; rpm_msg_ram: sram@68000 { - compatible = "qcom,rpm-msg-ram", "mmio-sram"; + compatible = "qcom,rpm-msg-ram"; reg = <0x00068000 0x6000>; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0x00068000 0x7000>; - - apss_mpm: sram@1b8 { - reg = <0x1b8 0x48>; - }; }; qfprom@74000 { @@ -856,8 +830,8 @@ reg = <0x004ad000 0x1000>, /* TM */ <0x004ac000 0x1000>; /* SROT */ #qcom,sensors = <8>; - interrupts-extended = <&mpm 2 IRQ_TYPE_LEVEL_HIGH>, - <&intc GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>; + interrupts = , + ; interrupt-names = "uplow", "critical"; #thermal-sensor-cells = <1>; }; @@ -1363,7 +1337,6 @@ interrupts = ; gpio-controller; gpio-ranges = <&tlmm 0 0 150>; - wakeup-parent = <&mpm>; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; @@ -1891,7 +1864,7 @@ <0x0400a000 0x002100>; reg-names = "core", "chnls", "obsrvr", "intr", "cnfg"; interrupt-names = "periph_irq"; - interrupts-extended = <&mpm 87 IRQ_TYPE_LEVEL_HIGH>; + interrupts = ; qcom,ee = <0>; qcom,channel = <0>; #address-cells = <2>; @@ -3052,8 +3025,8 @@ #size-cells = <1>; ranges; - interrupts-extended = <&mpm 79 IRQ_TYPE_LEVEL_HIGH>, - <&mpm 52 IRQ_TYPE_LEVEL_HIGH>; + interrupts = , + ; interrupt-names = "hs_phy_irq", "ss_phy_irq"; clocks = <&gcc GCC_SYS_NOC_USB3_AXI_CLK>, From 95915ba4b987cf2b222b0f251280228a1ff977ac Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Fri, 1 Mar 2024 20:07:31 +0530 Subject: [PATCH 176/225] tee: optee: Fix kernel panic caused by incorrect error handling The error path while failing to register devices on the TEE bus has a bug leading to kernel panic as follows: [ 15.398930] Unable to handle kernel paging request at virtual address ffff07ed00626d7c [ 15.406913] Mem abort info: [ 15.409722] ESR = 0x0000000096000005 [ 15.413490] EC = 0x25: DABT (current EL), IL = 32 bits [ 15.418814] SET = 0, FnV = 0 [ 15.421878] EA = 0, S1PTW = 0 [ 15.425031] FSC = 0x05: level 1 translation fault [ 15.429922] Data abort info: [ 15.432813] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 15.438310] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 15.443372] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 15.448697] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000d9e3e000 [ 15.455413] [ffff07ed00626d7c] pgd=1800000bffdf9003, p4d=1800000bffdf9003, pud=0000000000000000 [ 15.464146] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Commit 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") lead to the introduction of this bug. So fix it appropriately. Reported-by: Mikko Rapeli Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218542 Fixes: 7269cba53d90 ("tee: optee: Fix supplicant based device enumeration") Cc: stable@vger.kernel.org Signed-off-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 4b1092127694..1892e49a8e6a 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -90,13 +90,14 @@ static int optee_register_device(const uuid_t *device_uuid, u32 func) if (rc) { pr_err("device registration failed, err: %d\n", rc); put_device(&optee_device->dev); + return rc; } if (func == PTA_CMD_GET_DEVICES_SUPP) device_create_file(&optee_device->dev, &dev_attr_need_supplicant); - return rc; + return 0; } static int __optee_enumerate_devices(u32 func) From 51270d573a8d9dd5afdc7934de97d66c0e14b5fd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 29 Feb 2024 14:34:44 -0500 Subject: [PATCH 177/225] tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm updating __assign_str() and will be removing the second parameter. To make sure that it does not break anything, I make sure that it matches the __string() field, as that is where the string is actually going to be saved in. To make sure there's nothing that breaks, I added a WARN_ON() to make sure that what was used in __string() is the same that is used in __assign_str(). In doing this change, an error was triggered as __assign_str() now expects the string passed in to be a char * value. I instead had the following warning: include/trace/events/qdisc.h: In function ‘trace_event_raw_event_qdisc_reset’: include/trace/events/qdisc.h:91:35: error: passing argument 1 of 'strcmp' from incompatible pointer type [-Werror=incompatible-pointer-types] 91 | __assign_str(dev, qdisc_dev(q)); That's because the qdisc_enqueue() and qdisc_reset() pass in qdisc_dev(q) to __assign_str() and to __string(). But that function returns a pointer to struct net_device and not a string. It appears that these events are just saving the pointer as a string and then reading it as a string as well. Use qdisc_dev(q)->name to save the device instead. Fixes: a34dac0b90552 ("net_sched: add tracepoints for qdisc_reset() and qdisc_destroy()") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/trace/events/qdisc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index a3995925cb05..1f4258308b96 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,14 +81,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -106,14 +106,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; From 1ca1ba465e55b9460e4e75dec9fff31e708fec74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 13:11:52 +0000 Subject: [PATCH 178/225] geneve: make sure to pull inner header in geneve_rx() syzbot triggered a bug in geneve_rx() [1] Issue is similar to the one I fixed in commit 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. [1] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in geneve_rx drivers/net/geneve.c:279 [inline] BUG: KMSAN: uninit-value in geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] geneve_rx drivers/net/geneve.c:279 [inline] geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 udp_queue_rcv_one_skb+0x1d39/0x1f20 net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x6ae/0x6e0 net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x184/0x4b0 net/ipv4/udp.c:2346 __udp4_lib_rcv+0x1c6b/0x3010 net/ipv4/udp.c:2422 udp_rcv+0x7d/0xa0 net/ipv4/udp.c:2604 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 process_backlog+0x480/0x8b0 net/core/dev.c:5976 __napi_poll+0xe3/0x980 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x8b8/0x1870 net/core/dev.c:6778 __do_softirq+0x1b7/0x7c5 kernel/softirq.c:553 do_softirq+0x9a/0xf0 kernel/softirq.c:454 __local_bh_enable_ip+0x9b/0xa0 kernel/softirq.c:381 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline] __dev_queue_xmit+0x2768/0x51c0 net/core/dev.c:4378 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x5cb/0xbc0 mm/slub.c:3903 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1296 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6394 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2783 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x70c2/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Reported-and-tested-by: syzbot+6a1423ff3f97159aae64@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/geneve.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 32c51c244153..c4ed36c71897 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -221,7 +221,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -272,9 +272,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb->pkt_type = PACKET_HOST; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) From 429679dcf7d918b7bb523d89bde3307fb02496c3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 29 Feb 2024 17:13:31 -0800 Subject: [PATCH 179/225] page_pool: fix netlink dump stop/resume If message fills up we need to stop writing. 'break' will only get us out of the iteration over pools of a single netdev, we need to also stop walking netdevs. This results in either infinite dump, or missing pools, depending on whether message full happens on the last netdev (infinite dump) or non-last (missing pools). Fixes: 950ab53b77ab ("net: page_pool: implement GET in the netlink API") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/page_pool_user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index ffe5244e5597..278294aca66a 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -94,11 +94,12 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb, state->pp_id = pool->user.id; err = fill(skb, pool, info); if (err) - break; + goto out; } state->pp_id = 0; } +out: mutex_unlock(&page_pools_lock); rtnl_unlock(); From 45bcc0346561daa3f59e19a753cc7f3e08e8dff1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 1 Mar 2024 18:11:22 +0100 Subject: [PATCH 180/225] selftests: mptcp: diag: return KSFT_FAIL not test_cnt The test counter 'test_cnt' should not be returned in diag.sh, e.g. what if only the 4th test fail? Will do 'exit 4' which is 'exit ${KSFT_SKIP}', the whole test will be marked as skipped instead of 'failed'! So we should do ret=${KSFT_FAIL} instead. Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Cc: stable@vger.kernel.org Fixes: 42fb6cddec3b ("selftests: mptcp: more stable diag tests") Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/diag.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index f300f4e1eb59..18d37d4695c1 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -69,7 +69,7 @@ __chk_nr() else echo "[ fail ] expected $expected found $nr" mptcp_lib_result_fail "${msg}" - ret=$test_cnt + ret=${KSFT_FAIL} fi else echo "[ ok ]" @@ -124,11 +124,11 @@ wait_msk_nr() if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" mptcp_lib_result_fail "${msg} # timeout" - ret=$test_cnt + ret=${KSFT_FAIL} elif [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" mptcp_lib_result_fail "${msg} # unexpected result" - ret=$test_cnt + ret=${KSFT_FAIL} else echo "[ ok ]" mptcp_lib_result_pass "${msg}" From f05d2283d11187675569d3a0286dbaf45d8eb70c Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 1 Mar 2024 18:11:23 +0100 Subject: [PATCH 181/225] selftests: mptcp: diag: avoid extra waiting When creating a lot of listener sockets, it is enough to wait only for the last one, like we are doing before in diag.sh for other subtests. If we do a check for each listener sockets, each time listing all available sockets, it can take a very long time in very slow environments, at the point we can reach some timeout. When using the debug kconfig, the waiting time switches from more than 8 sec to 0.1 sec on my side. In slow/busy environments, and with a poll timeout set to 30 ms, the waiting time could go up to ~100 sec because the listener socket would timeout and stop, while the script would still be checking one by one if all sockets are ready. The result is that after having waited for everything to be ready, all sockets have been stopped due to a timeout, and it is too late for the script to check how many there were. While at it, also removed ss options we don't need: we only need the filtering options, to count how many listener sockets have been created. We don't need to ask ss to display internal TCP information, and the memory if the output is dropped by the 'wc -l' command anyway. Fixes: b4b51d36bbaa ("selftests: mptcp: explicitly trigger the listener diag code-path") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/r/20240301063754.2ecefecf@kernel.org Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/diag.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 18d37d4695c1..75fc95675e2d 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -96,8 +96,8 @@ chk_listener_nr() local expected=$1 local msg="$2" - __chk_nr "ss -inmlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0 - __chk_nr "ss -inmlHtON $ns | wc -l" "$expected" "$msg - subflows" + __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0 + __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows" } wait_msk_nr() @@ -304,10 +304,7 @@ for I in $(seq 1 $NR_SERVERS); do ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \ -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 & done - -for I in $(seq 1 $NR_SERVERS); do - mptcp_lib_wait_local_port_listen $ns $((I + 20001)) -done +mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001)) chk_listener_nr $NR_SERVERS "many listener sockets" From af1e0a7d39f98c0dea1b186a76fcee7da6a5f7bc Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Mon, 4 Mar 2024 18:16:53 +0800 Subject: [PATCH 182/225] firmware: microchip: Fix over-requested allocation size cocci warnings: (new ones prefixed by >>) >> drivers/firmware/microchip/mpfs-auto-update.c:387:72-78: ERROR: application of sizeof to pointer drivers/firmware/microchip/mpfs-auto-update.c:170:72-78: ERROR: application of sizeof to pointer response_msg is a pointer to u32, so the size of element it points to is supposed to be a multiple of sizeof(u32), rather than sizeof(u32 *). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403040516.CYxoWTXw-lkp@intel.com/ Signed-off-by: Dawei Li Fixes: ec5b0f1193ad ("firmware: microchip: add PolarFire SoC Auto Update support") Signed-off-by: Conor Dooley --- drivers/firmware/microchip/mpfs-auto-update.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/microchip/mpfs-auto-update.c b/drivers/firmware/microchip/mpfs-auto-update.c index 81f5f62e34fc..32394c24b37d 100644 --- a/drivers/firmware/microchip/mpfs-auto-update.c +++ b/drivers/firmware/microchip/mpfs-auto-update.c @@ -384,7 +384,8 @@ static int mpfs_auto_update_available(struct mpfs_auto_update_priv *priv) u32 *response_msg; int ret; - response_msg = devm_kzalloc(priv->dev, AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(response_msg), + response_msg = devm_kzalloc(priv->dev, + AUTO_UPDATE_FEATURE_RESP_SIZE * sizeof(*response_msg), GFP_KERNEL); if (!response_msg) return -ENOMEM; From 89d72d4125e94aa3c2140fedd97ce07ba9e37674 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 1 Mar 2024 09:06:08 +0100 Subject: [PATCH 183/225] net: sparx5: Fix use after free inside sparx5_del_mact_entry Based on the static analyzis of the code it looks like when an entry from the MAC table was removed, the entry was still used after being freed. More precise the vid of the mac_entry was used after calling devm_kfree on the mac_entry. The fix consists in first using the vid of the mac_entry to delete the entry from the HW and after that to free it. Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240301080608.3053468-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 4af285918ea2..75868b3f548e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -347,10 +347,10 @@ int sparx5_del_mact_entry(struct sparx5 *sparx5, list) { if ((vid == 0 || mact_entry->vid == vid) && ether_addr_equal(addr, mact_entry->mac)) { + sparx5_mact_forget(sparx5, addr, mact_entry->vid); + list_del(&mact_entry->list); devm_kfree(sparx5->dev, mact_entry); - - sparx5_mact_forget(sparx5, addr, mact_entry->vid); } } mutex_unlock(&sparx5->mact_lock); From 28468cbed92ea5eed19e2cbd2d55758c3c7938ca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 4 Mar 2024 10:29:44 -0800 Subject: [PATCH 184/225] Revert "fs/aio: Make io_cancel() generate completions again" Patch "fs/aio: Make io_cancel() generate completions again" is based on the assumption that calling kiocb->ki_cancel() does not complete R/W requests. This is incorrect: the two drivers that call kiocb_set_cancel_fn() callers set a cancellation function that calls usb_ep_dequeue(). According to its documentation, usb_ep_dequeue() calls the completion routine with status -ECONNRESET. Hence this revert. Cc: Benjamin LaHaise Cc: Eric Biggers Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Reported-by: syzbot+b91eb2ed18f599dd3c31@syzkaller.appspotmail.com Fixes: 54cbc058d86b ("fs/aio: Make io_cancel() generate completions again") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240304182945.3646109-1-bvanassche@acm.org Acked-by: Eric Biggers Signed-off-by: Christian Brauner --- fs/aio.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 28223f511931..da18dbcfcb22 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2165,11 +2165,14 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, #endif /* sys_io_cancel: - * Attempts to cancel an iocb previously passed to io_submit(). If the - * operation is successfully cancelled 0 is returned. May fail with - * -EFAULT if any of the data structures pointed to are invalid. May - * fail with -EINVAL if aio_context specified by ctx_id is invalid. Will - * fail with -ENOSYS if not implemented. + * Attempts to cancel an iocb previously passed to io_submit. If + * the operation is successfully cancelled, the resulting event is + * copied into the memory pointed to by result without being placed + * into the completion queue and 0 is returned. May fail with + * -EFAULT if any of the data structures pointed to are invalid. + * May fail with -EINVAL if aio_context specified by ctx_id is + * invalid. May fail with -EAGAIN if the iocb specified was not + * cancelled. Will fail with -ENOSYS if not implemented. */ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result) @@ -2200,12 +2203,14 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, } spin_unlock_irq(&ctx->ctx_lock); - /* - * The result argument is no longer used - the io_event is always - * delivered via the ring buffer. - */ - if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW) - aio_complete_rw(&kiocb->rw, -EINTR); + if (!ret) { + /* + * The result argument is no longer used - the io_event is + * always delivered via the ring buffer. -EINPROGRESS indicates + * cancellation is progress: + */ + ret = -EINPROGRESS; + } percpu_ref_put(&ctx->users); From aec7d25b497ce4a8d044e9496de0aa433f7f8f06 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 4 Mar 2024 14:43:55 +0100 Subject: [PATCH 185/225] platform/x86: p2sb: On Goldmont only cache P2SB and SPI devfn BAR On Goldmont p2sb_bar() only ever gets called for 2 devices, the actual P2SB devfn 13,0 and the SPI controller which is part of the P2SB, devfn 13,2. But the current p2sb code tries to cache BAR0 info for all of devfn 13,0 to 13,7 . This involves calling pci_scan_single_device() for device 13 functions 0-7 and the hw does not seem to like pci_scan_single_device() getting called for some of the other hidden devices. E.g. on an ASUS VivoBook D540NV-GQ065T this leads to continuous ACPI errors leading to high CPU usage. Fix this by only caching BAR0 info and thus only calling pci_scan_single_device() for the P2SB and the SPI controller. Fixes: 5913320eb0b3 ("platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe") Reported-by: Danil Rybakov Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218531 Tested-by: Danil Rybakov Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240304134356.305375-2-hdegoede@redhat.com --- drivers/platform/x86/p2sb.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 6bd14d0132db..3d66e1d4eb1f 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -20,9 +20,11 @@ #define P2SBC_HIDE BIT(8) #define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) +#define P2SB_DEVFN_GOLDMONT PCI_DEVFN(13, 0) +#define SPI_DEVFN_GOLDMONT PCI_DEVFN(13, 2) static const struct x86_cpu_id p2sb_cpu_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, P2SB_DEVFN_GOLDMONT), {} }; @@ -98,21 +100,12 @@ static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) { - unsigned int slot, fn; + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); - if (PCI_FUNC(devfn) == 0) { - /* - * When function number of the P2SB device is zero, scan it and - * other function numbers, and if devices are available, cache - * their BAR0s. - */ - slot = PCI_SLOT(devfn); - for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) - p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); - } else { - /* Scan the P2SB device and cache its BAR0 */ - p2sb_scan_and_cache_devfn(bus, devfn); - } + /* On Goldmont p2sb_bar() also gets called for the SPI controller */ + if (devfn == P2SB_DEVFN_GOLDMONT) + p2sb_scan_and_cache_devfn(bus, SPI_DEVFN_GOLDMONT); if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) return -ENOENT; From 0314cebb29be2f961abb37bd0b01cb16899868f2 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 26 Feb 2024 06:40:10 -0800 Subject: [PATCH 186/225] platform/x86/amd/pmf: Fix missing error code in amd_pmf_init_smart_pc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the error path, assign -ENOMEM to ret when memory allocation of "dev->prev_data" fails. Fixes: e70961505808 ("platform/x86/amd/pmf: Fixup error handling for amd_pmf_init_smart_pc()") Signed-off-by: Harshit Mogalapalli Reviewed-by: Ilpo Järvinen Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20240226144011.2100804-1-harshit.m.mogalapalli@oracle.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/tee-if.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 8527dca9cf56..dcbe8f85e122 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -458,8 +458,10 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) amd_pmf_hex_dump_pb(dev); dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); - if (!dev->prev_data) + if (!dev->prev_data) { + ret = -ENOMEM; goto error; + } ret = amd_pmf_start_policy_engine(dev); if (ret) From 961ebd120565cb60cebe21cb634fbc456022db4a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 4 Mar 2024 15:57:15 -0800 Subject: [PATCH 187/225] fs/aio: Check IOCB_AIO_RW before the struct aio_kiocb conversion The first kiocb_set_cancel_fn() argument may point at a struct kiocb that is not embedded inside struct aio_kiocb. With the current code, depending on the compiler, the req->ki_ctx read happens either before the IOCB_AIO_RW test or after that test. Move the req->ki_ctx read such that it is guaranteed that the IOCB_AIO_RW test happens first. Reported-by: Eric Biggers Cc: Benjamin LaHaise Cc: Eric Biggers Cc: Christoph Hellwig Cc: Avi Kivity Cc: Sandeep Dhavale Cc: Jens Axboe Cc: Greg Kroah-Hartman Cc: Kent Overstreet Cc: stable@vger.kernel.org Fixes: b820de741ae4 ("fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240304235715.3790858-1-bvanassche@acm.org Reviewed-by: Jens Axboe Reviewed-by: Eric Biggers Signed-off-by: Christian Brauner --- fs/aio.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index da18dbcfcb22..9cdaa2faa536 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -589,8 +589,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) { - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); - struct kioctx *ctx = req->ki_ctx; + struct aio_kiocb *req; + struct kioctx *ctx; unsigned long flags; /* @@ -600,9 +600,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) if (!(iocb->ki_flags & IOCB_AIO_RW)) return; + req = container_of(iocb, struct aio_kiocb, rw); + if (WARN_ON_ONCE(!list_empty(&req->ki_list))) return; + ctx = req->ki_ctx; + spin_lock_irqsave(&ctx->ctx_lock, flags); list_add_tail(&req->ki_list, &ctx->active_reqs); req->ki_cancel = cancel; From d4872d70fc6feabfc8e897edad993a81096ade9f Mon Sep 17 00:00:00 2001 From: Mike Yu Date: Mon, 4 Mar 2024 12:24:08 +0000 Subject: [PATCH 188/225] xfrm: fix xfrm child route lookup for packet offload In current code, xfrm_bundle_create() always uses the matched SA's family type to look up a xfrm child route for the skb. The route returned by xfrm_dst_lookup() will eventually be used in xfrm_output_resume() (skb_dst(skb)->ops->local_out()). If packet offload is used, the above behavior can lead to calling ip_local_out() for an IPv6 packet or calling ip6_local_out() for an IPv4 packet, which is likely to fail. This change fixes the behavior by checking if the matched SA has packet offload enabled. If not, keep the same behavior; if yes, use the matched SP's family type for the lookup. Test: verified IPv6-in-IPv4 packets on Android device with IPsec packet offload enabled Signed-off-by: Mike Yu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7351f32052dc..da6ecc6b3e15 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2694,7 +2694,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m) mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); - family = xfrm[i]->props.family; + if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET) + family = xfrm[i]->props.family; + oif = fl->flowi_oif ? : fl->flowi_l3mdev; dst = xfrm_dst_lookup(xfrm[i], tos, oif, &saddr, &daddr, family, mark); From 8688ab2170a5be0bc922195f7091c38b506bab2e Mon Sep 17 00:00:00 2001 From: Mike Yu Date: Mon, 4 Mar 2024 12:24:09 +0000 Subject: [PATCH 189/225] xfrm: set skb control buffer based on packet offload as well In packet offload, packets are not encrypted in XFRM stack, so the next network layer which the packets will be forwarded to should depend on where the packet came from (either xfrm4_output or xfrm6_output) rather than the matched SA's family type. Test: verified IPv6-in-IPv4 packets on Android device with IPsec packet offload enabled Signed-off-by: Mike Yu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 662c83beb345..e5722c95b8bb 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -704,9 +704,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); struct xfrm_state *x = skb_dst(skb)->xfrm; + int family; int err; - switch (x->outer_mode.family) { + family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family + : skb_dst(skb)->ops->family; + + switch (family) { case AF_INET: memset(IPCB(skb), 0, sizeof(*IPCB(skb))); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; From 330068589389ccae3452db15ecacc3e147ac9c1c Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 7 Feb 2024 16:42:43 -0800 Subject: [PATCH 190/225] idpf: disable local BH when scheduling napi for marker packets Fix softirq's not being handled during napi_schedule() call when receiving marker packets for queue disable by disabling local bottom half. The issue can be seen on ifdown: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #08!!! Using ftrace to catch the failing scenario: ifconfig [003] d.... 22739.830624: softirq_raise: vec=3 [action=NET_RX] -0 [003] ..s.. 22739.831357: softirq_entry: vec=3 [action=NET_RX] No interrupt and CPU is idle. After the patch when disabling local BH before calling napi_schedule: ifconfig [003] d.... 22993.928336: softirq_raise: vec=3 [action=NET_RX] ifconfig [003] ..s1. 22993.928337: softirq_entry: vec=3 [action=NET_RX] Fixes: c2d548cad150 ("idpf: add TX splitq napi poll support") Reviewed-by: Jesse Brandeburg Reviewed-by: Przemek Kitszel Signed-off-by: Emil Tantilov Signed-off-by: Alan Brady Reviewed-by: Simon Horman Tested-by: Krishneil Singh Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/idpf/idpf_virtchnl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index d0cdd63b3d5b..390977a76de2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -2087,8 +2087,10 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport) set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); /* schedule the napi to receive all the marker packets */ + local_bh_disable(); for (i = 0; i < vport->num_q_vectors; i++) napi_schedule(&vport->q_vectors[i].napi); + local_bh_enable(); return idpf_wait_for_marker_event(vport); } From 2652b99e43403dc464f3648483ffb38e48872fe4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Jan 2024 13:51:58 -0800 Subject: [PATCH 191/225] ice: virtchnl: stop pretending to support RSS over AQ or registers The E800 series hardware uses the same iAVF driver as older devices, including the virtchnl negotiation scheme. This negotiation scheme includes a mechanism to determine what type of RSS should be supported, including RSS over PF virtchnl messages, RSS over firmware AdminQ messages, and RSS via direct register access. The PF driver will always prefer VIRTCHNL_VF_OFFLOAD_RSS_PF if its supported by the VF driver. However, if an older VF driver is loaded, it may request only VIRTCHNL_VF_OFFLOAD_RSS_REG or VIRTCHNL_VF_OFFLOAD_RSS_AQ. The ice driver happily agrees to support these methods. Unfortunately, the underlying hardware does not support these mechanisms. The E800 series VFs don't have the appropriate registers for RSS_REG. The mailbox queue used by VFs for VF to PF communication blocks messages which do not have the VF-to-PF opcode. Stop lying to the VF that it could support RSS over AdminQ or registers, as these interfaces do not work when the hardware is operating on an E800 series device. In practice this is unlikely to be hit by any normal user. The iAVF driver has supported RSS over PF virtchnl commands since 2016, and always defaults to using RSS_PF if possible. In principle, nothing actually stops the existing VF from attempting to access the registers or send an AQ command. However a properly coded VF will check the capability flags and will report a more useful error if it detects a case where the driver does not support the RSS offloads that it does. Fixes: 1071a8358a28 ("ice: Implement virtchnl commands for AVF support") Signed-off-by: Jacob Keller Reviewed-by: Alan Brady Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl.c | 9 +-------- drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c | 2 -- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index c925813ec9ca..6f2328a049bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -440,7 +440,6 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vf->driver_caps = *(u32 *)msg; else vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 | - VIRTCHNL_VF_OFFLOAD_RSS_REG | VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; @@ -453,14 +452,8 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi, vf->driver_caps); - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; - } else { - if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ) - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; - else - vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; - } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5e19d48a05b4..d796dbd2a440 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -13,8 +13,6 @@ * - opcodes needed by VF when caps are activated * * Caps that don't use new opcodes (no opcodes should be allowed): - * - VIRTCHNL_VF_OFFLOAD_RSS_AQ - * - VIRTCHNL_VF_OFFLOAD_RSS_REG * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR * - VIRTCHNL_VF_OFFLOAD_CRC * - VIRTCHNL_VF_OFFLOAD_RX_POLLING From 06e456a05d669ca30b224b8ed962421770c1496c Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Wed, 28 Feb 2024 18:54:48 +0300 Subject: [PATCH 192/225] net: ice: Fix potential NULL pointer dereference in ice_bridge_setlink() The function ice_bridge_setlink() may encounter a NULL pointer dereference if nlmsg_find_attr() returns NULL and br_spec is dereferenced subsequently in nla_for_each_nested(). To address this issue, add a check to ensure that br_spec is not NULL before proceeding with the nested attribute iteration. Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") Signed-off-by: Rand Deeb Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 59c7e37f175f..df6a68ab747e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8013,6 +8013,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; From 9224fc86f1776193650a33a275cac628952f80a9 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 1 Mar 2024 14:37:08 +0100 Subject: [PATCH 193/225] ice: fix uninitialized dplls mutex usage The pf->dplls.lock mutex is initialized too late, after its first use. Move it to the top of ice_dpll_init. Note that the "err_exit" error path destroys the mutex. And the mutex is the last thing destroyed in ice_dpll_deinit. This fixes the following warning with CONFIG_DEBUG_MUTEXES: ice 0000:10:00.0: The DDP package was successfully loaded: ICE OS Default Package version 1.3.36.0 ice 0000:10:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link) ice 0000:10:00.0: PTP init successful ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 410 at kernel/locking/mutex.c:587 __mutex_lock+0x773/0xd40 Modules linked in: crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic ice(+) nvme nvme_c> CPU: 0 PID: 410 Comm: kworker/0:4 Not tainted 6.8.0-rc5+ #3 Hardware name: HPE ProLiant DL110 Gen10 Plus/ProLiant DL110 Gen10 Plus, BIOS U56 10/19/2023 Workqueue: events work_for_cpu_fn RIP: 0010:__mutex_lock+0x773/0xd40 Code: c0 0f 84 1d f9 ff ff 44 8b 35 0d 9c 69 01 45 85 f6 0f 85 0d f9 ff ff 48 c7 c6 12 a2 a9 85 48 c7 c7 12 f1 a> RSP: 0018:ff7eb1a3417a7ae0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000000 RDX: 0000000000000002 RSI: ffffffff85ac2bff RDI: 00000000ffffffff RBP: ff7eb1a3417a7b80 R08: 0000000000000000 R09: 00000000ffffbfff R10: ff7eb1a3417a7978 R11: ff32b80f7fd2e568 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ff32b7f02c50e0d8 FS: 0000000000000000(0000) GS:ff32b80efe800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055b5852cc000 CR3: 000000003c43a004 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x84/0x170 ? __mutex_lock+0x773/0xd40 ? report_bug+0x1c7/0x1d0 ? prb_read_valid+0x1b/0x30 ? handle_bug+0x42/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? __mutex_lock+0x773/0xd40 ? rcu_is_watching+0x11/0x50 ? __kmalloc_node_track_caller+0x346/0x490 ? ice_dpll_lock_status_get+0x28/0x50 [ice] ? __pfx_ice_dpll_lock_status_get+0x10/0x10 [ice] ? ice_dpll_lock_status_get+0x28/0x50 [ice] ice_dpll_lock_status_get+0x28/0x50 [ice] dpll_device_get_one+0x14f/0x2e0 dpll_device_event_send+0x7d/0x150 dpll_device_register+0x124/0x180 ice_dpll_init_dpll+0x7b/0xd0 [ice] ice_dpll_init+0x224/0xa40 [ice] ? _dev_info+0x70/0x90 ice_load+0x468/0x690 [ice] ice_probe+0x75b/0xa10 [ice] ? _raw_spin_unlock_irqrestore+0x4f/0x80 ? process_one_work+0x1a3/0x500 local_pci_probe+0x47/0xa0 work_for_cpu_fn+0x17/0x30 process_one_work+0x20d/0x500 worker_thread+0x1df/0x3e0 ? __pfx_worker_thread+0x10/0x10 kthread+0x103/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 irq event stamp: 125197 hardirqs last enabled at (125197): [] finish_task_switch.isra.0+0x12d/0x3d0 hardirqs last disabled at (125196): [] __schedule+0xea4/0x19f0 softirqs last enabled at (105334): [] napi_get_frags_check+0x1a/0x60 softirqs last disabled at (105332): [] napi_get_frags_check+0x1a/0x60 ---[ end trace 0000000000000000 ]--- Fixes: d7999f5ea64b ("ice: implement dpll interface to control cgu") Signed-off-by: Michal Schmidt Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index adfa1f2a80a6..fda9140c0da4 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -2120,6 +2120,7 @@ void ice_dpll_init(struct ice_pf *pf) struct ice_dplls *d = &pf->dplls; int err = 0; + mutex_init(&d->lock); err = ice_dpll_init_info(pf, cgu); if (err) goto err_exit; @@ -2132,7 +2133,6 @@ void ice_dpll_init(struct ice_pf *pf) err = ice_dpll_init_pins(pf, cgu); if (err) goto deinit_pps; - mutex_init(&d->lock); if (cgu) { err = ice_dpll_init_worker(pf); if (err) From 6c5b6ca7642f2992502a22dbd8b80927de174b67 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 4 Mar 2024 16:37:07 -0800 Subject: [PATCH 194/225] ice: fix typo in assignment Fix an obviously incorrect assignment, created with a typo or cut-n-paste error. Fixes: 5995ef88e3a8 ("ice: realloc VSI stats arrays") Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Reviewed-by: Paul Menzel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 097bf8fd6bf0..fc23dbe302b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3192,7 +3192,7 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) } } - tx_ring_stats = vsi_stat->rx_ring_stats; + tx_ring_stats = vsi_stat->tx_ring_stats; vsi_stat->tx_ring_stats = krealloc_array(vsi_stat->tx_ring_stats, req_txq, sizeof(*vsi_stat->tx_ring_stats), From 36c824ca3e4fa8d1224c2dcdeaca39d2ca86a42f Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 28 Feb 2024 18:26:03 +0100 Subject: [PATCH 195/225] i40e: Fix firmware version comparison function Helper i40e_is_fw_ver_eq() compares incorrectly given firmware version as it returns true when the major version of running firmware is greater than the given major version that is wrong and results in failure during getting of DCB configuration where this helper is used. Fix the check and return true only if the running FW version is exactly equals to the given version. Reproducer: 1. Load i40e driver 2. Check dmesg output [root@host ~]# modprobe i40e [root@host ~]# dmesg | grep 'i40e.*DCB' [ 74.750642] i40e 0000:02:00.0: Query for DCB configuration failed, err -EIO aq_err I40E_AQ_RC_EINVAL [ 74.759770] i40e 0000:02:00.0: DCB init failed -5, disabled [ 74.966550] i40e 0000:02:00.1: Query for DCB configuration failed, err -EIO aq_err I40E_AQ_RC_EINVAL [ 74.975683] i40e 0000:02:00.1: DCB init failed -5, disabled Fixes: cf488e13221f ("i40e: Add other helpers to check version of running firmware and AQ API") Signed-off-by: Ivan Vecera Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_prototype.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index af4269330581..ce1f11b8ad65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -567,8 +567,7 @@ static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) **/ static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min) { - return (hw->aq.fw_maj_ver > maj || - (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min)); + return (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min); } #endif /* _I40E_PROTOTYPE_H_ */ From ef27f655b438bed4c83680e4f01e1cde2739854b Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Mon, 19 Feb 2024 10:08:43 +0100 Subject: [PATCH 196/225] igc: avoid returning frame twice in XDP_REDIRECT When a frame can not be transmitted in XDP_REDIRECT (e.g. due to a full queue), it is necessary to free it by calling xdp_return_frame_rx_napi. However, this is the responsibility of the caller of the ndo_xdp_xmit (see for example bq_xmit_all in kernel/bpf/devmap.c) and thus calling it inside igc_xdp_xmit (which is the ndo_xdp_xmit of the igc driver) as well will lead to memory corruption. In fact, bq_xmit_all expects that it can return all frames after the last successfully transmitted one. Therefore, break for the first not transmitted frame, but do not call xdp_return_frame_rx_napi in igc_xdp_xmit. This is equally implemented in other Intel drivers such as the igb. There are two alternatives to this that were rejected: 1. Return num_frames as all the frames would have been transmitted and release them inside igc_xdp_xmit. While it might work technically, it is not what the return value is meant to represent (i.e. the number of SUCCESSFULLY transmitted packets). 2. Rework kernel/bpf/devmap.c and all drivers to support non-consecutively dropped packets. Besides being complex, it likely has a negative performance impact without a significant gain since it is anyway unlikely that the next frame can be transmitted if the previous one was dropped. The memory corruption can be reproduced with the following script which leads to a kernel panic after a few seconds. It basically generates more traffic than a i225 NIC can transmit and pushes it via XDP_REDIRECT from a virtual interface to the physical interface where frames get dropped. #!/bin/bash INTERFACE=enp4s0 INTERFACE_IDX=`cat /sys/class/net/$INTERFACE/ifindex` sudo ip link add dev veth1 type veth peer name veth2 sudo ip link set up $INTERFACE sudo ip link set up veth1 sudo ip link set up veth2 cat << EOF > redirect.bpf.c SEC("prog") int redirect(struct xdp_md *ctx) { return bpf_redirect($INTERFACE_IDX, 0); } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c redirect.bpf.c -o redirect.bpf.o sudo ip link set veth2 xdp obj redirect.bpf.o cat << EOF > pass.bpf.c SEC("prog") int pass(struct xdp_md *ctx) { return XDP_PASS; } char _license[] SEC("license") = "GPL"; EOF clang -O2 -g -Wall -target bpf -c pass.bpf.c -o pass.bpf.o sudo ip link set $INTERFACE xdp obj pass.bpf.o cat << EOF > trafgen.cfg { /* Ethernet Header */ 0xe8, 0x6a, 0x64, 0x41, 0xbf, 0x46, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 1, 1, # IP Src - adapt as needed 10, 0, 1, 2, # IP Dest - adapt as needed const16(6666), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } EOF sudo trafgen -i trafgen.cfg -b3000MB -o veth1 --cpp Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action") Signed-off-by: Florian Kauer Reviewed-by: Maciej Fijalkowski Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ba8d3fe186ae..81c21a893ede 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6487,7 +6487,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, int cpu = smp_processor_id(); struct netdev_queue *nq; struct igc_ring *ring; - int i, drops; + int i, nxmit; if (unlikely(!netif_carrier_ok(dev))) return -ENETDOWN; @@ -6503,16 +6503,15 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); - drops = 0; + nxmit = 0; for (i = 0; i < num_frames; i++) { int err; struct xdp_frame *xdpf = frames[i]; err = igc_xdp_init_tx_descriptor(ring, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) @@ -6520,7 +6519,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_unlock(nq); - return num_frames - drops; + return nxmit; } static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, From ba54b1a276a6b69d80649942fe5334d19851443e Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 18 Feb 2024 09:42:21 +0200 Subject: [PATCH 197/225] intel: legacy: Partial revert of field get conversion Refactoring of the field get conversion introduced a regression in the legacy Wake On Lan from a magic packet with i219 devices. Rx address copied not correctly from MAC to PHY with FIELD_GET macro. Fixes: b9a452545075 ("intel: legacy: field get conversion") Suggested-by: Vitaly Lifshits Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a2788fd5f8bb..19e450a5bd31 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2559,7 +2559,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), - FIELD_GET(E1000_RAH_AV, mac_reg)); + (u16)((mac_reg & E1000_RAH_AV) >> 16)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); From 685f7d531264599b3f167f1e94bbd22f120e5fab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Mar 2024 14:48:00 +0000 Subject: [PATCH 198/225] net/ipv6: avoid possible UAF in ip6_route_mpath_notify() syzbot found another use-after-free in ip6_route_mpath_notify() [1] Commit f7225172f25a ("net/ipv6: prevent use after free in ip6_route_mpath_notify") was not able to fix the root cause. We need to defer the fib6_info_release() calls after ip6_route_mpath_notify(), in the cleanup phase. [1] BUG: KASAN: slab-use-after-free in rt6_fill_node+0x1460/0x1ac0 Read of size 4 at addr ffff88809a07fc64 by task syz-executor.2/23037 CPU: 0 PID: 23037 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-01035-gea7f3cfaa588 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x167/0x540 mm/kasan/report.c:488 kasan_report+0x142/0x180 mm/kasan/report.c:601 rt6_fill_node+0x1460/0x1ac0 inet6_rt_notify+0x13b/0x290 net/ipv6/route.c:6184 ip6_route_mpath_notify net/ipv6/route.c:5198 [inline] ip6_route_multipath_add net/ipv6/route.c:5404 [inline] inet6_rtm_newroute+0x1d0f/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f73dd87dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f73de6550c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f73dd9ac050 RCX: 00007f73dd87dda9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 RBP: 00007f73dd8ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f73dd9ac050 R15: 00007ffdbdeb7858 Allocated by task 23037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:372 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:389 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:3981 [inline] __kmalloc+0x22e/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] fib6_info_alloc+0x2e/0xf0 net/ipv6/ip6_fib.c:155 ip6_route_info_create+0x445/0x12b0 net/ipv6/route.c:3758 ip6_route_multipath_add net/ipv6/route.c:5298 [inline] inet6_rtm_newroute+0x744/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 Freed by task 16: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x4e/0x60 mm/kasan/generic.c:640 poison_slab_object+0xa6/0xe0 mm/kasan/common.c:241 __kasan_slab_free+0x34/0x70 mm/kasan/common.c:257 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kfree+0x14a/0x380 mm/slub.c:4409 rcu_do_batch kernel/rcu/tree.c:2190 [inline] rcu_core+0xd76/0x1810 kernel/rcu/tree.c:2465 __do_softirq+0x2bb/0x942 kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xae/0x100 mm/kasan/generic.c:586 __call_rcu_common kernel/rcu/tree.c:2715 [inline] call_rcu+0x167/0xa80 kernel/rcu/tree.c:2829 fib6_info_release include/net/ip6_fib.h:341 [inline] ip6_route_multipath_add net/ipv6/route.c:5344 [inline] inet6_rtm_newroute+0x114d/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 The buggy address belongs to the object at ffff88809a07fc00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 100 bytes inside of freed 512-byte region [ffff88809a07fc00, ffff88809a07fe00) The buggy address belongs to the physical page: page:ffffea0002681f00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x9a07c head:ffffea0002681f00 order:2 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888014c41c80 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 23028, tgid 23027 (syz-executor.4), ts 2340253595219, free_ts 2339107097036 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1ea/0x210 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0x33ea/0x3580 mm/page_alloc.c:3311 __alloc_pages+0x255/0x680 mm/page_alloc.c:4567 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page+0x5f/0x160 mm/slub.c:2190 allocate_slab mm/slub.c:2354 [inline] new_slab+0x84/0x2f0 mm/slub.c:2407 ___slab_alloc+0xd17/0x13e0 mm/slub.c:3540 __slab_alloc mm/slub.c:3625 [inline] __slab_alloc_node mm/slub.c:3678 [inline] slab_alloc_node mm/slub.c:3850 [inline] __do_kmalloc_node mm/slub.c:3980 [inline] __kmalloc+0x2e0/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] new_dir fs/proc/proc_sysctl.c:956 [inline] get_subdir fs/proc/proc_sysctl.c:1000 [inline] sysctl_mkdir_p fs/proc/proc_sysctl.c:1295 [inline] __register_sysctl_table+0xb30/0x1440 fs/proc/proc_sysctl.c:1376 neigh_sysctl_register+0x416/0x500 net/core/neighbour.c:3859 devinet_sysctl_register+0xaf/0x1f0 net/ipv4/devinet.c:2644 inetdev_init+0x296/0x4d0 net/ipv4/devinet.c:286 inetdev_event+0x338/0x15c0 net/ipv4/devinet.c:1555 notifier_call_chain+0x18f/0x3b0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:1987 [inline] call_netdevice_notifiers net/core/dev.c:2001 [inline] register_netdevice+0x15b2/0x1a20 net/core/dev.c:10340 br_dev_newlink+0x27/0x100 net/bridge/br_netlink.c:1563 rtnl_newlink_create net/core/rtnetlink.c:3497 [inline] __rtnl_newlink net/core/rtnetlink.c:3717 [inline] rtnl_newlink+0x158f/0x20a0 net/core/rtnetlink.c:3730 page last free pid 11583 tgid 11583 stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1140 [inline] free_unref_page_prepare+0x968/0xa90 mm/page_alloc.c:2346 free_unref_page+0x37/0x3f0 mm/page_alloc.c:2486 kasan_depopulate_vmalloc_pte+0x74/0x90 mm/kasan/shadow.c:415 apply_to_pte_range mm/memory.c:2619 [inline] apply_to_pmd_range mm/memory.c:2663 [inline] apply_to_pud_range mm/memory.c:2699 [inline] apply_to_p4d_range mm/memory.c:2735 [inline] __apply_to_page_range+0x8ec/0xe40 mm/memory.c:2769 kasan_release_vmalloc+0x9a/0xb0 mm/kasan/shadow.c:532 __purge_vmap_area_lazy+0x163f/0x1a10 mm/vmalloc.c:1770 drain_vmap_area_work+0x40/0xd0 mm/vmalloc.c:1804 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Memory state around the buggy address: ffff88809a07fb00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88809a07fb80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88809a07fc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88809a07fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88809a07fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 3b1137fe7482 ("net: ipv6: Change notifications for multipath add to RTA_MULTIPATH") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240303144801.702646-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ea1dec8448fc..ef815ba583a8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5332,19 +5332,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); - fib6_info_release(nh->fib6_info); - if (!err) { - /* save reference to last route successfully inserted */ - rt_last = nh->fib6_info; - - /* save reference to first route for notification */ - if (!rt_notif) - rt_notif = nh->fib6_info; - } - - /* nh->fib6_info is used or freed at this point, reset to NULL*/ - nh->fib6_info = NULL; if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, @@ -5352,6 +5340,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = nh; goto add_errout; } + /* save reference to last route successfully inserted */ + rt_last = nh->fib6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, @@ -5412,8 +5406,7 @@ add_errout: cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->fib6_info) - fib6_info_release(nh->fib6_info); + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } From e9a8e5a587ca55fec6c58e4881742705d45bee54 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 22 Feb 2024 17:41:20 +0200 Subject: [PATCH 199/225] bpf: check bpf_func_state->callback_depth when pruning states When comparing current and cached states verifier should consider bpf_func_state->callback_depth. Current state cannot be pruned against cached state, when current states has more iterations left compared to cached state. Current state has more iterations left when it's callback_depth is smaller. Below is an example illustrating this bug, minimized from mailing list discussion [0] (assume that BPF_F_TEST_STATE_FREQ is set). The example is not a safe program: if loop_cb point (1) is followed by loop_cb point (2), then division by zero is possible at point (4). struct ctx { __u64 a; __u64 b; __u64 c; }; static void loop_cb(int i, struct ctx *ctx) { /* assume that generated code is "fallthrough-first": * if ... == 1 goto * if ... == 2 goto * */ switch (bpf_get_prandom_u32()) { case 1: /* 1 */ ctx->a = 42; return 0; break; case 2: /* 2 */ ctx->b = 42; return 0; break; default: /* 3 */ ctx->c = 42; return 0; break; } } SEC("tc") __failure __flag(BPF_F_TEST_STATE_FREQ) int test(struct __sk_buff *skb) { struct ctx ctx = { 7, 7, 7 }; bpf_loop(2, loop_cb, &ctx, 0); /* 0 */ /* assume generated checks are in-order: .a first */ if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7) asm volatile("r0 /= 0;":::"r0"); /* 4 */ return 0; } Prior to this commit verifier built the following checkpoint tree for this example: .------------------------------------- Checkpoint / State name | .-------------------------------- Code point number | | .---------------------------- Stack state {ctx.a,ctx.b,ctx.c} | | | .------------------- Callback depth in frame #0 v v v v - (0) {7P,7P,7},depth=0 - (3) {7P,7P,7},depth=1 - (0) {7P,7P,42},depth=1 - (3) {7P,7,42},depth=2 - (0) {7P,7,42},depth=2 loop terminates because of depth limit - (4) {7P,7,42},depth=0 predicted false, ctx.a marked precise - (6) exit (a) - (2) {7P,7,42},depth=2 - (0) {7P,42,42},depth=2 loop terminates because of depth limit - (4) {7P,42,42},depth=0 predicted false, ctx.a marked precise - (6) exit (b) - (1) {7P,7P,42},depth=2 - (0) {42P,7P,42},depth=2 loop terminates because of depth limit - (4) {42P,7P,42},depth=0 predicted false, ctx.{a,b} marked precise - (6) exit - (2) {7P,7,7},depth=1 considered safe, pruned using checkpoint (a) (c) - (1) {7P,7P,7},depth=1 considered safe, pruned using checkpoint (b) Here checkpoint (b) has callback_depth of 2, meaning that it would never reach state {42,42,7}. While checkpoint (c) has callback_depth of 1, and thus could yet explore the state {42,42,7} if not pruned prematurely. This commit makes forbids such premature pruning, allowing verifier to explore states sub-tree starting at (c): (c) - (1) {7,7,7P},depth=1 - (0) {42P,7,7P},depth=1 ... - (2) {42,7,7},depth=2 - (0) {42,42,7},depth=2 loop terminates because of depth limit - (4) {42,42,7},depth=0 predicted true, ctx.{a,b,c} marked precise - (5) division by zero [0] https://lore.kernel.org/bpf/9b251840-7cb8-4d17-bd23-1fc8071d8eef@linux.dev/ Fixes: bb124da69c47 ("bpf: keep track of max number of bpf_loop callback iterations") Suggested-by: Yonghong Song Signed-off-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240222154121.6991-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b263f093ee76..ddea9567f755 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16602,6 +16602,9 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat { int i; + if (old->callback_depth > cur->callback_depth) + return false; + for (i = 0; i < MAX_BPF_REG; i++) if (!regsafe(env, &old->regs[i], &cur->regs[i], &env->idmap_scratch, exact)) From 5c2bc5e2f81d3344095ae241032dde20a4ea2b48 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 22 Feb 2024 17:41:21 +0200 Subject: [PATCH 200/225] selftests/bpf: test case for callback_depth states pruning logic The test case was minimized from mailing list discussion [0]. It is equivalent to the following C program: struct iter_limit_bug_ctx { __u64 a; __u64 b; __u64 c; }; static __naked void iter_limit_bug_cb(void) { switch (bpf_get_prandom_u32()) { case 1: ctx->a = 42; break; case 2: ctx->b = 42; break; default: ctx->c = 42; break; } } int iter_limit_bug(struct __sk_buff *skb) { struct iter_limit_bug_ctx ctx = { 7, 7, 7 }; bpf_loop(2, iter_limit_bug_cb, &ctx, 0); if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7) asm volatile("r1 /= 0;":::"r1"); return 0; } The main idea is that each loop iteration changes one of the state variables in a non-deterministic manner. Hence it is premature to prune the states that have two iterations left comparing them to states with one iteration left. E.g. {{7,7,7}, callback_depth=0} can reach state {42,42,7}, while {{7,7,7}, callback_depth=1} can't. [0] https://lore.kernel.org/bpf/9b251840-7cb8-4d17-bd23-1fc8071d8eef@linux.dev/ Acked-by: Yonghong Song Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20240222154121.6991-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_iterating_callbacks.c | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index 5905e036e0ea..a955a6358206 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -239,4 +239,74 @@ int bpf_loop_iter_limit_nested(void *unused) return 1000 * a + b + c; } +struct iter_limit_bug_ctx { + __u64 a; + __u64 b; + __u64 c; +}; + +static __naked void iter_limit_bug_cb(void) +{ + /* This is the same as C code below, but written + * in assembly to control which branches are fall-through. + * + * switch (bpf_get_prandom_u32()) { + * case 1: ctx->a = 42; break; + * case 2: ctx->b = 42; break; + * default: ctx->c = 42; break; + * } + */ + asm volatile ( + "r9 = r2;" + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 42;" + "r0 = 0;" + "if r1 == 0x1 goto 1f;" + "if r1 == 0x2 goto 2f;" + "*(u64 *)(r9 + 16) = r2;" + "exit;" + "1: *(u64 *)(r9 + 0) = r2;" + "exit;" + "2: *(u64 *)(r9 + 8) = r2;" + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +int iter_limit_bug(struct __sk_buff *skb) +{ + struct iter_limit_bug_ctx ctx = { 7, 7, 7 }; + + bpf_loop(2, iter_limit_bug_cb, &ctx, 0); + + /* This is the same as C code below, + * written in assembly to guarantee checks order. + * + * if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7) + * asm volatile("r1 /= 0;":::"r1"); + */ + asm volatile ( + "r1 = *(u64 *)%[ctx_a];" + "if r1 != 42 goto 1f;" + "r1 = *(u64 *)%[ctx_b];" + "if r1 != 42 goto 1f;" + "r1 = *(u64 *)%[ctx_c];" + "if r1 != 7 goto 1f;" + "r1 /= 0;" + "1:" + : + : [ctx_a]"m"(ctx.a), + [ctx_b]"m"(ctx.b), + [ctx_c]"m"(ctx.c) + : "r1" + ); + return 0; +} + char _license[] SEC("license") = "GPL"; From f267f262815033452195f46c43b572159262f533 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 5 Mar 2024 10:08:28 +0100 Subject: [PATCH 201/225] xdp, bonding: Fix feature flags when there are no slave devs anymore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY") changed the driver from reporting everything as supported before a device was bonded into having the driver report that no XDP feature is supported until a real device is bonded as it seems to be more truthful given eventually real underlying devices decide what XDP features are supported. The change however did not take into account when all slave devices get removed from the bond device. In this case after 9b0ed890ac2a, the driver keeps reporting a feature mask of 0x77, that is, NETDEV_XDP_ACT_MASK & ~NETDEV_XDP_ACT_XSK_ZEROCOPY whereas it should have reported a feature mask of 0. Fix it by resetting XDP feature flags in the same way as if no XDP program is attached to the bond device. This was uncovered by the XDP bond selftest which let BPF CI fail. After adjusting the starting masks on the latter to 0 instead of NETDEV_XDP_ACT_MASK the test passes again together with this fix. Fixes: 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY") Signed-off-by: Daniel Borkmann Cc: Magnus Karlsson Cc: Prashant Batra Cc: Toke Høiland-Jørgensen Cc: Jakub Kicinski Reviewed-by: Toke Høiland-Jørgensen Message-ID: <20240305090829.17131-1-daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a11748b8d69b..cd0683bcca03 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1811,7 +1811,7 @@ void bond_xdp_set_features(struct net_device *bond_dev) ASSERT_RTNL(); - if (!bond_xdp_check(bond)) { + if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) { xdp_clear_features_flag(bond_dev); return; } From 0bfc0336e1348883fdab4689f0c8c56458f36dd8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 5 Mar 2024 10:08:29 +0100 Subject: [PATCH 202/225] selftests/bpf: Fix up xdp bonding test wrt feature flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adjust the XDP feature flags for the bond device when no bond slave devices are attached. After 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY"), the empty bond device must report 0 as flags instead of NETDEV_XDP_ACT_MASK. # ./vmtest.sh -- ./test_progs -t xdp_bond [...] [ 3.983311] bond1 (unregistering): (slave veth1_1): Releasing backup interface [ 3.995434] bond1 (unregistering): Released all slaves [ 4.022311] bond2: (slave veth2_1): Releasing backup interface #507/1 xdp_bonding/xdp_bonding_attach:OK #507/2 xdp_bonding/xdp_bonding_nested:OK #507/3 xdp_bonding/xdp_bonding_features:OK #507/4 xdp_bonding/xdp_bonding_roundrobin:OK #507/5 xdp_bonding/xdp_bonding_activebackup:OK #507/6 xdp_bonding/xdp_bonding_xor_layer2:OK #507/7 xdp_bonding/xdp_bonding_xor_layer23:OK #507/8 xdp_bonding/xdp_bonding_xor_layer34:OK #507/9 xdp_bonding/xdp_bonding_redirect_multi:OK #507 xdp_bonding:OK Summary: 1/9 PASSED, 0 SKIPPED, 0 FAILED [ 4.185255] bond2 (unregistering): Released all slaves [...] Fixes: 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY") Signed-off-by: Daniel Borkmann Reviewed-by: Toke Høiland-Jørgensen Message-ID: <20240305090829.17131-2-daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/xdp_bonding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index c3b45745cbcc..6d8b54124cb3 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -511,7 +511,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons) if (!ASSERT_OK(err, "bond bpf_xdp_query")) goto out; - if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + if (!ASSERT_EQ(query_opts.feature_flags, 0, "bond query_opts.feature_flags")) goto out; @@ -601,7 +601,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons) if (!ASSERT_OK(err, "bond bpf_xdp_query")) goto out; - ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + ASSERT_EQ(query_opts.feature_flags, 0, "bond query_opts.feature_flags"); out: bpf_link__destroy(link); From 2487007aa3b9fafbd2cb14068f49791ce1d7ede5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 5 Mar 2024 22:31:32 +0100 Subject: [PATCH 203/225] cpumap: Zero-initialise xdp_rxq_info struct before running XDP program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running an XDP program that is attached to a cpumap entry, we don't initialise the xdp_rxq_info data structure being used in the xdp_buff that backs the XDP program invocation. Tobias noticed that this leads to random values being returned as the xdp_md->rx_queue_index value for XDP programs running in a cpumap. This means we're basically returning the contents of the uninitialised memory, which is bad. Fix this by zero-initialising the rxq data structure before running the XDP program. Fixes: 9216477449f3 ("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap") Reported-by: Tobias Böhm Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240305213132.11955-1-toke@redhat.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8a0bb80fe48a..ef82ffc90cbe 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -178,7 +178,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) { - struct xdp_rxq_info rxq; + struct xdp_rxq_info rxq = {}; struct xdp_buff xdp; int i, nframes = 0; From 289e922582af5b4721ba02e86bde4d9ba918158a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Mar 2024 17:35:32 -0800 Subject: [PATCH 204/225] dpll: move all dpll<>netdev helpers to dpll code Older versions of GCC really want to know the full definition of the type involved in rcu_assign_pointer(). struct dpll_pin is defined in a local header, net/core can't reach it. Move all the netdev <> dpll code into dpll, where the type is known. Otherwise we'd need multiple function calls to jump between the compilation units. This is the same problem the commit under fixes was trying to address, but with rcu_assign_pointer() not rcu_dereference(). Some of the exports are not needed, networking core can't be a module, we only need exports for the helpers used by drivers. Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/all/35a869c8-52e8-177-1d4d-e57578b99b6@linux-m68k.org/ Fixes: 640f41ed33b5 ("dpll: fix build failure due to rcu_dereference_check() on unknown type") Reviewed-by: Jiri Pirko Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240305013532.694866-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/driver-api/dpll.rst | 2 +- drivers/dpll/dpll_core.c | 25 +++++++++--- drivers/dpll/dpll_netlink.c | 38 ++++++++++++------- drivers/net/ethernet/intel/ice/ice_dpll.c | 4 +- .../net/ethernet/mellanox/mlx5/core/dpll.c | 4 +- include/linux/dpll.h | 26 ++++++------- include/linux/netdevice.h | 4 -- net/core/dev.c | 22 ----------- net/core/rtnetlink.c | 4 +- 9 files changed, 64 insertions(+), 65 deletions(-) diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst index e3d593841aa7..ea8d16600e16 100644 --- a/Documentation/driver-api/dpll.rst +++ b/Documentation/driver-api/dpll.rst @@ -545,7 +545,7 @@ In such scenario, dpll device input signal shall be also configurable to drive dpll with signal recovered from the PHY netdevice. This is done by exposing a pin to the netdevice - attaching pin to the netdevice itself with -``netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. +``dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)``. Exposed pin id handle ``DPLL_A_PIN_ID`` is then identifiable by the user as it is attached to rtnetlink respond to get ``RTM_NEWLINK`` command in nested attribute ``IFLA_DPLL_PIN``. diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c index 241db366b2c7..7f686d179fc9 100644 --- a/drivers/dpll/dpll_core.c +++ b/drivers/dpll/dpll_core.c @@ -42,11 +42,6 @@ struct dpll_pin_registration { void *priv; }; -struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ - return rcu_dereference_rtnl(dev->dpll_pin); -} - struct dpll_device *dpll_device_get_by_id(int id) { if (xa_get_mark(&dpll_device_xa, id, DPLL_REGISTERED)) @@ -513,6 +508,26 @@ err_pin_prop: return ERR_PTR(ret); } +static void dpll_netdev_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + rtnl_lock(); + rcu_assign_pointer(dev->dpll_pin, dpll_pin); + rtnl_unlock(); +} + +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + dpll_netdev_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(dpll_netdev_pin_set); + +void dpll_netdev_pin_clear(struct net_device *dev) +{ + dpll_netdev_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(dpll_netdev_pin_clear); + /** * dpll_pin_get - find existing or create new dpll pin * @clock_id: clock_id of creator diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 4ca9ad16cd95..b57355e0c214 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include "dpll_core.h" #include "dpll_netlink.h" @@ -47,18 +48,6 @@ dpll_msg_add_dev_parent_handle(struct sk_buff *msg, u32 id) return 0; } -/** - * dpll_msg_pin_handle_size - get size of pin handle attribute for given pin - * @pin: pin pointer - * - * Return: byte size of pin handle attribute for given pin. - */ -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) -{ - return pin ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ -} -EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); - /** * dpll_msg_add_pin_handle - attach pin handle attribute to a given message * @msg: pointer to sk_buff message to attach a pin handle @@ -68,7 +57,7 @@ EXPORT_SYMBOL_GPL(dpll_msg_pin_handle_size); * * 0 - success * * -EMSGSIZE - no space in message to attach pin handle */ -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) { if (!pin) return 0; @@ -76,7 +65,28 @@ int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) return -EMSGSIZE; return 0; } -EXPORT_SYMBOL_GPL(dpll_msg_add_pin_handle); + +static struct dpll_pin *dpll_netdev_pin(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->dpll_pin); +} + +/** + * dpll_netdev_pin_handle_size - get size of pin handle attribute of a netdev + * @dev: netdev from which to get the pin + * + * Return: byte size of pin handle attribute, or 0 if @dev has no pin. + */ +size_t dpll_netdev_pin_handle_size(const struct net_device *dev) +{ + return dpll_netdev_pin(dev) ? nla_total_size(4) : 0; /* DPLL_A_PIN_ID */ +} + +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev) +{ + return dpll_msg_add_pin_handle(msg, dpll_netdev_pin(dev)); +} static int dpll_msg_add_mode(struct sk_buff *msg, struct dpll_device *dpll, diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index adfa1f2a80a6..c59e972dbaae 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -1597,7 +1597,7 @@ static void ice_dpll_deinit_rclk_pin(struct ice_pf *pf) } if (WARN_ON_ONCE(!vsi || !vsi->netdev)) return; - netdev_dpll_pin_clear(vsi->netdev); + dpll_netdev_pin_clear(vsi->netdev); dpll_pin_put(rclk->pin); } @@ -1641,7 +1641,7 @@ ice_dpll_init_rclk_pins(struct ice_pf *pf, struct ice_dpll_pin *pin, } if (WARN_ON((!vsi || !vsi->netdev))) return -EINVAL; - netdev_dpll_pin_set(vsi->netdev, pf->dplls.rclk.pin); + dpll_netdev_pin_set(vsi->netdev, pf->dplls.rclk.pin); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c index 928bf24d4b12..d74a5aaf4268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -261,7 +261,7 @@ static void mlx5_dpll_netdev_dpll_pin_set(struct mlx5_dpll *mdpll, { if (mdpll->tracking_netdev) return; - netdev_dpll_pin_set(netdev, mdpll->dpll_pin); + dpll_netdev_pin_set(netdev, mdpll->dpll_pin); mdpll->tracking_netdev = netdev; } @@ -269,7 +269,7 @@ static void mlx5_dpll_netdev_dpll_pin_clear(struct mlx5_dpll *mdpll) { if (!mdpll->tracking_netdev) return; - netdev_dpll_pin_clear(mdpll->tracking_netdev); + dpll_netdev_pin_clear(mdpll->tracking_netdev); mdpll->tracking_netdev = NULL; } diff --git a/include/linux/dpll.h b/include/linux/dpll.h index c60591308ae8..e37344f6a231 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -122,15 +122,24 @@ struct dpll_pin_properties { }; #if IS_ENABLED(CONFIG_DPLL) -size_t dpll_msg_pin_handle_size(struct dpll_pin *pin); -int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin); +void dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); +void dpll_netdev_pin_clear(struct net_device *dev); + +size_t dpll_netdev_pin_handle_size(const struct net_device *dev); +int dpll_netdev_add_pin_handle(struct sk_buff *msg, + const struct net_device *dev); #else -static inline size_t dpll_msg_pin_handle_size(struct dpll_pin *pin) +static inline void +dpll_netdev_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) { } +static inline void dpll_netdev_pin_clear(struct net_device *dev) { } + +static inline size_t dpll_netdev_pin_handle_size(const struct net_device *dev) { return 0; } -static inline int dpll_msg_add_pin_handle(struct sk_buff *msg, struct dpll_pin *pin) +static inline int +dpll_netdev_add_pin_handle(struct sk_buff *msg, const struct net_device *dev) { return 0; } @@ -169,13 +178,4 @@ int dpll_device_change_ntf(struct dpll_device *dpll); int dpll_pin_change_ntf(struct dpll_pin *pin); -#if !IS_ENABLED(CONFIG_DPLL) -static inline struct dpll_pin *netdev_dpll_pin(const struct net_device *dev) -{ - return NULL; -} -#else -struct dpll_pin *netdev_dpll_pin(const struct net_device *dev); -#endif - #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 735a9386fcf8..78a09af89e39 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,8 +79,6 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; -/* DPLL specific */ -struct dpll_pin; typedef u32 xdp_features_t; @@ -4042,8 +4040,6 @@ int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin); -void netdev_dpll_pin_clear(struct net_device *dev); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 0230391c78f7..76e6438f4858 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9074,28 +9074,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); -static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) -{ -#if IS_ENABLED(CONFIG_DPLL) - rtnl_lock(); - rcu_assign_pointer(dev->dpll_pin, dpll_pin); - rtnl_unlock(); -#endif -} - -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) -{ - WARN_ON(!dpll_pin); - netdev_dpll_pin_assign(dev, dpll_pin); -} -EXPORT_SYMBOL(netdev_dpll_pin_set); - -void netdev_dpll_pin_clear(struct net_device *dev) -{ - netdev_dpll_pin_assign(dev, NULL); -} -EXPORT_SYMBOL(netdev_dpll_pin_clear); - /** * dev_change_proto_down - set carrier according to proto_down. * diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ae86f751efc3..bd50e9fe3234 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1057,7 +1057,7 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev) { size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ - size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev)); + size += dpll_netdev_pin_handle_size(dev); return size; } @@ -1792,7 +1792,7 @@ static int rtnl_fill_dpll_pin(struct sk_buff *skb, if (!dpll_pin_nest) return -EMSGSIZE; - ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev)); + ret = dpll_netdev_add_pin_handle(skb, dev); if (ret < 0) goto nest_cancel; From b7fb7729c94fb2d23c79ff44f7a2da089c92d81c Mon Sep 17 00:00:00 2001 From: "Tobias Jakobi (Compleo)" Date: Mon, 4 Mar 2024 16:41:35 +0100 Subject: [PATCH 205/225] net: dsa: microchip: fix register write order in ksz8_ind_write8() This bug was noticed while re-implementing parts of the kernel driver in userspace using spidev. The goal was to enable some of the errata workarounds that Microchip describes in their errata sheet [1]. Both the errata sheet and the regular datasheet of e.g. the KSZ8795 imply that you need to do this for indirect register accesses: - write a 16-bit value to a control register pair (this value consists of the indirect register table, and the offset inside the table) - either read or write an 8-bit value from the data storage register (indicated by REG_IND_BYTE in the kernel) The current implementation has the order swapped. It can be proven, by reading back some indirect register with known content (the EEE register modified in ksz8_handle_global_errata() is one of these), that this implementation does not work. Private discussion with Oleksij Rempel of Pengutronix has revealed that the workaround was apparantly never tested on actual hardware. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ProductDocuments/Errata/KSZ87xx-Errata-DS80000687C.pdf Signed-off-by: Tobias Jakobi (Compleo) Reviewed-by: Oleksij Rempel Fixes: 7b6e6235b664 ("net: dsa: microchip: ksz8795: handle eee specif erratum") Link: https://lore.kernel.org/r/20240304154135.161332-1-tobias.jakobi.compleo@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8795.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 61b71bcfe396..c3da97abce20 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -49,9 +49,9 @@ static int ksz8_ind_write8(struct ksz_device *dev, u8 table, u16 addr, u8 data) mutex_lock(&dev->alu_mutex); ctrl_addr = IND_ACC_TABLE(table) | addr; - ret = ksz_write8(dev, regs[REG_IND_BYTE], data); + ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); if (!ret) - ret = ksz_write16(dev, regs[REG_IND_CTRL_0], ctrl_addr); + ret = ksz_write8(dev, regs[REG_IND_BYTE], data); mutex_unlock(&dev->alu_mutex); From a50026bdb867c8caf9d29e18f9fe9e1390312619 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Mar 2024 21:33:36 +0800 Subject: [PATCH 206/225] iov_iter: get rid of 'copy_mc' flag This flag is only set by one single user: the magical core dumping code that looks up user pages one by one, and then writes them out using their kernel addresses (by using a BVEC_ITER). That actually ends up being a huge problem, because while we do use copy_mc_to_kernel() for this case and it is able to handle the possible machine checks involved, nothing else is really ready to handle the failures caused by the machine check. In particular, as reported by Tong Tiangen, we don't actually support fault_in_iov_iter_readable() on a machine check area. As a result, the usual logic for writing things to a file under a filesystem lock, which involves doing a copy with page faults disabled and then if that fails trying to fault pages in without holding the locks with fault_in_iov_iter_readable() does not work at all. We could decide to always just make the MC copy "succeed" (and filling the destination with zeroes), and that would then create a core dump file that just ignores any machine checks. But honestly, this single special case has been problematic before, and means that all the normal iov_iter code ends up slightly more complex and slower. See for example commit c9eec08bac96 ("iov_iter: Don't deal with iter->copy_mc in memcpy_from_iter_mc()") where David Howells re-organized the code just to avoid having to check the 'copy_mc' flags inside the inner iov_iter loops. So considering that we have exactly one user, and that one user is a non-critical special case that doesn't actually ever trigger in real life (Tong found this with manual error injection), the sane solution is to just decide that the onus on handling the machine check lines on that user instead. Ergo, do the copy_mc_to_kernel() in the core dump logic itself, copying the user data to a stable kernel page before writing it out. Fixes: f1982740f5e7 ("iov_iter: Convert iterate*() to inline funcs") Signed-off-by: Linus Torvalds Signed-off-by: Tong Tiangen Link: https://lore.kernel.org/r/20240305133336.3804360-1-tongtiangen@huawei.com Link: https://lore.kernel.org/all/4e80924d-9c85-f13a-722a-6a5d2b1c225a@huawei.com/ Tested-by: David Howells Reviewed-by: David Howells Reviewed-by: Jens Axboe Reported-by: Tong Tiangen Signed-off-by: Christian Brauner --- fs/coredump.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- include/linux/uio.h | 16 ---------------- lib/iov_iter.c | 23 ----------------------- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index f258c17c1841..be6403b4b14b 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -872,6 +872,9 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) loff_t pos; ssize_t n; + if (!page) + return 0; + if (cprm->to_skip) { if (!__dump_skip(cprm, cprm->to_skip)) return 0; @@ -884,7 +887,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) pos = file->f_pos; bvec_set_page(&bvec, page, PAGE_SIZE, 0); iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); - iov_iter_set_copy_mc(&iter); n = __kernel_write_iter(cprm->file, &iter, &pos); if (n != PAGE_SIZE) return 0; @@ -895,10 +897,44 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page) return 1; } +/* + * If we might get machine checks from kernel accesses during the + * core dump, let's get those errors early rather than during the + * IO. This is not performance-critical enough to warrant having + * all the machine check logic in the iovec paths. + */ +#ifdef copy_mc_to_kernel + +#define dump_page_alloc() alloc_page(GFP_KERNEL) +#define dump_page_free(x) __free_page(x) +static struct page *dump_page_copy(struct page *src, struct page *dst) +{ + void *buf = kmap_local_page(src); + size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE); + kunmap_local(buf); + return left ? NULL : dst; +} + +#else + +/* We just want to return non-NULL; it's never used. */ +#define dump_page_alloc() ERR_PTR(-EINVAL) +#define dump_page_free(x) ((void)(x)) +static inline struct page *dump_page_copy(struct page *src, struct page *dst) +{ + return src; +} +#endif + int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len) { unsigned long addr; + struct page *dump_page; + + dump_page = dump_page_alloc(); + if (!dump_page) + return 0; for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page; @@ -912,14 +948,17 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, */ page = get_dump_page(addr); if (page) { - int stop = !dump_emit_page(cprm, page); + int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page)); put_page(page); - if (stop) + if (stop) { + dump_page_free(dump_page); return 0; + } } else { dump_skip(cprm, PAGE_SIZE); } } + dump_page_free(dump_page); return 1; } #endif diff --git a/include/linux/uio.h b/include/linux/uio.h index bea9c89922d9..00cebe2b70de 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -40,7 +40,6 @@ struct iov_iter_state { struct iov_iter { u8 iter_type; - bool copy_mc; bool nofault; bool data_source; size_t iov_offset; @@ -248,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #ifdef CONFIG_ARCH_HAS_COPY_MC size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i); -static inline void iov_iter_set_copy_mc(struct iov_iter *i) -{ - i->copy_mc = true; -} - -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return i->copy_mc; -} #else #define _copy_mc_to_iter _copy_to_iter -static inline void iov_iter_set_copy_mc(struct iov_iter *i) { } -static inline bool iov_iter_is_copy_mc(const struct iov_iter *i) -{ - return false; -} #endif size_t iov_iter_zero(size_t bytes, struct iov_iter *); @@ -355,7 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_UBUF, - .copy_mc = false, .data_source = direction, .ubuf = buf, .count = count, diff --git a/lib/iov_iter.c b/lib/iov_iter.c index e0aa6b440ca5..cf2eb2b2f983 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -166,7 +166,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter) { .iter_type = ITER_IOVEC, - .copy_mc = false, .nofault = false, .data_source = direction, .__iov = iov, @@ -244,27 +243,9 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) EXPORT_SYMBOL_GPL(_copy_mc_to_iter); #endif /* CONFIG_ARCH_HAS_COPY_MC */ -static __always_inline -size_t memcpy_from_iter_mc(void *iter_from, size_t progress, - size_t len, void *to, void *priv2) -{ - return copy_mc_to_kernel(to + progress, iter_from, len); -} - -static size_t __copy_from_iter_mc(void *addr, size_t bytes, struct iov_iter *i) -{ - if (unlikely(i->count < bytes)) - bytes = i->count; - if (unlikely(!bytes)) - return 0; - return iterate_bvec(i, bytes, addr, NULL, memcpy_from_iter_mc); -} - static __always_inline size_t __copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { - if (unlikely(iov_iter_is_copy_mc(i))) - return __copy_from_iter_mc(addr, bytes, i); return iterate_and_advance(i, bytes, addr, copy_from_user_iter, memcpy_from_iter); } @@ -633,7 +614,6 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_KVEC, - .copy_mc = false, .data_source = direction, .kvec = kvec, .nr_segs = nr_segs, @@ -650,7 +630,6 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, WARN_ON(direction & ~(READ | WRITE)); *i = (struct iov_iter){ .iter_type = ITER_BVEC, - .copy_mc = false, .data_source = direction, .bvec = bvec, .nr_segs = nr_segs, @@ -679,7 +658,6 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, BUG_ON(direction & ~1); *i = (struct iov_iter) { .iter_type = ITER_XARRAY, - .copy_mc = false, .data_source = direction, .xarray = xarray, .xarray_start = start, @@ -703,7 +681,6 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) BUG_ON(direction != READ); *i = (struct iov_iter){ .iter_type = ITER_DISCARD, - .copy_mc = false, .data_source = false, .count = count, .iov_offset = 0 From c055fc00c07be1f0df7375ab0036cebd1106ed38 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 5 Mar 2024 08:13:08 +0800 Subject: [PATCH 207/225] net/rds: fix WARNING in rds_conn_connect_if_down If connection isn't established yet, get_mr() will fail, trigger connection after get_mr(). Fixes: 584a8279a44a ("RDS: RDMA: return appropriate error on rdma map failures") Reported-and-tested-by: syzbot+d4faee732755bba9838e@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: David S. Miller --- net/rds/rdma.c | 3 +++ net/rds/send.c | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index fba82d36593a..a4e3c5de998b 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, kfree(sg); } ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ + if (ret == -ENODEV) + rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index 5e57a1581dc6..2899def23865 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1313,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); - if (ret) { - /* Trigger connection so that its ready for the next retry */ - if (ret == -EAGAIN) - rds_conn_connect_if_down(conn); + if (ret) goto out; - } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", From 16603605b667b70da974bea8216c93e7db043bf1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 1 Mar 2024 00:11:10 +0100 Subject: [PATCH 208/225] netfilter: nf_tables: disallow anonymous set with timeout flag Anonymous sets are never used with timeout from userspace, reject this. Exception to this rule is NFT_SET_EVAL to ensure legacy meters still work. Cc: stable@vger.kernel.org Fixes: 761da2935d6e ("netfilter: nf_tables: add set timeout API support") Reported-by: lonial con Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7e938c7397dd..bd21067f25cf 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5001,6 +5001,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; From 5f4fc4bd5cddb4770ab120ce44f02695c4505562 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 1 Mar 2024 01:04:11 +0100 Subject: [PATCH 209/225] netfilter: nf_tables: reject constant set with timeout This set combination is weird: it allows for elements to be added/deleted, but once bound to the rule it cannot be updated anymore. Eventually, all elements expire, leading to an empty set which cannot be updated anymore. Reject this flags combination. Cc: stable@vger.kernel.org Fixes: 761da2935d6e ("netfilter: nf_tables: add set timeout API support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bd21067f25cf..fb07455143a5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5004,6 +5004,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) == + (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } desc.dtype = 0; From 99993789966a6eb4f1295193dc543686899892d3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Mar 2024 13:38:15 +0100 Subject: [PATCH 210/225] netfilter: nft_ct: fix l3num expectations with inet pseudo family Following is rejected but should be allowed: table inet t { ct expectation exp1 { [..] l3proto ip Valid combos are: table ip t, l3proto ip table ip6 t, l3proto ip6 table inet t, l3proto ip OR l3proto ip6 Disallow inet pseudeo family, the l3num must be a on-wire protocol known to conntrack. Retain NFPROTO_INET case to make it clear its rejected intentionally rather as oversight. Fixes: 8059918a1377 ("netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bfd3e5a14dab..255640013ab8 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1256,14 +1256,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, switch (priv->l3num) { case NFPROTO_IPV4: case NFPROTO_IPV6: - if (priv->l3num != ctx->family) - return -EINVAL; + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; - fallthrough; - case NFPROTO_INET: - break; + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ default: - return -EOPNOTSUPP; + return -EAFNOSUPPORT; } priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); From 552705a3650bbf46a22b1adedc1b04181490fc36 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 4 Mar 2024 14:22:12 +0100 Subject: [PATCH 211/225] netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout While the rhashtable set gc runs asynchronously, a race allows it to collect elements from anonymous sets with timeouts while it is being released from the commit path. Mingi Cho originally reported this issue in a different path in 6.1.x with a pipapo set with low timeouts which is not possible upstream since 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout"). Fix this by setting on the dead flag for anonymous sets to skip async gc in this case. According to 08e4c8c5919f ("netfilter: nf_tables: mark newset as dead on transaction abort"), Florian plans to accelerate abort path by releasing objects via workqueue, therefore, this sets on the dead flag for abort path too. Cc: stable@vger.kernel.org Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") Reported-by: Mingi Cho Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fb07455143a5..1683dc196b59 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5430,6 +5430,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); From 767146637efc528b5e3d31297df115e85a2fd362 Mon Sep 17 00:00:00 2001 From: Lena Wang Date: Tue, 5 Mar 2024 11:38:55 +0000 Subject: [PATCH 212/225] netfilter: nf_conntrack_h323: Add protection for bmp length out of range UBSAN load reports an exception of BRK#5515 SHIFT_ISSUE:Bitwise shifts that are out of bounds for their data type. vmlinux get_bitmap(b=75) + 712 vmlinux decode_seq(bs=0xFFFFFFD008037000, f=0xFFFFFFD008037018, level=134443100) + 1956 vmlinux decode_choice(base=0xFFFFFFD0080370F0, level=23843636) + 1216 vmlinux decode_seq(f=0xFFFFFFD0080371A8, level=134443500) + 812 vmlinux decode_choice(base=0xFFFFFFD008037280, level=0) + 1216 vmlinux DecodeRasMessage() + 304 vmlinux ras_help() + 684 vmlinux nf_confirm() + 188 Due to abnormal data in skb->data, the extension bitmap length exceeds 32 when decoding ras message then uses the length to make a shift operation. It will change into negative after several loop. UBSAN load could detect a negative shift as an undefined behaviour and reports exception. So we add the protection to avoid the length exceeding 32. Or else it will return out of range error and stop decoding. Fixes: 5e35941d9901 ("[NETFILTER]: Add H.323 conntrack/NAT helper") Signed-off-by: Lena Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index e697a824b001..540d97715bd2 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) From 958d6145a6d9ba9e075c921aead8753fb91c9101 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:35 +0800 Subject: [PATCH 213/225] netrom: Fix a data-race around sysctl_netrom_default_path_quality We need to protect the reader reading sysctl_netrom_default_path_quality because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index baea3cbd76ca..6f709fdffc11 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; - nr_neigh->quality = sysctl_netrom_default_path_quality; + nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; From cfd9f4a740f772298308b2e6070d2c744fb5cf79 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:36 +0800 Subject: [PATCH 214/225] netrom: Fix a data-race around sysctl_netrom_obsolescence_count_initialiser We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 6f709fdffc11..b8ddd8048f35 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, - sysctl_netrom_obsolescence_count_initialiser); + READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } From 119cae5ea3f9e35cdada8e572cc067f072fa825a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:37 +0800 Subject: [PATCH 215/225] netrom: Fix data-races around sysctl_netrom_network_ttl_initialiser We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/nr_dev.c | 2 +- net/netrom/nr_out.c | 2 +- net/netrom/nr_subr.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 3aaac4a22b38..2c34389c3ce6 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; - *buff++ = sysctl_netrom_network_ttl_initialiser; + *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 44929657f5b7..5e531394a724 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index e2d2af924cff..c3bbd5880850 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype) *dptr++ = nr->my_id; *dptr++ = frametype; *dptr++ = nr->window; - if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser; + if (nr->bpqext) + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); break; case NR_DISCREQ: @@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (mine) { *dptr++ = 0; From 60a7a152abd494ed4f69098cf0f322e6bb140612 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:38 +0800 Subject: [PATCH 216/225] netrom: Fix a data-race around sysctl_netrom_transport_timeout We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 0eed00184adf..4d0e0834d527 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -453,7 +453,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr_init_timers(sk); nr->t1 = - msecs_to_jiffies(sysctl_netrom_transport_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = From e799299aafed417cc1f32adccb2a0e5268b3f6d5 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:39 +0800 Subject: [PATCH 217/225] netrom: Fix a data-race around sysctl_netrom_transport_maximum_tries We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4d0e0834d527..312fc745db7f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -457,7 +457,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = - msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = msecs_to_jiffies(sysctl_netrom_transport_busy_delay); nr->idle = From 806f462ba9029d41aadf8ec93f2f99c5305deada Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:40 +0800 Subject: [PATCH 218/225] netrom: Fix a data-race around sysctl_netrom_transport_acknowledge_delay We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 312fc745db7f..8ada0da3c0e0 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -455,7 +455,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t1 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = - msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = From 43547d8699439a67b78d6bb39015113f7aa360fd Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:41 +0800 Subject: [PATCH 219/225] netrom: Fix a data-race around sysctl_netrom_transport_busy_delay We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 8ada0da3c0e0..10eee02ef99e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -459,7 +459,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = - msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); nr->window = sysctl_netrom_transport_requested_window_size; From a2e706841488f474c06e9b33f71afc947fb3bf56 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:42 +0800 Subject: [PATCH 220/225] netrom: Fix a data-race around sysctl_netrom_transport_requested_window_size We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 10eee02ef99e..e65418fb9d88 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -462,7 +462,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); - nr->window = sysctl_netrom_transport_requested_window_size; + nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; nr->state = NR_STATE_0; From f99b494b40431f0ca416859f2345746199398e2b Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:43 +0800 Subject: [PATCH 221/225] netrom: Fix a data-race around sysctl_netrom_transport_no_activity_timeout We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e65418fb9d88..1671be042ffe 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -461,7 +461,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t4 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = - msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; From b5dffcb8f71bdd02a4e5799985b51b12f4eeaf76 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:44 +0800 Subject: [PATCH 222/225] netrom: Fix a data-race around sysctl_netrom_routing_control We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b8ddd8048f35..89e12e6eea2e 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) return ret; } - if (!sysctl_netrom_routing_control && ax25 != NULL) + if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ From bc76645ebdd01be9b9994dac39685a3d0f6f7985 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:45 +0800 Subject: [PATCH 223/225] netrom: Fix a data-race around sysctl_netrom_link_fails_count We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 89e12e6eea2e..70480869ad1c 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) nr_neigh->ax25 = NULL; ax25_cb_put(ax25); - if (++nr_neigh->failed < sysctl_netrom_link_fails_count) { + if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } From d380ce70058a4ccddc3e5f5c2063165dc07672c6 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:46 +0800 Subject: [PATCH 224/225] netrom: Fix data-races around sysctl_net_busy_read We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni --- net/netrom/af_netrom.c | 2 +- net/netrom/nr_in.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 1671be042ffe..104a80b75477 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) * G8PZT's Xrouter which is sending packets with command type 7 * as an extension of the protocol. */ - if (sysctl_netrom_reset_circuit && + if (READ_ONCE(sysctl_netrom_reset_circuit) && (frametype != NR_RESET || flags != 0)) nr_transmit_reset(skb, 1); diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 2f084b6f69d7..97944db6b5ac 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; From ba18deddd6d502da71fd6b6143c53042271b82bd Mon Sep 17 00:00:00 2001 From: Yongzhi Liu Date: Wed, 6 Mar 2024 18:57:14 +0800 Subject: [PATCH 225/225] net: pds_core: Fix possible double free in error handling path When auxiliary_device_add() returns error and then calls auxiliary_device_uninit(), Callback function pdsc_auxbus_dev_release calls kfree(padev) to free memory. We shouldn't call kfree(padev) again in the error handling path. Fix this by cleaning up the redundant kfree() and putting the error handling back to where the errors happened. Fixes: 4569cce43bc6 ("pds_core: add auxiliary_bus devices") Signed-off-by: Yongzhi Liu Reviewed-by: Wojciech Drewek Reviewed-by: Shannon Nelson Link: https://lore.kernel.org/r/20240306105714.20597-1-hyperlyzcs@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amd/pds_core/auxbus.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/auxbus.c b/drivers/net/ethernet/amd/pds_core/auxbus.c index 11c23a7f3172..fd1a5149c003 100644 --- a/drivers/net/ethernet/amd/pds_core/auxbus.c +++ b/drivers/net/ethernet/amd/pds_core/auxbus.c @@ -160,23 +160,19 @@ static struct pds_auxiliary_dev *pdsc_auxbus_dev_register(struct pdsc *cf, if (err < 0) { dev_warn(cf->dev, "auxiliary_device_init of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out; + kfree(padev); + return ERR_PTR(err); } err = auxiliary_device_add(aux_dev); if (err) { dev_warn(cf->dev, "auxiliary_device_add of %s failed: %pe\n", name, ERR_PTR(err)); - goto err_out_uninit; + auxiliary_device_uninit(aux_dev); + return ERR_PTR(err); } return padev; - -err_out_uninit: - auxiliary_device_uninit(aux_dev); -err_out: - kfree(padev); - return ERR_PTR(err); } int pdsc_auxbus_dev_del(struct pdsc *cf, struct pdsc *pf)