From 4a69516f97bc35134f41de959000ea6cb93a1d50 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 14 Mar 2023 13:52:57 +0100 Subject: [PATCH 01/57] dt-bindings: dmaengine: qcom: gpi: Add QCM2290 GPI DMA Add a compatible for the single GPI DMA controller on QCM2290. It uses the same 0x10000 offset as SM6350. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Reviewed-by: Bhupesh Sharma Link: https://lore.kernel.org/r/20230314-topic-2290_compats-v1-2-47e26c3c0365@linaro.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml index fc5de7b6f19e..f61145c91b6d 100644 --- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml +++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml @@ -24,6 +24,7 @@ properties: - qcom,sm6350-gpi-dma - items: - enum: + - qcom,qcm2290-gpi-dma - qcom,qdu1000-gpi-dma - qcom,sc7280-gpi-dma - qcom,sm6115-gpi-dma From d1e71a3a7ab9db0168b6885171e0576383216ac8 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 15 Mar 2023 06:45:01 +0000 Subject: [PATCH 02/57] dmaengine: sh: rz-dmac: Add reset support Add reset support for DMAC module found on RZ/G2L alike SoCs. For booting the board, reset release of the DMAC module is required otherwise we don't get GIC interrupts. Currently the reset release was done by the bootloader now move this to the driver instead. Signed-off-by: Biju Das Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/20230315064501.21491-1-biju.das.jz@bp.renesas.com Signed-off-by: Vinod Koul --- drivers/dma/sh/rz-dmac.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c index 476847a4916b..6b62e01ba658 100644 --- a/drivers/dma/sh/rz-dmac.c +++ b/drivers/dma/sh/rz-dmac.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -92,6 +93,7 @@ struct rz_dmac_chan { struct rz_dmac { struct dma_device engine; struct device *dev; + struct reset_control *rstc; void __iomem *base; void __iomem *ext_base; @@ -889,6 +891,11 @@ static int rz_dmac_probe(struct platform_device *pdev) /* Initialize the channels. */ INIT_LIST_HEAD(&dmac->engine.channels); + dmac->rstc = devm_reset_control_array_get_exclusive(&pdev->dev); + if (IS_ERR(dmac->rstc)) + return dev_err_probe(&pdev->dev, PTR_ERR(dmac->rstc), + "failed to get resets\n"); + pm_runtime_enable(&pdev->dev); ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { @@ -896,6 +903,10 @@ static int rz_dmac_probe(struct platform_device *pdev) goto err_pm_disable; } + ret = reset_control_deassert(dmac->rstc); + if (ret) + goto err_pm_runtime_put; + for (i = 0; i < dmac->n_channels; i++) { ret = rz_dmac_chan_probe(dmac, &dmac->channels[i], i); if (ret < 0) @@ -940,6 +951,7 @@ static int rz_dmac_probe(struct platform_device *pdev) dma_register_err: of_dma_controller_free(pdev->dev.of_node); err: + reset_control_assert(dmac->rstc); channel_num = i ? i - 1 : 0; for (i = 0; i < channel_num; i++) { struct rz_dmac_chan *channel = &dmac->channels[i]; @@ -950,6 +962,7 @@ err: channel->lmdesc.base_dma); } +err_pm_runtime_put: pm_runtime_put(&pdev->dev); err_pm_disable: pm_runtime_disable(&pdev->dev); @@ -972,6 +985,7 @@ static int rz_dmac_remove(struct platform_device *pdev) } of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&dmac->engine); + reset_control_assert(dmac->rstc); pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); From 5aaf9079d740ebe57f10dfefb1850011d6bb7b2a Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 15 Mar 2023 06:47:25 +0000 Subject: [PATCH 03/57] dt-bindings: dma: rz-dmac: Document clock-names and reset-names Document clock-names and reset-names properties as we have multiple clocks and resets. Signed-off-by: Biju Das Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230315064726.22739-1-biju.das.jz@bp.renesas.com Signed-off-by: Vinod Koul --- .../devicetree/bindings/dma/renesas,rz-dmac.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml index f638d3934e71..c284abc6784a 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml @@ -54,6 +54,11 @@ properties: - description: DMA main clock - description: DMA register access clock + clock-names: + items: + - const: main + - const: register + '#dma-cells': const: 1 description: @@ -77,16 +82,23 @@ properties: - description: Reset for DMA ARESETN reset terminal - description: Reset for DMA RST_ASYNC reset terminal + reset-names: + items: + - const: arst + - const: rst_async + required: - compatible - reg - interrupts - interrupt-names - clocks + - clock-names - '#dma-cells' - dma-channels - power-domains - resets + - reset-names additionalProperties: false @@ -124,9 +136,11 @@ examples: "ch12", "ch13", "ch14", "ch15"; clocks = <&cpg CPG_MOD R9A07G044_DMAC_ACLK>, <&cpg CPG_MOD R9A07G044_DMAC_PCLK>; + clock-names = "main", "register"; power-domains = <&cpg>; resets = <&cpg R9A07G044_DMAC_ARESETN>, <&cpg R9A07G044_DMAC_RST_ASYNC>; + reset-names = "arst", "rst_async"; #dma-cells = <1>; dma-channels = <16>; }; From 9f7d4718211a82b2a87ae74d841d6e3938cd5c28 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 13 Mar 2023 22:52:31 +0100 Subject: [PATCH 04/57] dmaengine: imx-dma: Remove a redundant memset() call The desc->desc structure is already zeroed when 'desc' is kzalloc()'ed. There is no need to clear it twice. Remove the redundant memset(). Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/95a81d623bffde2e5d14e22fad7e8c9a9a7203f6.1678743528.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/imx-dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 80086977973f..f040751690af 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -750,7 +750,6 @@ static int imxdma_alloc_chan_resources(struct dma_chan *chan) desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) break; - memset(&desc->desc, 0, sizeof(struct dma_async_tx_descriptor)); dma_async_tx_descriptor_init(&desc->desc, chan); desc->desc.tx_submit = imxdma_tx_submit; /* txd.flags will be overwritten in prep funcs */ From 3765af04f4fda327463e5b631a4a2d62ae7bb93f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 10 Mar 2023 08:47:03 -0600 Subject: [PATCH 05/57] dmaengine: Use of_property_present() for testing DT property presence It is preferred to use typed property access functions (i.e. of_property_read_ functions) rather than low-level of_get_property/of_find_property functions for reading properties. As part of this, convert of_get_property/of_find_property calls to the recently added of_property_present() helper when we just want to test for presence of a property and nothing more. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230310144704.1541976-1-robh@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/of-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index ac61ecda2926..775a7f408b9a 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -264,7 +264,7 @@ struct dma_chan *of_dma_request_slave_channel(struct device_node *np, } /* Silently fail if there is not even the "dmas" property */ - if (!of_find_property(np, "dmas", NULL)) + if (!of_property_present(np, "dmas")) return ERR_PTR(-ENODEV); count = of_property_count_strings(np, "dma-names"); From 8f6707d0773be31972768abd6e0bf7b8515b5b1a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 15:46:15 -0600 Subject: [PATCH 06/57] dmaengine: ioat: use PCI core macros for PCIe Capability The PCIe Capability is defined by the PCIe spec, so use the PCI_EXP_DEVCTL macros defined by the PCI core instead of defining copies in IOAT. This makes it easier to find all uses of the PCIe Device Control register. No functional change intended. Signed-off-by: Bjorn Helgaas Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20230307214615.887354-1-helgaas@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 6 +++--- drivers/dma/ioat/registers.h | 7 ------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 5d707ff63554..fa7c0f9aa61d 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -1191,13 +1191,13 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); /* disable relaxed ordering */ - err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16); + err = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &val16); if (err) return pcibios_err_to_errno(err); /* clear relaxed ordering enable */ - val16 &= ~IOAT_DEVCTRL_ROE; - err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16); + val16 &= ~PCI_EXP_DEVCTL_RELAX_EN; + err = pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, val16); if (err) return pcibios_err_to_errno(err); diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index f55a5f92f185..54cf0ad39887 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -14,13 +14,6 @@ #define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 -/* PCIe config registers */ - -/* EXPCAPID + N */ -#define IOAT_DEVCTRL_OFFSET 0x8 -/* relaxed ordering enable */ -#define IOAT_DEVCTRL_ROE 0x10 - /* MMIO Device Registers */ #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ From e32622f84ae289dc7a04e9f01cd62cb914fdc5c6 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 13:26:54 -0600 Subject: [PATCH 07/57] dmaengine: ioat: Drop redundant pci_enable_pcie_error_reporting() pci_enable_pcie_error_reporting() enables the device to send ERR_* Messages. Since f26e58bf6f54 ("PCI/AER: Enable error reporting when AER is native"), the PCI core does this for all devices during enumeration, so the driver doesn't need to do it itself. Remove the redundant pci_enable_pcie_error_reporting() call from the driver. Also remove the corresponding pci_disable_pcie_error_reporting() from the driver .remove() path. Note that this only controls ERR_* Messages from the device. An ERR_* Message may cause the Root Port to generate an interrupt, depending on the AER Root Error Command register managed by the AER service driver. Signed-off-by: Bjorn Helgaas Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20230307192655.874008-2-helgaas@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ioat/init.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index fa7c0f9aa61d..c4602bfc9c74 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include "dma.h" #include "registers.h" @@ -1380,15 +1379,11 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (is_skx_ioat(pdev)) device->version = IOAT_VER_3_2; err = ioat3_dma_probe(device, ioat_dca_enabled); - - if (device->version >= IOAT_VER_3_3) - pci_enable_pcie_error_reporting(pdev); } else return -ENODEV; if (err) { dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); - pci_disable_pcie_error_reporting(pdev); return -ENODEV; } @@ -1411,7 +1406,6 @@ static void ioat_remove(struct pci_dev *pdev) device->dca = NULL; } - pci_disable_pcie_error_reporting(pdev); ioat_dma_remove(device); } From 3c5cc03979b72620c31159560984f5b2c0dcba22 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 7 Mar 2023 13:26:55 -0600 Subject: [PATCH 08/57] dmaengine: idxd: Remove unnecessary aer.h include is unused, so remove it. Signed-off-by: Bjorn Helgaas Cc: Fenghua Yu Cc: Dave Jiang Acked-by: Fenghua Yu Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20230307192655.874008-3-helgaas@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 640d3048368e..a85efcda7095 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include From fe8f1a2e9b7c4519a145ab8110dec59fea3c532c Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 7 Mar 2023 13:10:19 +0100 Subject: [PATCH 09/57] dt-bindings: dma: apple,admac: Add t8112-admac compatible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The block found on Apple's M2 SoC is compatible with the existing driver so add its per-SoC compatible. Acked-by: Vinod Koul Acked-by: Martin PoviĊĦer Acked-by: Krzysztof Kozlowski Signed-off-by: Janne Grunau Link: https://lore.kernel.org/r/20230202-asahi-t8112-dt-v3-13-d1a5f6383d95@jannau.net Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/apple,admac.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/apple,admac.yaml b/Documentation/devicetree/bindings/dma/apple,admac.yaml index 05163d124ec3..ab193bc8bdbb 100644 --- a/Documentation/devicetree/bindings/dma/apple,admac.yaml +++ b/Documentation/devicetree/bindings/dma/apple,admac.yaml @@ -26,6 +26,7 @@ properties: - enum: - apple,t6000-admac - apple,t8103-admac + - apple,t8112-admac - const: apple,admac reg: From 7511f28792c6c724cc584a42f3eeb2fa5af689c4 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Wed, 29 Mar 2023 13:21:29 -0400 Subject: [PATCH 10/57] dmaengine: tegra: explicitly select DMA_VIRTUAL_CHANNELS Enabling TEGRA186_GPC_DMA will cause this build failure unless some other DMA driver which uses DMA_VIRTUAL_CHANNELS is enabled: ERROR: modpost: "vchan_dma_desc_free_list" [drivers/dma/tegra186-gpc-dma.ko] undefined! ERROR: modpost: "vchan_init" [drivers/dma/tegra186-gpc-dma.ko] undefined! ERROR: modpost: "vchan_tx_submit" [drivers/dma/tegra186-gpc-dma.ko] undefined! ERROR: modpost: "vchan_tx_desc_free" [drivers/dma/tegra186-gpc-dma.ko] undefined! ERROR: modpost: "vchan_find_desc" [drivers/dma/tegra186-gpc-dma.ko] undefined! make[1]: *** [scripts/Makefile.modpost:136: Module.symvers] Error 1 Add an explicit select of DMA_VIRTUAL_CHANNELS to avoid this. Signed-off-by: Mark Salter Link: https://lore.kernel.org/r/20230329172129.88403-1-msalter@redhat.com Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index fb7073fc034f..f5f422f9b850 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -623,6 +623,7 @@ config TEGRA186_GPC_DMA depends on (ARCH_TEGRA || COMPILE_TEST) && ARCH_DMA_ADDR_T_64BIT depends on IOMMU_API select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help Support for the NVIDIA Tegra General Purpose Central DMA controller. The DMA controller has multiple DMA channels which can be configured From fbe05149e40bc6891b1ed6097ac26dbaa32d2bf8 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Wed, 29 Mar 2023 21:23:49 +0530 Subject: [PATCH 11/57] dmaengine: ti: k3-udma: Add system suspend/resume support The K3 platforms configure the DMA resources with the help of the TI's System Firmware's Device Manager(DM) over TISCI. The group of DMA related Resource Manager[1] TISCI messages includes: INTA, RINGACC, UDMAP, and PSI-L. This configuration however, does not persist in the DM after leaving from Suspend-to-RAM state. We have to restore the DMA channel configuration over TISCI for all configured channels when returning from suspend. The TISCI resource management calls for each DMA type (UDMA, PKTDMA, BCDMA) happen in device_free_chan_resources() and device_alloc_chan_resources(). In pm_suspend() we store the current udma_chan_config for channels that still have attached clients and call device_free_chan_resources(). In pm_resume() restore the udma_channel_config from backup and call device_alloc_chan_resources() for those channels. Drivers like CPSW that use k3-udma-glue already do their own DMA resource management so use the late system suspend/resume hooks. [1] https://software-dl.ti.com/tisci/esd/latest/2_tisci_msgs/index.html#resource-management-rm Signed-off-by: Vignesh Raghavendra [g-vlaev@ti.com: Add patch description and config backup] [g-vlaev@ti.com: Supend only channels with clients] Signed-off-by: Georgi Vlaev Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230329155349.2566010-1-vigneshr@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 54 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 7e23a6fdef95..f652a217be76 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -305,6 +305,8 @@ struct udma_chan { /* Channel configuration parameters */ struct udma_chan_config config; + /* Channel configuration parameters (backup) */ + struct udma_chan_config backup_config; /* dmapool for packet mode descriptors */ bool use_dma_pool; @@ -5522,11 +5524,63 @@ static int udma_probe(struct platform_device *pdev) return ret; } +static int udma_pm_suspend(struct device *dev) +{ + struct udma_dev *ud = dev_get_drvdata(dev); + struct dma_device *dma_dev = &ud->ddev; + struct dma_chan *chan; + struct udma_chan *uc; + + list_for_each_entry(chan, &dma_dev->channels, device_node) { + if (chan->client_count) { + uc = to_udma_chan(chan); + /* backup the channel configuration */ + memcpy(&uc->backup_config, &uc->config, + sizeof(struct udma_chan_config)); + dev_dbg(dev, "Suspending channel %s\n", + dma_chan_name(chan)); + ud->ddev.device_free_chan_resources(chan); + } + } + + return 0; +} + +static int udma_pm_resume(struct device *dev) +{ + struct udma_dev *ud = dev_get_drvdata(dev); + struct dma_device *dma_dev = &ud->ddev; + struct dma_chan *chan; + struct udma_chan *uc; + int ret; + + list_for_each_entry(chan, &dma_dev->channels, device_node) { + if (chan->client_count) { + uc = to_udma_chan(chan); + /* restore the channel configuration */ + memcpy(&uc->config, &uc->backup_config, + sizeof(struct udma_chan_config)); + dev_dbg(dev, "Resuming channel %s\n", + dma_chan_name(chan)); + ret = ud->ddev.device_alloc_chan_resources(chan); + if (ret) + return ret; + } + } + + return 0; +} + +static const struct dev_pm_ops udma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(udma_pm_suspend, udma_pm_resume) +}; + static struct platform_driver udma_driver = { .driver = { .name = "ti-udma", .of_match_table = udma_of_match, .suppress_bind_attrs = true, + .pm = &udma_pm_ops, }, .probe = udma_probe, }; From 827026ae2e56ec05ef1155661079badbbfc0b038 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Mar 2023 09:06:37 +0200 Subject: [PATCH 12/57] dmaengine: mv_xor_v2: Fix an error code. If the probe is deferred, -EPROBE_DEFER should be returned, not +EPROBE_DEFER. Fixes: 3cd2c313f1d6 ("dmaengine: mv_xor_v2: Fix clock resource by adding a register clock") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/201170dff832a3c496d125772e10070cd834ebf2.1679814350.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 89790beba305..0991b8265829 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -752,7 +752,7 @@ static int mv_xor_v2_probe(struct platform_device *pdev) xor_dev->clk = devm_clk_get(&pdev->dev, NULL); if (PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) { - ret = EPROBE_DEFER; + ret = -EPROBE_DEFER; goto disable_reg_clk; } if (!IS_ERR(xor_dev->clk)) { From 376c2c9b04e97ddb1fbb9d9156456e2331891506 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Mar 2023 09:06:38 +0200 Subject: [PATCH 13/57] dmaengine: mv_xor_v2: Use some clk_ helper functions to simplify code Use devm_clk_get_[optional_]enabled() instead of hand writing it. It saves some LoC. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/cc14e490f4e6002a17c9c7d283fe6a93179766c2.1679814350.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 0991b8265829..cea8aa946f9c 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -739,32 +739,18 @@ static int mv_xor_v2_probe(struct platform_device *pdev) if (ret) return ret; - xor_dev->reg_clk = devm_clk_get(&pdev->dev, "reg"); - if (PTR_ERR(xor_dev->reg_clk) != -ENOENT) { - if (!IS_ERR(xor_dev->reg_clk)) { - ret = clk_prepare_enable(xor_dev->reg_clk); - if (ret) - return ret; - } else { - return PTR_ERR(xor_dev->reg_clk); - } - } + xor_dev->reg_clk = devm_clk_get_optional_enabled(&pdev->dev, "reg"); + if (IS_ERR(xor_dev->reg_clk)) + return PTR_ERR(xor_dev->reg_clk); - xor_dev->clk = devm_clk_get(&pdev->dev, NULL); - if (PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto disable_reg_clk; - } - if (!IS_ERR(xor_dev->clk)) { - ret = clk_prepare_enable(xor_dev->clk); - if (ret) - goto disable_reg_clk; - } + xor_dev->clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(xor_dev->clk)) + return PTR_ERR(xor_dev->clk); ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1, mv_xor_v2_set_msi_msg); if (ret) - goto disable_clk; + return ret; xor_dev->irq = msi_get_virq(&pdev->dev, 0); @@ -866,10 +852,6 @@ free_hw_desq: xor_dev->hw_desq_virt, xor_dev->hw_desq); free_msi_irqs: platform_msi_domain_free_irqs(&pdev->dev); -disable_clk: - clk_disable_unprepare(xor_dev->clk); -disable_reg_clk: - clk_disable_unprepare(xor_dev->reg_clk); return ret; } @@ -889,9 +871,6 @@ static int mv_xor_v2_remove(struct platform_device *pdev) tasklet_kill(&xor_dev->irq_tasklet); - clk_disable_unprepare(xor_dev->clk); - clk_disable_unprepare(xor_dev->reg_clk); - return 0; } From 9e410fe3dc9a938bc47f71dff254be7419bd40d2 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:11 -0800 Subject: [PATCH 14/57] dmaengine: idxd: Add descriptor definitions for 16 bytes of pattern in memory fill operation The memory fill operation (0x04) can fill in memory with either 8 bytes or 16 bytes of pattern. To fill in memory with 16 bytes of pattern, the first 8 bytes are provided in pattern lower in bytes 16-23 and the next 8 bytes are in pattern upper in bytes 40-47 in the descriptor. Currently only 8 bytes of pattern is enabled. Add descriptor definitions for pattern lower and pattern upper so that user can use 16 bytes of pattern to fill memory. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1d553bedbdb5..c43d7df5fc15 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -180,6 +180,7 @@ struct dsa_hw_desc { uint64_t rdback_addr; uint64_t pattern; uint64_t desc_list_addr; + uint64_t pattern_lower; }; union { uint64_t dst_addr; @@ -244,6 +245,9 @@ struct dsa_hw_desc { uint16_t dest_app_tag_seed; }; + /* Fill */ + uint64_t pattern_upper; + uint8_t op_specific[24]; }; } __attribute__((packed)); From 12bbc2c2605516e781cd86e3cde9fe1f889b72cc Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:12 -0800 Subject: [PATCH 15/57] dmaengine: idxd: Add descriptor definitions for DIX generate operation The Data Integrity Extension (DIX) generate operation (0x17) computes the Data Integrity Field (DIF) on the source data and writes only the computed DIF for each source block to the PI destination address. Add descriptor definitions for this operation so that user can use DSA to accelerate DIX generate operation. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index c43d7df5fc15..4c12e93a6aa6 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -78,6 +78,7 @@ enum dsa_opcode { DSA_OPCODE_DIF_INS, DSA_OPCODE_DIF_STRP, DSA_OPCODE_DIF_UPDT, + DSA_OPCODE_DIX_GEN = 0x17, DSA_OPCODE_CFLUSH = 0x20, }; @@ -248,6 +249,17 @@ struct dsa_hw_desc { /* Fill */ uint64_t pattern_upper; + /* DIX generate */ + struct { + uint8_t dix_gen_res; + uint8_t dest_dif_flags; + uint8_t dif_flags; + uint8_t dix_gen_res2[13]; + uint32_t ref_tag_seed; + uint16_t app_tag_mask; + uint16_t app_tag_seed; + }; + uint8_t op_specific[24]; }; } __attribute__((packed)); @@ -326,6 +338,14 @@ struct dsa_completion_record { uint16_t dif_upd_dest_app_tag; }; + /* DIX generate */ + struct { + uint64_t dix_gen_res; + uint32_t dix_ref_tag; + uint16_t dix_app_tag_mask; + uint16_t dix_app_tag; + }; + uint8_t op_specific[16]; }; } __attribute__((packed)); From 6fec8938b7b4fe2b2c503fe87b2783a50bff0415 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 3 Mar 2023 13:34:13 -0800 Subject: [PATCH 16/57] dmaengine: idxd: Add descriptor definitions for translation fetch operation The translation fetch operation (0x0A) fetches address translations for the address range specified in the descriptor by issuing address translation (ATS) requests to the IOMMU. Add descriptor definitions for the operation so that user can use DSA to accelerate translation fetch. Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230303213413.3357431-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 4c12e93a6aa6..fc47635b57dc 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -72,6 +72,7 @@ enum dsa_opcode { DSA_OPCODE_CR_DELTA, DSA_OPCODE_AP_DELTA, DSA_OPCODE_DUALCAST, + DSA_OPCODE_TRANSL_FETCH, DSA_OPCODE_CRCGEN = 0x10, DSA_OPCODE_COPY_CRC, DSA_OPCODE_DIF_CHECK, @@ -182,6 +183,7 @@ struct dsa_hw_desc { uint64_t pattern; uint64_t desc_list_addr; uint64_t pattern_lower; + uint64_t transl_fetch_addr; }; union { uint64_t dst_addr; @@ -192,6 +194,7 @@ struct dsa_hw_desc { union { uint32_t xfer_size; uint32_t desc_count; + uint32_t region_size; }; uint16_t int_handle; uint16_t rsvd1; @@ -249,6 +252,12 @@ struct dsa_hw_desc { /* Fill */ uint64_t pattern_upper; + /* Translation fetch */ + struct { + uint64_t transl_fetch_res; + uint32_t region_stride; + }; + /* DIX generate */ struct { uint8_t dix_gen_res; From 34ca00662eb7f184f249dd2168ebea78d945e3e4 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 3 Mar 2023 13:37:31 -0800 Subject: [PATCH 17/57] dmaengine: idxd: reformat swerror output to standard Linux bitmap output SWERROR register is 4 64bit wide registers. Currently the sysfs attribute just outputs 4 64bit hex integers. Convert to output with %*pb format specifier. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230303213732.3357494-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 2 +- drivers/dma/idxd/sysfs.c | 10 ++++------ 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 7ced8d283d98..ad7a1e8a0e1c 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -598,6 +598,7 @@ int idxd_register_driver(void); void idxd_unregister_driver(void); void idxd_wqs_quiesce(struct idxd_device *idxd); bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc); +void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count); /* device interrupt control */ irqreturn_t idxd_misc_thread(int vec, void *data); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index a85efcda7095..bddff1d4359d 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -388,7 +388,7 @@ static void idxd_read_table_offsets(struct idxd_device *idxd) dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset); } -static void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count) +void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count) { int i, j, nr; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 18cd8151dee0..927c9d645121 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1452,15 +1452,13 @@ static ssize_t errors_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_device *idxd = confdev_to_idxd(dev); - int i, out = 0; + DECLARE_BITMAP(swerr_bmap, 256); + bitmap_zero(swerr_bmap, 256); spin_lock(&idxd->dev_lock); - for (i = 0; i < 4; i++) - out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]); + multi_u64_to_bmap(swerr_bmap, &idxd->sw_err.bits[0], 4); spin_unlock(&idxd->dev_lock); - out--; - out += sysfs_emit_at(buf, out, "\n"); - return out; + return sysfs_emit(buf, "%*pb\n", 256, swerr_bmap); } static DEVICE_ATTR_RO(errors); From 9f0d99b327edd217462852635498694aaf471b33 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 3 Mar 2023 13:37:32 -0800 Subject: [PATCH 18/57] dmaengine: idxd: expose IAA CAP register via sysfs knob Add IAA (IAX) capability mask sysfs attribute to expose to applications. The mask provides application knowledge of what capabilities this IAA device supports. This mask is available for IAA 2.0 device or later. Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230303213732.3357494-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 8 +++++++ drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/init.c | 4 ++++ drivers/dma/idxd/registers.h | 21 ++++++++++++++++ drivers/dma/idxd/sysfs.c | 24 +++++++++++++++++++ 5 files changed, 58 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 3becc9a82bdf..d5e3dd3d8434 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -136,6 +136,14 @@ Description: The last executed device administrative command's status/error. Also last configuration error overloaded. Writing to it will clear the status. +What: /sys/bus/dsa/devices/dsa/iaa_cap +Date: Sept 14, 2022 +KernelVersion: 6.0.0 +Contact: dmaengine@vger.kernel.org +Description: IAA (IAX) capability mask. Exported to user space for application + consumption. This attribute should only be visible on IAA devices + that are version 2 or later. + What: /sys/bus/dsa/devices/wq./block_on_fault Date: Oct 27, 2020 KernelVersion: 5.11.0 diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index ad7a1e8a0e1c..eca2c9d76db6 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -232,6 +232,7 @@ struct idxd_hw { union engine_cap_reg engine_cap; struct opcap opcap; u32 cmd_cap; + union iaa_cap_reg iaa_cap; }; enum idxd_device_state { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index bddff1d4359d..a78cfd361c04 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -460,6 +460,10 @@ static void idxd_read_caps(struct idxd_device *idxd) dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]); } multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4); + + /* read iaa cap */ + if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2) + idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET); } static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data) diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index fe3b8d04f9db..338289a66f00 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -276,6 +276,27 @@ union sw_err_reg { u64 bits[4]; } __packed; +union iaa_cap_reg { + struct { + u64 dec_aecs_format_ver:1; + u64 drop_init_bits:1; + u64 chaining:1; + u64 force_array_output_mod:1; + u64 load_part_aecs:1; + u64 comp_early_abort:1; + u64 nested_comp:1; + u64 diction_comp:1; + u64 header_gen:1; + u64 crypto_gcm:1; + u64 crypto_cfb:1; + u64 crypto_xts:1; + u64 rsvd:52; + }; + u64 bits; +} __packed; + +#define IDXD_IAACAP_OFFSET 0x180 + union msix_perm { struct { u32 rsvd:2; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 927c9d645121..2eba8cab25a1 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1561,6 +1561,18 @@ static ssize_t cmd_status_store(struct device *dev, struct device_attribute *att } static DEVICE_ATTR_RW(cmd_status); +static ssize_t iaa_cap_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (idxd->hw.version < DEVICE_VERSION_2) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%#llx\n", idxd->hw.iaa_cap.bits); +} +static DEVICE_ATTR_RO(iaa_cap); + static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, struct idxd_device *idxd) { @@ -1583,6 +1595,14 @@ static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr, idxd->data->type == IDXD_TYPE_IAX; } +static bool idxd_device_attr_iaa_cap_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_iaa_cap.attr && + (idxd->data->type != IDXD_TYPE_IAX || + idxd->hw.version < DEVICE_VERSION_2); +} + static umode_t idxd_device_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1595,6 +1615,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj, if (idxd_device_attr_read_buffers_invisible(attr, idxd)) return 0; + if (idxd_device_attr_iaa_cap_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1620,6 +1643,7 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_read_buffer_limit.attr, &dev_attr_cdev_major.attr, &dev_attr_cmd_status.attr, + &dev_attr_iaa_cap.attr, NULL, }; From 86de3bbfe45e5a7e64d4f24a1c034725072420d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 27 Mar 2023 11:48:33 +0200 Subject: [PATCH 19/57] dmaengine: sh: rz-dmac: Remove unused rz_dmac_chan.*_word_size The src_word_size and dst_word_size members of the rz_dmac_chan structure were never used, so they can be removed. Signed-off-by: Geert Uytterhoeven Reviewed-by: Biju Das Link: https://lore.kernel.org/r/021bdf56f1716276a55bcfb1ea81bba5f1d42b3d.1679910274.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/dma/sh/rz-dmac.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c index 6b62e01ba658..9479f29692d3 100644 --- a/drivers/dma/sh/rz-dmac.c +++ b/drivers/dma/sh/rz-dmac.c @@ -67,8 +67,6 @@ struct rz_dmac_chan { struct rz_dmac_desc *desc; int descs_allocated; - enum dma_slave_buswidth src_word_size; - enum dma_slave_buswidth dst_word_size; dma_addr_t src_per_address; dma_addr_t dst_per_address; @@ -603,9 +601,7 @@ static int rz_dmac_config(struct dma_chan *chan, u32 val; channel->src_per_address = config->src_addr; - channel->src_word_size = config->src_addr_width; channel->dst_per_address = config->dst_addr; - channel->dst_word_size = config->dst_addr_width; val = rz_dmac_ds_to_val_mapping(config->dst_addr_width); if (val == CHCFG_DS_INVALID) From f806bea3093cb3568e01b4375d5e1d7c8c47c1d4 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Thu, 23 Mar 2023 17:31:07 +0530 Subject: [PATCH 20/57] dmaengine: ti: k3-udma: Workaround errata i2234 Per [1], UDMA TR15 transactions may hang if ICNT0 is less than 64B Work around is to set EOL flag is to 1 for ICNT0. Since, there is no performance penalty / side effects of setting EOL flag event ICNTO > 64B, just set the flag for all UDMAP TR15 descriptors. [1] https://www.ti.com/lit/er/sprz455a/sprz455a.pdf Errata doc for J721E DRA829/TDA4VM Processors Silicon Revision 1.1/1.0 (Rev. A) Signed-off-by: Vignesh Raghavendra [j-choudhary@ti.com: minor cleanups] Signed-off-by: Jayesh Choudhary Link: https://lore.kernel.org/r/20230323120107.27638-1-j-choudhary@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 20 +++++++++++--------- include/linux/dma/ti-cppi5.h | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index f652a217be76..c07feaff69c0 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -2966,6 +2966,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, struct scatterlist *sgent; struct cppi5_tr_type15_t *tr_req = NULL; enum dma_slave_buswidth dev_width; + u32 csf = CPPI5_TR_CSF_SUPR_EVT; u16 tr_cnt0, tr_cnt1; dma_addr_t dev_addr; struct udma_desc *d; @@ -3036,6 +3037,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, if (uc->ud->match_data->type == DMA_TYPE_UDMA) { asel = 0; + csf |= CPPI5_TR_CSF_EOL_ICNT0; } else { asel = (u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT; dev_addr |= asel; @@ -3059,7 +3061,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[tr_idx].flags, CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, csf); cppi5_tr_set_trigger(&tr_req[tr_idx].flags, uc->config.tr_trigger_type, CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, 0, 0); @@ -3105,8 +3107,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, cppi5_tr_init(&tr_req[tr_idx].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[tr_idx].flags, - CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[tr_idx].flags, csf); cppi5_tr_set_trigger(&tr_req[tr_idx].flags, uc->config.tr_trigger_type, CPPI5_TR_TRIGGER_TYPE_ICNT2_DEC, @@ -3150,8 +3151,7 @@ udma_prep_slave_sg_triggered_tr(struct udma_chan *uc, struct scatterlist *sgl, d->residue += sg_len; } - cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, - CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP); + cppi5_tr_csf_set(&tr_req[tr_idx - 1].flags, csf | CPPI5_TR_CSF_EOP); return d; } @@ -3680,6 +3680,7 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, int num_tr; size_t tr_size = sizeof(struct cppi5_tr_type15_t); u16 tr0_cnt0, tr0_cnt1, tr1_cnt0; + u32 csf = CPPI5_TR_CSF_SUPR_EVT; if (uc->config.dir != DMA_MEM_TO_MEM) { dev_err(chan->device->dev, @@ -3710,13 +3711,15 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (uc->ud->match_data->type != DMA_TYPE_UDMA) { src |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT; dest |= (u64)uc->ud->asel << K3_ADDRESS_ASEL_SHIFT; + } else { + csf |= CPPI5_TR_CSF_EOL_ICNT0; } tr_req = d->hwdesc[0].tr_req_base; cppi5_tr_init(&tr_req[0].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[0].flags, CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[0].flags, csf); tr_req[0].addr = src; tr_req[0].icnt0 = tr0_cnt0; @@ -3735,7 +3738,7 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (num_tr == 2) { cppi5_tr_init(&tr_req[1].flags, CPPI5_TR_TYPE15, false, true, CPPI5_TR_EVENT_SIZE_COMPLETION, 0); - cppi5_tr_csf_set(&tr_req[1].flags, CPPI5_TR_CSF_SUPR_EVT); + cppi5_tr_csf_set(&tr_req[1].flags, csf); tr_req[1].addr = src + tr0_cnt1 * tr0_cnt0; tr_req[1].icnt0 = tr1_cnt0; @@ -3750,8 +3753,7 @@ udma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, tr_req[1].dicnt3 = 1; } - cppi5_tr_csf_set(&tr_req[num_tr - 1].flags, - CPPI5_TR_CSF_SUPR_EVT | CPPI5_TR_CSF_EOP); + cppi5_tr_csf_set(&tr_req[num_tr - 1].flags, csf | CPPI5_TR_CSF_EOP); if (uc->config.metadata_size) d->vd.tx.metadata_ops = &metadata_ops; diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h index efa2f0309f00..c53c0f6e3b1a 100644 --- a/include/linux/dma/ti-cppi5.h +++ b/include/linux/dma/ti-cppi5.h @@ -616,6 +616,7 @@ static inline void *cppi5_hdesc_get_swdata(struct cppi5_host_desc_t *desc) #define CPPI5_TR_CSF_SUPR_EVT BIT(2) #define CPPI5_TR_CSF_EOL_ADV_SHIFT (4U) #define CPPI5_TR_CSF_EOL_ADV_MASK GENMASK(6, 4) +#define CPPI5_TR_CSF_EOL_ICNT0 BIT(4) #define CPPI5_TR_CSF_EOP BIT(7) /** From a747051efa16d4304aacaf44fa0771667e28ca54 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 22 Mar 2023 08:10:01 -0400 Subject: [PATCH 21/57] dmaengine: tegra-apb: remove unused tdma_read function clang with W=1 reports drivers/dma/tegra20-apb-dma.c:236:19: error: unused function 'tdma_read' [-Werror,-Wunused-function] static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg) ^ This function is not used so remove it. Signed-off-by: Tom Rix Reviewed-by: Jon Hunter Link: https://lore.kernel.org/r/20230322121001.2569909-1-trix@redhat.com Signed-off-by: Vinod Koul --- drivers/dma/tegra20-apb-dma.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index eaafcbe4ca94..cc6b91f48979 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -233,11 +233,6 @@ static inline void tdma_write(struct tegra_dma *tdma, u32 reg, u32 val) writel(val, tdma->base_addr + reg); } -static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg) -{ - return readl(tdma->base_addr + reg); -} - static inline void tdc_write(struct tegra_dma_channel *tdc, u32 reg, u32 val) { From e9f92b9999055110249dacdc5f1a6dfda2a155f6 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Thu, 16 Mar 2023 15:03:18 +0530 Subject: [PATCH 22/57] dmaengine: zynqmp_dma: Sync DMA and coherent masks Align ZDMA DMA as well as coherent memory masks to 44 bit. This is required when using >32 bit memory regions. Signed-off-by: Harini Katakam Acked-by: Michal Simek Link: https://lore.kernel.org/r/20230316093318.6722-1-harini.katakam@amd.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/zynqmp_dma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index ce359058c638..9360f43b8e0f 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -1060,7 +1060,11 @@ static int zynqmp_dma_probe(struct platform_device *pdev) zdev->dev = &pdev->dev; INIT_LIST_HEAD(&zdev->common.channels); - dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); + if (ret) { + dev_err(&pdev->dev, "DMA not available for address range\n"); + return ret; + } dma_cap_set(DMA_MEMCPY, zdev->common.cap_mask); p = &zdev->common; From 3366c6fe2769fe653107626b3273e03156d9cac8 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 20 Mar 2023 19:12:09 -0400 Subject: [PATCH 23/57] dmaengine: ti: edma: remove unused edma_and function clang with W=1 reports drivers/dma/ti/edma.c:321:20: error: unused function 'edma_and' [-Werror,-Wunused-function] static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and) ^ This function is not used, so remove it. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20230320231209.1728940-1-trix@redhat.com Signed-off-by: Vinod Koul --- drivers/dma/ti/edma.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index fa06d7e6d8e3..9ea91c640c32 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -318,14 +318,6 @@ static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and, edma_write(ecc, offset, val); } -static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and) -{ - unsigned val = edma_read(ecc, offset); - - val &= and; - edma_write(ecc, offset, val); -} - static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or) { unsigned val = edma_read(ecc, offset); From d77c4502d6238bbfed7cb9422748477839d506f2 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 20 Mar 2023 19:49:06 -0400 Subject: [PATCH 24/57] dmaengine: dw-edma: remove unused readq_ch and writeq_ch functions clang with W=1 reports drivers/dma/dw-edma/dw-edma-v0-core.c:162:20: error: unused function 'writeq_ch' [-Werror,-Wunused-function] static inline void writeq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch, ^ drivers/dma/dw-edma/dw-edma-v0-core.c:185:19: error: unused function 'readq_ch' [-Werror,-Wunused-function] static inline u64 readq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch, ^ These functions and their wrapping macros are not used, so remove them. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20230320234906.1730308-1-trix@redhat.com Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-v0-core.c | 56 --------------------------- 1 file changed, 56 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c index 72e79a0c0a4e..32f834a3848a 100644 --- a/drivers/dma/dw-edma/dw-edma-v0-core.c +++ b/drivers/dma/dw-edma/dw-edma-v0-core.c @@ -159,62 +159,6 @@ static inline u32 readl_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch, #define GET_CH_32(dw, dir, ch, name) \ readl_ch(dw, dir, ch, &(__dw_ch_regs(dw, dir, ch)->name)) -static inline void writeq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch, - u64 value, void __iomem *addr) -{ - if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) { - u32 viewport_sel; - unsigned long flags; - - raw_spin_lock_irqsave(&dw->lock, flags); - - viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch); - if (dir == EDMA_DIR_READ) - viewport_sel |= BIT(31); - - writel(viewport_sel, - &(__dw_regs(dw)->type.legacy.viewport_sel)); - writeq(value, addr); - - raw_spin_unlock_irqrestore(&dw->lock, flags); - } else { - writeq(value, addr); - } -} - -static inline u64 readq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch, - const void __iomem *addr) -{ - u64 value; - - if (dw->chip->mf == EDMA_MF_EDMA_LEGACY) { - u32 viewport_sel; - unsigned long flags; - - raw_spin_lock_irqsave(&dw->lock, flags); - - viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch); - if (dir == EDMA_DIR_READ) - viewport_sel |= BIT(31); - - writel(viewport_sel, - &(__dw_regs(dw)->type.legacy.viewport_sel)); - value = readq(addr); - - raw_spin_unlock_irqrestore(&dw->lock, flags); - } else { - value = readq(addr); - } - - return value; -} - -#define SET_CH_64(dw, dir, ch, name, value) \ - writeq_ch(dw, dir, ch, value, &(__dw_ch_regs(dw, dir, ch)->name)) - -#define GET_CH_64(dw, dir, ch, name) \ - readq_ch(dw, dir, ch, &(__dw_ch_regs(dw, dir, ch)->name)) - /* eDMA management callbacks */ void dw_edma_v0_core_off(struct dw_edma *dw) { From f60a150822a7946c97059dad3416ea3de501eaed Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 19 Mar 2023 11:32:22 -0500 Subject: [PATCH 25/57] dmaengine: bestcomm: Use of_address_to_resource() Replace of_get_address() and of_translate_address() calls with single call to of_address_to_resource(). Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20230319163222.226377-1-robh@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/bestcomm/sram.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/dma/bestcomm/sram.c b/drivers/dma/bestcomm/sram.c index c465758e7193..0553956f7456 100644 --- a/drivers/dma/bestcomm/sram.c +++ b/drivers/dma/bestcomm/sram.c @@ -38,7 +38,7 @@ int bcom_sram_init(struct device_node *sram_node, char *owner) { int rv; const u32 *regaddr_p; - u64 regaddr64, size64; + struct resource res; unsigned int psize; /* Create our state struct */ @@ -56,21 +56,18 @@ int bcom_sram_init(struct device_node *sram_node, char *owner) } /* Get address and size of the sram */ - regaddr_p = of_get_address(sram_node, 0, &size64, NULL); - if (!regaddr_p) { + rv = of_address_to_resource(sram_node, 0, &res); + if (rv) { printk(KERN_ERR "%s: bcom_sram_init: " "Invalid device node !\n", owner); - rv = -EINVAL; goto error_free; } - regaddr64 = of_translate_address(sram_node, regaddr_p); - - bcom_sram->base_phys = (phys_addr_t) regaddr64; - bcom_sram->size = (unsigned int) size64; + bcom_sram->base_phys = res.start; + bcom_sram->size = resource_size(&res); /* Request region */ - if (!request_mem_region(bcom_sram->base_phys, bcom_sram->size, owner)) { + if (!request_mem_region(res.start, resource_size(&res), owner)) { printk(KERN_ERR "%s: bcom_sram_init: " "Couldn't request region !\n", owner); rv = -EBUSY; @@ -79,7 +76,7 @@ int bcom_sram_init(struct device_node *sram_node, char *owner) /* Map SRAM */ /* sram is not really __iomem */ - bcom_sram->base_virt = (void*) ioremap(bcom_sram->base_phys, bcom_sram->size); + bcom_sram->base_virt = (void *)ioremap(res.start, resource_size(&res)); if (!bcom_sram->base_virt) { printk(KERN_ERR "%s: bcom_sram_init: " @@ -120,7 +117,7 @@ int bcom_sram_init(struct device_node *sram_node, char *owner) return 0; error_release: - release_mem_region(bcom_sram->base_phys, bcom_sram->size); + release_mem_region(res.start, resource_size(&res)); error_free: kfree(bcom_sram); bcom_sram = NULL; From 82e6051a48957a89066d15b17bb85d2f662f2bad Mon Sep 17 00:00:00 2001 From: Apurva Nandan Date: Thu, 9 Mar 2023 01:45:13 +0530 Subject: [PATCH 26/57] dmaengine: ti: k3-psil: Add PSI-L thread support for J784s4 Add psil thread IDs for J784s4 and include J784s4 in the set of "k3_soc_devices" in k3-psil.c. Signed-off-by: Apurva Nandan Signed-off-by: Hari Nagalla Signed-off-by: Siddharth Vadapalli Signed-off-by: Vignesh Raghavendra [vaishnav.a@ti.com: add MCSPI-TX and 3rd CSI2RX instance entries] Signed-off-by: Vaishnav Achath [j-choudhary@ti.com: add sa2ul entries, minor cleanups] Signed-off-by: Jayesh Choudhary Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20230308201513.116638-1-j-choudhary@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/Makefile | 3 +- drivers/dma/ti/k3-psil-j784s4.c | 354 ++++++++++++++++++++++++++++++++ drivers/dma/ti/k3-psil-priv.h | 1 + drivers/dma/ti/k3-psil.c | 1 + drivers/dma/ti/k3-udma.c | 1 + 5 files changed, 359 insertions(+), 1 deletion(-) create mode 100644 drivers/dma/ti/k3-psil-j784s4.c diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile index bd1e07fda559..acc950bf609c 100644 --- a/drivers/dma/ti/Makefile +++ b/drivers/dma/ti/Makefile @@ -11,6 +11,7 @@ k3-psil-lib-objs := k3-psil.o \ k3-psil-am64.o \ k3-psil-j721s2.o \ k3-psil-am62.o \ - k3-psil-am62a.o + k3-psil-am62a.o \ + k3-psil-j784s4.o obj-$(CONFIG_TI_K3_PSIL) += k3-psil-lib.o obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o diff --git a/drivers/dma/ti/k3-psil-j784s4.c b/drivers/dma/ti/k3-psil-j784s4.c new file mode 100644 index 000000000000..12bfa2478f92 --- /dev/null +++ b/drivers/dma/ti/k3-psil-j784s4.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com + */ + +#include + +#include "k3-psil-priv.h" + +#define PSIL_PDMA_XY_TR(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + }, \ + } + +#define PSIL_PDMA_XY_PKT(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pkt_mode = 1, \ + }, \ + } + +#define PSIL_PDMA_MCASP(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_PDMA_XY, \ + .pdma_acc32 = 1, \ + .pdma_burst = 1, \ + }, \ + } + +#define PSIL_ETHERNET(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 16, \ + }, \ + } + +#define PSIL_SA2UL(x, tx) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + .pkt_mode = 1, \ + .needs_epib = 1, \ + .psd_size = 64, \ + .notdpkt = tx, \ + }, \ + } + +#define PSIL_CSI2RX(x) \ + { \ + .thread_id = x, \ + .ep_config = { \ + .ep_type = PSIL_EP_NATIVE, \ + }, \ + } + +/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ +static struct psil_ep j784s4_src_ep_map[] = { + /* PDMA_MCASP - McASP0-4 */ + PSIL_PDMA_MCASP(0x4400), + PSIL_PDMA_MCASP(0x4401), + PSIL_PDMA_MCASP(0x4402), + PSIL_PDMA_MCASP(0x4403), + PSIL_PDMA_MCASP(0x4404), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0x4600), + PSIL_PDMA_XY_PKT(0x4601), + PSIL_PDMA_XY_PKT(0x4602), + PSIL_PDMA_XY_PKT(0x4603), + PSIL_PDMA_XY_PKT(0x4604), + PSIL_PDMA_XY_PKT(0x4605), + PSIL_PDMA_XY_PKT(0x4606), + PSIL_PDMA_XY_PKT(0x4607), + PSIL_PDMA_XY_PKT(0x4608), + PSIL_PDMA_XY_PKT(0x4609), + PSIL_PDMA_XY_PKT(0x460a), + PSIL_PDMA_XY_PKT(0x460b), + PSIL_PDMA_XY_PKT(0x460c), + PSIL_PDMA_XY_PKT(0x460d), + PSIL_PDMA_XY_PKT(0x460e), + PSIL_PDMA_XY_PKT(0x460f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0x4620), + PSIL_PDMA_XY_PKT(0x4621), + PSIL_PDMA_XY_PKT(0x4622), + PSIL_PDMA_XY_PKT(0x4623), + PSIL_PDMA_XY_PKT(0x4624), + PSIL_PDMA_XY_PKT(0x4625), + PSIL_PDMA_XY_PKT(0x4626), + PSIL_PDMA_XY_PKT(0x4627), + PSIL_PDMA_XY_PKT(0x4628), + PSIL_PDMA_XY_PKT(0x4629), + PSIL_PDMA_XY_PKT(0x462a), + PSIL_PDMA_XY_PKT(0x462b), + PSIL_PDMA_XY_PKT(0x462c), + PSIL_PDMA_XY_PKT(0x462d), + PSIL_PDMA_XY_PKT(0x462e), + PSIL_PDMA_XY_PKT(0x462f), + /* MAIN_CPSW2G */ + PSIL_ETHERNET(0x4640), + /* PDMA_USART_G0 - UART0-1 */ + PSIL_PDMA_XY_PKT(0x4700), + PSIL_PDMA_XY_PKT(0x4701), + /* PDMA_USART_G1 - UART2-3 */ + PSIL_PDMA_XY_PKT(0x4702), + PSIL_PDMA_XY_PKT(0x4703), + /* PDMA_USART_G2 - UART4-9 */ + PSIL_PDMA_XY_PKT(0x4704), + PSIL_PDMA_XY_PKT(0x4705), + PSIL_PDMA_XY_PKT(0x4706), + PSIL_PDMA_XY_PKT(0x4707), + PSIL_PDMA_XY_PKT(0x4708), + PSIL_PDMA_XY_PKT(0x4709), + /* CSI2RX */ + PSIL_CSI2RX(0x4900), + PSIL_CSI2RX(0x4901), + PSIL_CSI2RX(0x4902), + PSIL_CSI2RX(0x4903), + PSIL_CSI2RX(0x4940), + PSIL_CSI2RX(0x4941), + PSIL_CSI2RX(0x4942), + PSIL_CSI2RX(0x4943), + PSIL_CSI2RX(0x4944), + PSIL_CSI2RX(0x4945), + PSIL_CSI2RX(0x4946), + PSIL_CSI2RX(0x4947), + PSIL_CSI2RX(0x4948), + PSIL_CSI2RX(0x4949), + PSIL_CSI2RX(0x494a), + PSIL_CSI2RX(0x494b), + PSIL_CSI2RX(0x494c), + PSIL_CSI2RX(0x494d), + PSIL_CSI2RX(0x494e), + PSIL_CSI2RX(0x494f), + PSIL_CSI2RX(0x4950), + PSIL_CSI2RX(0x4951), + PSIL_CSI2RX(0x4952), + PSIL_CSI2RX(0x4953), + PSIL_CSI2RX(0x4954), + PSIL_CSI2RX(0x4955), + PSIL_CSI2RX(0x4956), + PSIL_CSI2RX(0x4957), + PSIL_CSI2RX(0x4958), + PSIL_CSI2RX(0x4959), + PSIL_CSI2RX(0x495a), + PSIL_CSI2RX(0x495b), + PSIL_CSI2RX(0x495c), + PSIL_CSI2RX(0x495d), + PSIL_CSI2RX(0x495e), + PSIL_CSI2RX(0x495f), + PSIL_CSI2RX(0x4960), + PSIL_CSI2RX(0x4961), + PSIL_CSI2RX(0x4962), + PSIL_CSI2RX(0x4963), + PSIL_CSI2RX(0x4964), + PSIL_CSI2RX(0x4965), + PSIL_CSI2RX(0x4966), + PSIL_CSI2RX(0x4967), + PSIL_CSI2RX(0x4968), + PSIL_CSI2RX(0x4969), + PSIL_CSI2RX(0x496a), + PSIL_CSI2RX(0x496b), + PSIL_CSI2RX(0x496c), + PSIL_CSI2RX(0x496d), + PSIL_CSI2RX(0x496e), + PSIL_CSI2RX(0x496f), + PSIL_CSI2RX(0x4970), + PSIL_CSI2RX(0x4971), + PSIL_CSI2RX(0x4972), + PSIL_CSI2RX(0x4973), + PSIL_CSI2RX(0x4974), + PSIL_CSI2RX(0x4975), + PSIL_CSI2RX(0x4976), + PSIL_CSI2RX(0x4977), + PSIL_CSI2RX(0x4978), + PSIL_CSI2RX(0x4979), + PSIL_CSI2RX(0x497a), + PSIL_CSI2RX(0x497b), + PSIL_CSI2RX(0x497c), + PSIL_CSI2RX(0x497d), + PSIL_CSI2RX(0x497e), + PSIL_CSI2RX(0x497f), + PSIL_CSI2RX(0x4980), + PSIL_CSI2RX(0x4981), + PSIL_CSI2RX(0x4982), + PSIL_CSI2RX(0x4983), + PSIL_CSI2RX(0x4984), + PSIL_CSI2RX(0x4985), + PSIL_CSI2RX(0x4986), + PSIL_CSI2RX(0x4987), + PSIL_CSI2RX(0x4988), + PSIL_CSI2RX(0x4989), + PSIL_CSI2RX(0x498a), + PSIL_CSI2RX(0x498b), + PSIL_CSI2RX(0x498c), + PSIL_CSI2RX(0x498d), + PSIL_CSI2RX(0x498e), + PSIL_CSI2RX(0x498f), + PSIL_CSI2RX(0x4990), + PSIL_CSI2RX(0x4991), + PSIL_CSI2RX(0x4992), + PSIL_CSI2RX(0x4993), + PSIL_CSI2RX(0x4994), + PSIL_CSI2RX(0x4995), + PSIL_CSI2RX(0x4996), + PSIL_CSI2RX(0x4997), + PSIL_CSI2RX(0x4998), + PSIL_CSI2RX(0x4999), + PSIL_CSI2RX(0x499a), + PSIL_CSI2RX(0x499b), + PSIL_CSI2RX(0x499c), + PSIL_CSI2RX(0x499d), + PSIL_CSI2RX(0x499e), + PSIL_CSI2RX(0x499f), + /* MAIN_CPSW9G */ + PSIL_ETHERNET(0x4a00), + /* MAIN-SA2UL */ + PSIL_SA2UL(0x4a40, 0), + PSIL_SA2UL(0x4a41, 0), + PSIL_SA2UL(0x4a42, 0), + PSIL_SA2UL(0x4a43, 0), + /* MCU_CPSW0 */ + PSIL_ETHERNET(0x7000), + /* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */ + PSIL_PDMA_XY_PKT(0x7100), + PSIL_PDMA_XY_PKT(0x7101), + PSIL_PDMA_XY_PKT(0x7102), + PSIL_PDMA_XY_PKT(0x7103), + /* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */ + PSIL_PDMA_XY_PKT(0x7200), + PSIL_PDMA_XY_PKT(0x7201), + PSIL_PDMA_XY_PKT(0x7202), + PSIL_PDMA_XY_PKT(0x7203), + PSIL_PDMA_XY_PKT(0x7204), + PSIL_PDMA_XY_PKT(0x7205), + PSIL_PDMA_XY_PKT(0x7206), + PSIL_PDMA_XY_PKT(0x7207), + /* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */ + PSIL_PDMA_XY_PKT(0x7300), + /* MCU_PDMA_ADC - ADC0-1 */ + PSIL_PDMA_XY_TR(0x7400), + PSIL_PDMA_XY_TR(0x7401), + PSIL_PDMA_XY_TR(0x7402), + PSIL_PDMA_XY_TR(0x7403), + /* MCU_SA2UL */ + PSIL_SA2UL(0x7500, 0), + PSIL_SA2UL(0x7501, 0), + PSIL_SA2UL(0x7502, 0), + PSIL_SA2UL(0x7503, 0), +}; + +/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */ +static struct psil_ep j784s4_dst_ep_map[] = { + /* MAIN_CPSW2G */ + PSIL_ETHERNET(0xc640), + PSIL_ETHERNET(0xc641), + PSIL_ETHERNET(0xc642), + PSIL_ETHERNET(0xc643), + PSIL_ETHERNET(0xc644), + PSIL_ETHERNET(0xc645), + PSIL_ETHERNET(0xc646), + PSIL_ETHERNET(0xc647), + /* MAIN_CPSW9G */ + PSIL_ETHERNET(0xca00), + PSIL_ETHERNET(0xca01), + PSIL_ETHERNET(0xca02), + PSIL_ETHERNET(0xca03), + PSIL_ETHERNET(0xca04), + PSIL_ETHERNET(0xca05), + PSIL_ETHERNET(0xca06), + PSIL_ETHERNET(0xca07), + /* MAIN-SA2UL */ + PSIL_SA2UL(0xca40, 1), + PSIL_SA2UL(0xca41, 1), + /* PDMA_SPI_G0 - SPI0-3 */ + PSIL_PDMA_XY_PKT(0xc600), + PSIL_PDMA_XY_PKT(0xc601), + PSIL_PDMA_XY_PKT(0xc602), + PSIL_PDMA_XY_PKT(0xc603), + PSIL_PDMA_XY_PKT(0xc604), + PSIL_PDMA_XY_PKT(0xc605), + PSIL_PDMA_XY_PKT(0xc606), + PSIL_PDMA_XY_PKT(0xc607), + PSIL_PDMA_XY_PKT(0xc608), + PSIL_PDMA_XY_PKT(0xc609), + PSIL_PDMA_XY_PKT(0xc60a), + PSIL_PDMA_XY_PKT(0xc60b), + PSIL_PDMA_XY_PKT(0xc60c), + PSIL_PDMA_XY_PKT(0xc60d), + PSIL_PDMA_XY_PKT(0xc60e), + PSIL_PDMA_XY_PKT(0xc60f), + /* PDMA_SPI_G1 - SPI4-7 */ + PSIL_PDMA_XY_PKT(0xc620), + PSIL_PDMA_XY_PKT(0xc621), + PSIL_PDMA_XY_PKT(0xc622), + PSIL_PDMA_XY_PKT(0xc623), + PSIL_PDMA_XY_PKT(0xc624), + PSIL_PDMA_XY_PKT(0xc625), + PSIL_PDMA_XY_PKT(0xc626), + PSIL_PDMA_XY_PKT(0xc627), + PSIL_PDMA_XY_PKT(0xc628), + PSIL_PDMA_XY_PKT(0xc629), + PSIL_PDMA_XY_PKT(0xc62a), + PSIL_PDMA_XY_PKT(0xc62b), + PSIL_PDMA_XY_PKT(0xc62c), + PSIL_PDMA_XY_PKT(0xc62d), + PSIL_PDMA_XY_PKT(0xc62e), + PSIL_PDMA_XY_PKT(0xc62f), + /* MCU_CPSW0 */ + PSIL_ETHERNET(0xf000), + PSIL_ETHERNET(0xf001), + PSIL_ETHERNET(0xf002), + PSIL_ETHERNET(0xf003), + PSIL_ETHERNET(0xf004), + PSIL_ETHERNET(0xf005), + PSIL_ETHERNET(0xf006), + PSIL_ETHERNET(0xf007), + /* MCU_PDMA_MISC_G0 - SPI0 */ + PSIL_PDMA_XY_PKT(0xf100), + PSIL_PDMA_XY_PKT(0xf101), + PSIL_PDMA_XY_PKT(0xf102), + PSIL_PDMA_XY_PKT(0xf103), + /* MCU_PDMA_MISC_G1 - SPI1-2 */ + PSIL_PDMA_XY_PKT(0xf200), + PSIL_PDMA_XY_PKT(0xf201), + PSIL_PDMA_XY_PKT(0xf202), + PSIL_PDMA_XY_PKT(0xf203), + PSIL_PDMA_XY_PKT(0xf204), + PSIL_PDMA_XY_PKT(0xf205), + PSIL_PDMA_XY_PKT(0xf206), + PSIL_PDMA_XY_PKT(0xf207), + /* MCU_SA2UL */ + PSIL_SA2UL(0xf500, 1), + PSIL_SA2UL(0xf501, 1), +}; + +struct psil_ep_map j784s4_ep_map = { + .name = "j784s4", + .src = j784s4_src_ep_map, + .src_count = ARRAY_SIZE(j784s4_src_ep_map), + .dst = j784s4_dst_ep_map, + .dst_count = ARRAY_SIZE(j784s4_dst_ep_map), +}; diff --git a/drivers/dma/ti/k3-psil-priv.h b/drivers/dma/ti/k3-psil-priv.h index abd650bb7600..c383723d1c8f 100644 --- a/drivers/dma/ti/k3-psil-priv.h +++ b/drivers/dma/ti/k3-psil-priv.h @@ -44,5 +44,6 @@ extern struct psil_ep_map am64_ep_map; extern struct psil_ep_map j721s2_ep_map; extern struct psil_ep_map am62_ep_map; extern struct psil_ep_map am62a_ep_map; +extern struct psil_ep_map j784s4_ep_map; #endif /* K3_PSIL_PRIV_H_ */ diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c index 2da6988a0e7b..c11389d67a3f 100644 --- a/drivers/dma/ti/k3-psil.c +++ b/drivers/dma/ti/k3-psil.c @@ -25,6 +25,7 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "J721S2", .data = &j721s2_ep_map }, { .family = "AM62X", .data = &am62_ep_map }, { .family = "AM62AX", .data = &am62a_ep_map }, + { .family = "J784S4", .data = &j784s4_ep_map }, { /* sentinel */ } }; diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index c07feaff69c0..fc3a2a05ab7b 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4416,6 +4416,7 @@ static const struct soc_device_attribute k3_soc_devices[] = { { .family = "J721S2", .data = &j721e_soc_data}, { .family = "AM62X", .data = &am64_soc_data }, { .family = "AM62AX", .data = &am64_soc_data }, + { .family = "J784S4", .data = &j721e_soc_data }, { /* sentinel */ } }; From 91d6a468e335571f1e67e046050dea9af5fa4ebe Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 10 Apr 2023 02:33:55 +0300 Subject: [PATCH 27/57] dma: gpi: remove spurious unlock in gpi_ch_init gpi_ch_init() doesn't lock the ctrl_lock mutex, so there is no need to unlock it too. Instead the mutex is handled by the function gpi_alloc_chan_resources(), which properly locks and unlocks the mutex. ===================================== WARNING: bad unlock balance detected! 6.3.0-rc5-00253-g99792582ded1-dirty #15 Not tainted ------------------------------------- kworker/u16:0/9 is trying to release lock (&gpii->ctrl_lock) at: [] gpi_alloc_chan_resources+0x108/0x5bc but there are no more locks to release! other info that might help us debug this: 6 locks held by kworker/u16:0/9: #0: ffff575740010938 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x220/0x594 #1: ffff80000809bdd0 (deferred_probe_work){+.+.}-{0:0}, at: process_one_work+0x220/0x594 #2: ffff575740f2a0f8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x38/0x188 #3: ffff57574b5570f8 (&dev->mutex){....}-{3:3}, at: __device_attach+0x38/0x188 #4: ffffb99d06a2f180 (of_dma_lock){+.+.}-{3:3}, at: of_dma_request_slave_channel+0x138/0x280 #5: ffffb99d06a2ee20 (dma_list_mutex){+.+.}-{3:3}, at: dma_get_slave_channel+0x28/0x10c stack backtrace: CPU: 7 PID: 9 Comm: kworker/u16:0 Not tainted 6.3.0-rc5-00253-g99792582ded1-dirty #15 Hardware name: Google Pixel 3 (DT) Workqueue: events_unbound deferred_probe_work_func Call trace: dump_backtrace+0xa0/0xfc show_stack+0x18/0x24 dump_stack_lvl+0x60/0xac dump_stack+0x18/0x24 print_unlock_imbalance_bug+0x130/0x148 lock_release+0x270/0x300 __mutex_unlock_slowpath+0x48/0x2cc mutex_unlock+0x20/0x2c gpi_alloc_chan_resources+0x108/0x5bc dma_chan_get+0x84/0x188 dma_get_slave_channel+0x5c/0x10c gpi_of_dma_xlate+0x110/0x1a0 of_dma_request_slave_channel+0x174/0x280 dma_request_chan+0x3c/0x2d4 geni_i2c_probe+0x544/0x63c platform_probe+0x68/0xc4 really_probe+0x148/0x2ac __driver_probe_device+0x78/0xe0 driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 device_add+0x60c/0x7d8 of_device_add+0x44/0x60 of_platform_device_create_pdata+0x90/0x124 of_platform_bus_create+0x15c/0x3c8 of_platform_populate+0x58/0xf8 devm_of_platform_populate+0x58/0xbc geni_se_probe+0xf0/0x164 platform_probe+0x68/0xc4 really_probe+0x148/0x2ac __driver_probe_device+0x78/0xe0 driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x2bc/0x594 worker_thread+0x228/0x438 kthread+0x108/0x10c ret_from_fork+0x10/0x20 Fixes: 5d0c3533a19f ("dmaengine: qcom: Add GPI dma driver") Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230409233355.453741-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 59a36cbf9b5f..932628b319c8 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1966,7 +1966,6 @@ error_alloc_ev_ring: error_config_int: gpi_free_ring(&gpii->ev_ring, gpii); exit_gpi_init: - mutex_unlock(&gpii->ctrl_lock); return ret; } From 619d8ea96d984d007be88b1e7a4ff9159bf44594 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 10 Apr 2023 18:26:54 -0500 Subject: [PATCH 28/57] dmaengine: qcom_hidma: Add explicit platform_device.h and of_device.h includes qcom_hidma uses of_dma_configure() which is declared in of_device.h. platform_device.h and of_device.h get implicitly included by of_platform.h, but that is going to be removed soon. Signed-off-by: Rob Herring Acked-by: Sinan Kaya Link: https://lore.kernel.org/r/20230410232654.1561462-1-robh@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/hidma_mgmt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c index 62026607f3f8..05e96b31d871 100644 --- a/drivers/dma/qcom/hidma_mgmt.c +++ b/drivers/dma/qcom/hidma_mgmt.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include From a251994a441ee0a69ba7062c8cd2d08ead3db379 Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Tue, 11 Apr 2023 19:17:57 +0900 Subject: [PATCH 29/57] dmaengine: dw-edma: Fix to change for continuous transfer The dw-edma driver stops after processing a DMA request even if a request remains in the issued queue, which is not the expected behavior. The DMA engine API requires continuous processing. Add a trigger to start after one processing finished if there are requests remain. Fixes: e63d79d1ffcd ("dmaengine: Add Synopsys eDMA IP core driver") Signed-off-by: Shunsuke Mie Link: https://lore.kernel.org/r/20230411101758.438472-1-mie@igel.co.jp Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index 1906a836f0aa..26a395d02f5d 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -181,7 +181,7 @@ static void vchan_free_desc(struct virt_dma_desc *vdesc) dw_edma_free_desc(vd2dw_edma_desc(vdesc)); } -static void dw_edma_start_transfer(struct dw_edma_chan *chan) +static int dw_edma_start_transfer(struct dw_edma_chan *chan) { struct dw_edma_chunk *child; struct dw_edma_desc *desc; @@ -189,16 +189,16 @@ static void dw_edma_start_transfer(struct dw_edma_chan *chan) vd = vchan_next_desc(&chan->vc); if (!vd) - return; + return 0; desc = vd2dw_edma_desc(vd); if (!desc) - return; + return 0; child = list_first_entry_or_null(&desc->chunk->list, struct dw_edma_chunk, list); if (!child) - return; + return 0; dw_edma_v0_core_start(child, !desc->xfer_sz); desc->xfer_sz += child->ll_region.sz; @@ -206,6 +206,8 @@ static void dw_edma_start_transfer(struct dw_edma_chan *chan) list_del(&child->list); kfree(child); desc->chunks_alloc--; + + return 1; } static void dw_edma_device_caps(struct dma_chan *dchan, @@ -602,14 +604,14 @@ static void dw_edma_done_interrupt(struct dw_edma_chan *chan) switch (chan->request) { case EDMA_REQ_NONE: desc = vd2dw_edma_desc(vd); - if (desc->chunks_alloc) { - chan->status = EDMA_ST_BUSY; - dw_edma_start_transfer(chan); - } else { + if (!desc->chunks_alloc) { list_del(&vd->node); vchan_cookie_complete(vd); - chan->status = EDMA_ST_IDLE; } + + /* Continue transferring if there are remaining chunks or issued requests. + */ + chan->status = dw_edma_start_transfer(chan) ? EDMA_ST_BUSY : EDMA_ST_IDLE; break; case EDMA_REQ_STOP: From 970b17dfe264a9085ba4e593730ecfd496b950ab Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Tue, 11 Apr 2023 19:17:58 +0900 Subject: [PATCH 30/57] dmaengine: dw-edma: Fix to enable to issue dma request on DMA processing The issue_pending request is ignored while driver is processing a DMA request. Fix to issue the pending requests on any dma channel status. Fixes: e63d79d1ffcd ("dmaengine: Add Synopsys eDMA IP core driver") Signed-off-by: Shunsuke Mie Link: https://lore.kernel.org/r/20230411101758.438472-2-mie@igel.co.jp Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index 26a395d02f5d..7d2b73ef0872 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -308,9 +308,12 @@ static void dw_edma_device_issue_pending(struct dma_chan *dchan) struct dw_edma_chan *chan = dchan2dw_edma_chan(dchan); unsigned long flags; + if (!chan->configured) + return; + spin_lock_irqsave(&chan->vc.lock, flags); - if (chan->configured && chan->request == EDMA_REQ_NONE && - chan->status == EDMA_ST_IDLE && vchan_issue_pending(&chan->vc)) { + if (vchan_issue_pending(&chan->vc) && chan->request == EDMA_REQ_NONE && + chan->status == EDMA_ST_IDLE) { chan->status = EDMA_ST_BUSY; dw_edma_start_transfer(chan); } From 790f3c8b8f9f63b1f5a3ffd06630ed3d0df9804c Mon Sep 17 00:00:00 2001 From: Walker Chen Date: Wed, 22 Mar 2023 17:48:18 +0800 Subject: [PATCH 31/57] dmaengine: dw-axi-dmac: Add support for StarFive JH7110 DMA Add DMA reset operation in device probe and use different configuration on CH_CFG registers according to match data. Update all uses of of_device_is_compatible with of_device_get_match_data. Signed-off-by: Walker Chen Reviewed-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20230322094820.24738-3-walker.chen@starfivetech.com Signed-off-by: Vinod Koul --- .../dma/dw-axi-dmac/dw-axi-dmac-platform.c | 38 ++++++++++++++++--- drivers/dma/dw-axi-dmac/dw-axi-dmac.h | 1 + 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index 4169e1d7d5ca..6cfcb541d8c3 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -21,10 +21,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -46,6 +48,10 @@ DMA_SLAVE_BUSWIDTH_32_BYTES | \ DMA_SLAVE_BUSWIDTH_64_BYTES) +#define AXI_DMA_FLAG_HAS_APB_REGS BIT(0) +#define AXI_DMA_FLAG_HAS_RESETS BIT(1) +#define AXI_DMA_FLAG_USE_CFG2 BIT(2) + static inline void axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val) { @@ -86,7 +92,8 @@ static inline void axi_chan_config_write(struct axi_dma_chan *chan, cfg_lo = (config->dst_multblk_type << CH_CFG_L_DST_MULTBLK_TYPE_POS | config->src_multblk_type << CH_CFG_L_SRC_MULTBLK_TYPE_POS); - if (chan->chip->dw->hdata->reg_map_8_channels) { + if (chan->chip->dw->hdata->reg_map_8_channels && + !chan->chip->dw->hdata->use_cfg2) { cfg_hi = config->tt_fc << CH_CFG_H_TT_FC_POS | config->hs_sel_src << CH_CFG_H_HS_SEL_SRC_POS | config->hs_sel_dst << CH_CFG_H_HS_SEL_DST_POS | @@ -1367,10 +1374,11 @@ static int parse_device_properties(struct axi_dma_chip *chip) static int dw_probe(struct platform_device *pdev) { - struct device_node *node = pdev->dev.of_node; struct axi_dma_chip *chip; struct dw_axi_dma *dw; struct dw_axi_dma_hcfg *hdata; + struct reset_control *resets; + unsigned int flags; u32 i; int ret; @@ -1398,12 +1406,25 @@ static int dw_probe(struct platform_device *pdev) if (IS_ERR(chip->regs)) return PTR_ERR(chip->regs); - if (of_device_is_compatible(node, "intel,kmb-axi-dma")) { + flags = (uintptr_t)of_device_get_match_data(&pdev->dev); + if (flags & AXI_DMA_FLAG_HAS_APB_REGS) { chip->apb_regs = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(chip->apb_regs)) return PTR_ERR(chip->apb_regs); } + if (flags & AXI_DMA_FLAG_HAS_RESETS) { + resets = devm_reset_control_array_get_exclusive(&pdev->dev); + if (IS_ERR(resets)) + return PTR_ERR(resets); + + ret = reset_control_deassert(resets); + if (ret) + return ret; + } + + chip->dw->hdata->use_cfg2 = !!(flags & AXI_DMA_FLAG_USE_CFG2); + chip->core_clk = devm_clk_get(chip->dev, "core-clk"); if (IS_ERR(chip->core_clk)) return PTR_ERR(chip->core_clk); @@ -1554,8 +1575,15 @@ static const struct dev_pm_ops dw_axi_dma_pm_ops = { }; static const struct of_device_id dw_dma_of_id_table[] = { - { .compatible = "snps,axi-dma-1.01a" }, - { .compatible = "intel,kmb-axi-dma" }, + { + .compatible = "snps,axi-dma-1.01a" + }, { + .compatible = "intel,kmb-axi-dma", + .data = (void *)AXI_DMA_FLAG_HAS_APB_REGS, + }, { + .compatible = "starfive,jh7110-axi-dma", + .data = (void *)(AXI_DMA_FLAG_HAS_RESETS | AXI_DMA_FLAG_USE_CFG2), + }, {} }; MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h index e9d5eb0fd594..eb267cb24f67 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h @@ -33,6 +33,7 @@ struct dw_axi_dma_hcfg { /* Register map for DMAX_NUM_CHANNELS <= 8 */ bool reg_map_8_channels; bool restrict_axi_burst_len; + bool use_cfg2; }; struct axi_dma_chan { From ce62432cb8bb56a5fde544d01213e952c3a92f8b Mon Sep 17 00:00:00 2001 From: Walker Chen Date: Wed, 22 Mar 2023 17:48:19 +0800 Subject: [PATCH 32/57] dmaengine: dw-axi-dmac: Increase polling time to DMA transmission completion status The bit DMAC_CHEN[0] is automatically cleared by hardware to disable the channel after the last AMBA transfer of the DMA transfer to the destination has completed. Software can therefore poll this bit to determine when this channel is free for a new DMA transfer. This time requires at least 40 milliseconds on JH7110 SoC, otherwise an error message 'failed to stop' will be reported. Signed-off-by: Walker Chen Link: https://lore.kernel.org/r/20230322094820.24738-4-walker.chen@starfivetech.com Signed-off-by: Vinod Koul --- drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index 6cfcb541d8c3..6937cc0c0b65 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -1147,7 +1147,7 @@ static int dma_chan_terminate_all(struct dma_chan *dchan) axi_chan_disable(chan); ret = readl_poll_timeout_atomic(chan->chip->regs + DMAC_CHEN, val, - !(val & chan_active), 1000, 10000); + !(val & chan_active), 1000, 50000); if (ret == -ETIMEDOUT) dev_warn(dchan2dev(dchan), "%s failed to stop\n", axi_chan_name(chan)); From 2de5ddb5e68c94b781b3789bca1ce52000d7d0e0 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 14 Feb 2023 17:18:21 +0200 Subject: [PATCH 33/57] dmaengine: at_xdmac: disable/enable clock directly on suspend/resume Runtime PM APIs for at_xdmac just plays with clk_enable()/clk_disable() letting aside the clk_prepare()/clk_unprepare() that needs to be executed as the clock is also prepared on probe. Thus instead of using runtime PM force suspend/resume APIs use clk_disable_unprepare() + pm_runtime_put_noidle() on suspend and clk_prepare_enable() + pm_runtime_get_noresume() on resume. This approach as been chosen instead of using runtime PM force suspend/resume with clk_unprepare()/clk_prepare() as it looks simpler and the final code is better. While at it added the missing pm_runtime_mark_last_busy() on suspend before decrementing the reference counter. Fixes: 650b0e990cbd ("dmaengine: at_xdmac: add runtime pm support") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230214151827.1050280-2-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 1f0fab180f8f..f654ecaafb90 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2130,7 +2130,11 @@ static int __maybe_unused atmel_xdmac_suspend(struct device *dev) atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM); at_xdmac_off(atxdmac); - return pm_runtime_force_suspend(atxdmac->dev); + pm_runtime_mark_last_busy(atxdmac->dev); + pm_runtime_put_noidle(atxdmac->dev); + clk_disable_unprepare(atxdmac->clk); + + return 0; } static int __maybe_unused atmel_xdmac_resume(struct device *dev) @@ -2142,10 +2146,12 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) int i; int ret; - ret = pm_runtime_force_resume(atxdmac->dev); - if (ret < 0) + ret = clk_prepare_enable(atxdmac->clk); + if (ret) return ret; + pm_runtime_get_noresume(atxdmac->dev); + at_xdmac_axi_config(pdev); /* Clear pending interrupts. */ From e53957e1ec5196671e49a48f90a5c9555153189a Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 14 Feb 2023 17:18:22 +0200 Subject: [PATCH 34/57] dmaengine: at_xdmac: fix imbalanced runtime PM reference counter In case there are channels not paused during suspend (which on AT91 case is valid for serial driver when no_console_suspend boot argument is used) the at_xdmac_runtime_suspend_descriptors() was called more than one time due to at_xdmac_off(). To fix this add a new argument to at_xdmac_off() to specify if runtime PM reference counter needs to be decremented for queued active descriptors. Along with it moved the at_xdmac_runtime_suspend_descriptors() call under at_xdmac_chan_is_paused() check on suspend path as for the rest of channels the suspend is delayed by atmel_xdmac_prepare() in case channel is enabled. Same approach has been applied on resume path. Fixes: 650b0e990cbd ("dmaengine: at_xdmac: add runtime pm support") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230214151827.1050280-3-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index f654ecaafb90..af3b494f9ba9 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -412,7 +412,7 @@ static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan) return ret; } -static void at_xdmac_off(struct at_xdmac *atxdmac) +static void at_xdmac_off(struct at_xdmac *atxdmac, bool suspend_descriptors) { struct dma_chan *chan, *_chan; struct at_xdmac_chan *atchan; @@ -431,7 +431,7 @@ static void at_xdmac_off(struct at_xdmac *atxdmac) at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L); /* Decrement runtime PM ref counter for each active descriptor. */ - if (!list_empty(&atxdmac->dma.channels)) { + if (!list_empty(&atxdmac->dma.channels) && suspend_descriptors) { list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) { atchan = to_at_xdmac_chan(chan); @@ -2118,18 +2118,18 @@ static int __maybe_unused atmel_xdmac_suspend(struct device *dev) atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC); if (at_xdmac_chan_is_cyclic(atchan)) { - if (!at_xdmac_chan_is_paused(atchan)) + if (!at_xdmac_chan_is_paused(atchan)) { at_xdmac_device_pause(chan); + at_xdmac_runtime_suspend_descriptors(atchan); + } atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA); atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC); } - - at_xdmac_runtime_suspend_descriptors(atchan); } atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM); - at_xdmac_off(atxdmac); + at_xdmac_off(atxdmac, false); pm_runtime_mark_last_busy(atxdmac->dev); pm_runtime_put_noidle(atxdmac->dev); clk_disable_unprepare(atxdmac->clk); @@ -2165,14 +2165,14 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) { atchan = to_at_xdmac_chan(chan); - ret = at_xdmac_runtime_resume_descriptors(atchan); - if (ret < 0) - return ret; - at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); if (at_xdmac_chan_is_cyclic(atchan)) { - if (at_xdmac_chan_is_paused(atchan)) + if (at_xdmac_chan_is_paused(atchan)) { + ret = at_xdmac_runtime_resume_descriptors(atchan); + if (ret < 0) + return ret; at_xdmac_device_resume(chan); + } at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); @@ -2318,7 +2318,7 @@ static int at_xdmac_probe(struct platform_device *pdev) INIT_LIST_HEAD(&atxdmac->dma.channels); /* Disable all chans and interrupts. */ - at_xdmac_off(atxdmac); + at_xdmac_off(atxdmac, true); for (i = 0; i < nr_channels; i++) { struct at_xdmac_chan *atchan = &atxdmac->chan[i]; @@ -2382,7 +2382,7 @@ static int at_xdmac_remove(struct platform_device *pdev) struct at_xdmac *atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev); int i; - at_xdmac_off(atxdmac); + at_xdmac_off(atxdmac, true); of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&atxdmac->dma); pm_runtime_disable(atxdmac->dev); From 44fe8440bda545b5d167329df88c47609a645168 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 14 Feb 2023 17:18:23 +0200 Subject: [PATCH 35/57] dmaengine: at_xdmac: do not resume channels paused by consumers In case there are DMA channels not paused by consumers in suspend process (valid on AT91 SoCs for serial driver when no_console_suspend) the driver pauses them (using at_xdmac_device_pause() which is also the same function called by dmaengine_pause()) and then in the resume process the driver resumes them calling at_xdmac_device_resume() which is the same function called by dmaengine_resume()). This is good for DMA channels not paused by consumers but for drivers that calls dmaengine_pause()/dmaegine_resume() on suspend/resume path this may lead to DMA channel being enabled before the IP is enabled. For IPs that needs strict ordering with regards to DMA channel enablement this will lead to wrong behavior. To fix this add a new set of functions at_xdmac_device_pause_internal()/at_xdmac_device_resume_internal() to be called only on suspend/resume. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230214151827.1050280-4-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 52 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index af3b494f9ba9..fa1e2e0da02f 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -187,6 +187,7 @@ enum atc_status { AT_XDMAC_CHAN_IS_CYCLIC = 0, AT_XDMAC_CHAN_IS_PAUSED, + AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, }; struct at_xdmac_layout { @@ -347,6 +348,11 @@ static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan) return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); } +static inline int at_xdmac_chan_is_paused_internal(struct at_xdmac_chan *atchan) +{ + return test_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); +} + static inline bool at_xdmac_chan_is_peripheral_xfer(u32 cfg) { return cfg & AT_XDMAC_CC_TYPE_PER_TRAN; @@ -1898,6 +1904,26 @@ static int at_xdmac_device_config(struct dma_chan *chan, return ret; } +static void at_xdmac_device_pause_set(struct at_xdmac *atxdmac, + struct at_xdmac_chan *atchan) +{ + at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask); + while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) & + (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP)) + cpu_relax(); +} + +static void at_xdmac_device_pause_internal(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + unsigned long flags; + + spin_lock_irqsave(&atchan->lock, flags); + set_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); + at_xdmac_device_pause_set(atxdmac, atchan); + spin_unlock_irqrestore(&atchan->lock, flags); +} + static int at_xdmac_device_pause(struct dma_chan *chan) { struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); @@ -1915,11 +1941,8 @@ static int at_xdmac_device_pause(struct dma_chan *chan) return ret; spin_lock_irqsave(&atchan->lock, flags); - at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask); - while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) - & (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP)) - cpu_relax(); + at_xdmac_device_pause_set(atxdmac, atchan); /* Decrement runtime PM ref counter for each active descriptor. */ at_xdmac_runtime_suspend_descriptors(atchan); @@ -1931,6 +1954,17 @@ static int at_xdmac_device_pause(struct dma_chan *chan) return 0; } +static void at_xdmac_device_resume_internal(struct at_xdmac_chan *atchan) +{ + struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); + unsigned long flags; + + spin_lock_irqsave(&atchan->lock, flags); + at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); + spin_unlock_irqrestore(&atchan->lock, flags); +} + static int at_xdmac_device_resume(struct dma_chan *chan) { struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); @@ -2119,7 +2153,7 @@ static int __maybe_unused atmel_xdmac_suspend(struct device *dev) atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC); if (at_xdmac_chan_is_cyclic(atchan)) { if (!at_xdmac_chan_is_paused(atchan)) { - at_xdmac_device_pause(chan); + at_xdmac_device_pause_internal(atchan); at_xdmac_runtime_suspend_descriptors(atchan); } atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); @@ -2167,11 +2201,15 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); if (at_xdmac_chan_is_cyclic(atchan)) { - if (at_xdmac_chan_is_paused(atchan)) { + /* + * Resume only channels not explicitly paused by + * consumers. + */ + if (at_xdmac_chan_is_paused_internal(atchan)) { ret = at_xdmac_runtime_resume_descriptors(atchan); if (ret < 0) return ret; - at_xdmac_device_resume(chan); + at_xdmac_device_resume_internal(atchan); } at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); From 7c5eb63d16b01c202aaa95f374ae15a807745a73 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 14 Feb 2023 17:18:24 +0200 Subject: [PATCH 36/57] dmaengine: at_xdmac: restore the content of grws register In case the system suspends to a deep sleep state where power to DMA controller is cut-off we need to restore the content of GRWS register. This is a write only register and writing bit X tells the controller to suspend read and write requests for channel X. Thus set GRWS before restoring the content of GE (Global Enable) regiter. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230214151827.1050280-5-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index fa1e2e0da02f..34c004a4b23c 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2211,6 +2211,15 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) return ret; at_xdmac_device_resume_internal(atchan); } + + /* + * We may resume from a deep sleep state where power + * to DMA controller is cut-off. Thus, restore the + * suspend state of channels set though dmaengine API. + */ + else if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_pause_set(atxdmac, atchan); + at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); From f8435befd81dd85b7b610598551fadf675849bc1 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 14 Feb 2023 17:18:25 +0200 Subject: [PATCH 37/57] dmaengine: at_xdmac: do not enable all cyclic channels Do not global enable all the cyclic channels in at_xdmac_resume(). Instead save the global status in at_xdmac_suspend() and re-enable the cyclic channel only if it was active before suspend. Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230214151827.1050280-6-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 34c004a4b23c..96f1b69f8a75 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -246,6 +246,7 @@ struct at_xdmac { int irq; struct clk *clk; u32 save_gim; + u32 save_gs; struct dma_pool *at_xdmac_desc_pool; const struct at_xdmac_layout *layout; struct at_xdmac_chan chan[]; @@ -2162,6 +2163,7 @@ static int __maybe_unused atmel_xdmac_suspend(struct device *dev) } } atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM); + atxdmac->save_gs = at_xdmac_read(atxdmac, AT_XDMAC_GS); at_xdmac_off(atxdmac, false); pm_runtime_mark_last_busy(atxdmac->dev); @@ -2224,7 +2226,8 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); wmb(); - at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask); + if (atxdmac->save_gs & atchan->mask) + at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask); } } From 5056eae6c32d709bab506ca032ac4f7effc9db2f Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 14 Feb 2023 17:18:26 +0200 Subject: [PATCH 38/57] dmaengine: at_xdmac: add a warning message regarding for unpaused channels Add a warning message on suspend to let the user that there are channels not paused by their consumers. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230214151827.1050280-7-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 96f1b69f8a75..7ff6ca01e0b5 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2154,6 +2154,8 @@ static int __maybe_unused atmel_xdmac_suspend(struct device *dev) atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC); if (at_xdmac_chan_is_cyclic(atchan)) { if (!at_xdmac_chan_is_paused(atchan)) { + dev_warn(chan2dev(chan), "%s: channel %d not paused\n", + __func__, chan->chan_id); at_xdmac_device_pause_internal(atchan); at_xdmac_runtime_suspend_descriptors(atchan); } From 09ebe227c2236c6d0ce427ce10da3e4d66946f57 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 14 Feb 2023 17:18:27 +0200 Subject: [PATCH 39/57] dmaengine: at_xdmac: align declaration of ret with the rest of variables Align the declaration of ret in atmel_xdmac_resume() with the rest of variables. Do this by adding ret to the line with declaration for i variable. Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230214151827.1050280-8-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7ff6ca01e0b5..7da6d9b6098e 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -2181,8 +2181,7 @@ static int __maybe_unused atmel_xdmac_resume(struct device *dev) struct at_xdmac_chan *atchan; struct dma_chan *chan, *_chan; struct platform_device *pdev = container_of(dev, struct platform_device, dev); - int i; - int ret; + int i, ret; ret = clk_prepare_enable(atxdmac->clk); if (ret) From 894abe0dcfe2ad76dac25691264a85fdc2d524c3 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 31 Mar 2023 13:21:41 -0500 Subject: [PATCH 40/57] dt-bindings: dma: Drop unneeded quotes Cleanup bindings dropping unneeded quotes. Once all these are fixed, checking for this can be enabled in yamllint. Signed-off-by: Rob Herring Acked-by: Peter Ujfalusi Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230331182141.1900348-1-robh@kernel.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti/k3-udma.yaml | 2 +- .../devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml | 2 +- .../devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 97f6ae9b1236..22f6c5e2f7f4 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -43,7 +43,7 @@ description: | configuration of the legacy peripheral. allOf: - - $ref: "../dma-controller.yaml#" + - $ref: ../dma-controller.yaml# - $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml# properties: diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml index c0a1408b12ec..23ada8f87526 100644 --- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml +++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dma-1.0.yaml @@ -15,7 +15,7 @@ maintainers: - Michael Tretter allOf: - - $ref: "../dma-controller.yaml#" + - $ref: ../dma-controller.yaml# properties: "#dma-cells": diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml index 825294e3f0e8..d6cbd95ec26d 100644 --- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml +++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml @@ -16,7 +16,7 @@ maintainers: - Laurent Pinchart allOf: - - $ref: "../dma-controller.yaml#" + - $ref: ../dma-controller.yaml# properties: "#dma-cells": From c9566127f021c891e14c937b1b711874b7880e48 Mon Sep 17 00:00:00 2001 From: Walker Chen Date: Wed, 22 Mar 2023 17:48:17 +0800 Subject: [PATCH 41/57] dt-bindings: dma: snps,dw-axi-dmac: constrain the items of resets for JH7110 dma The DMA controller needs two reset items to work properly on JH7110 SoC, so there is need to constrain the items' value to 2, other platforms have 1 reset item at most. Reviewed-by: Rob Herring Signed-off-by: Walker Chen Link: https://lore.kernel.org/r/20230322094820.24738-2-walker.chen@starfivetech.com Signed-off-by: Vinod Koul --- .../bindings/dma/snps,dw-axi-dmac.yaml | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml index 5c81194e2300..363cf8bd150d 100644 --- a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml +++ b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml @@ -20,6 +20,7 @@ properties: enum: - snps,axi-dma-1.01a - intel,kmb-axi-dma + - starfive,jh7110-axi-dma reg: minItems: 1 @@ -58,7 +59,8 @@ properties: maximum: 8 resets: - maxItems: 1 + minItems: 1 + maxItems: 2 snps,dma-masters: description: | @@ -109,6 +111,25 @@ required: - snps,priority - snps,block-size +if: + properties: + compatible: + contains: + enum: + - starfive,jh7110-axi-dma +then: + properties: + resets: + minItems: 2 + items: + - description: AXI reset line + - description: AHB reset line + - description: module reset +else: + properties: + resets: + maxItems: 1 + additionalProperties: false examples: From 0c40bfb4c2dfad00a15337bb6213f92a797e3695 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:28 -0700 Subject: [PATCH 42/57] dmaengine: idxd: make misc interrupt one shot Current code continuously processes the interrupt as long as the hardware is setting the status bit. There's no reason to do that since the threaded handler will get called again if another interrupt is asserted. Also through testing, it has shown that if a misprogrammed (or malicious) agent can continuously submit descriptors with bad completion record and causes errors to be reported via the misc interrupt. Continuous processing by the thread can cause software hang watchdog to kick off since the thread isn't giving up the CPU. Reported-by: Sanjay Kumar Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-2-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index aa314ebec587..0d639303b515 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -217,13 +217,22 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } -static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) +irqreturn_t idxd_misc_thread(int vec, void *data) { + struct idxd_irq_entry *irq_entry = data; + struct idxd_device *idxd = ie_to_idxd(irq_entry); struct device *dev = &idxd->pdev->dev; union gensts_reg gensts; u32 val = 0; int i; bool err = false; + u32 cause; + + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (!cause) + return IRQ_NONE; + + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); if (cause & IDXD_INTC_HALT_STATE) goto halt; @@ -301,7 +310,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) val); if (!err) - return 0; + goto out; halt: gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); @@ -324,33 +333,10 @@ halt: "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? "FLR" : "system reset"); - return -ENXIO; } } - return 0; -} - -irqreturn_t idxd_misc_thread(int vec, void *data) -{ - struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = ie_to_idxd(irq_entry); - int rc; - u32 cause; - - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause) - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - - while (cause) { - rc = process_misc_interrupts(idxd, cause); - if (rc < 0) - break; - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause) - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - } - +out: return IRQ_HANDLED; } From 1649091f9180470f96f001724a4902d5d82bbd75 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:29 -0700 Subject: [PATCH 43/57] dmaengine: idxd: add event log size sysfs attribute Add support for changing of the event log size. Event log is a feature added to DSA 2.0 hardware to improve error reporting. It supersedes the SWERROR register on DSA 1.0 hardware and hope to prevent loss of reported errors. The error log size determines how many error entries supported for the device. It can be configured by the user via sysfs attribute. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-3-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 8 +++ drivers/dma/idxd/idxd.h | 5 ++ drivers/dma/idxd/init.c | 23 ++++++++ drivers/dma/idxd/registers.h | 7 ++- drivers/dma/idxd/sysfs.c | 52 +++++++++++++++++++ 5 files changed, 94 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index d5e3dd3d8434..e01916611452 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -144,6 +144,14 @@ Description: IAA (IAX) capability mask. Exported to user space for application consumption. This attribute should only be visible on IAA devices that are version 2 or later. +What: /sys/bus/dsa/devices/dsa/event_log_size +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: The event log size to be configured. Default is 64 entries and + occupies 4k size if the evl entry is 64 bytes. It's visible + only on platforms that support the capability. + What: /sys/bus/dsa/devices/wq./block_on_fault Date: Oct 27, 2020 KernelVersion: 5.11.0 diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index eca2c9d76db6..2a71273f1822 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -261,6 +261,10 @@ struct idxd_driver_data { int align; }; +struct idxd_evl { + u16 size; +}; + struct idxd_device { struct idxd_dev idxd_dev; struct idxd_driver_data *data; @@ -317,6 +321,7 @@ struct idxd_device { struct idxd_pmu *idxd_pmu; unsigned long *opcap_bmap; + struct idxd_evl *evl; }; /* IDXD software descriptor */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index a78cfd361c04..525de59df82a 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -331,6 +331,23 @@ static void idxd_cleanup_internals(struct idxd_device *idxd) destroy_workqueue(idxd->wq); } +static int idxd_init_evl(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl; + + if (idxd->hw.gen_cap.evl_support == 0) + return 0; + + evl = kzalloc_node(sizeof(*evl), GFP_KERNEL, dev_to_node(dev)); + if (!evl) + return -ENOMEM; + + evl->size = IDXD_EVL_SIZE_MIN; + idxd->evl = evl; + return 0; +} + static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -356,8 +373,14 @@ static int idxd_setup_internals(struct idxd_device *idxd) goto err_wkq_create; } + rc = idxd_init_evl(idxd); + if (rc < 0) + goto err_evl; + return 0; + err_evl: + destroy_workqueue(idxd->wq); err_wkq_create: for (i = 0; i < idxd->max_groups; i++) put_device(group_confdev(idxd->groups[i])); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 338289a66f00..ea3a499a3c3c 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -31,7 +31,9 @@ union gen_cap_reg { u64 rsvd:3; u64 dest_readback:1; u64 drain_readback:1; - u64 rsvd2:6; + u64 rsvd2:3; + u64 evl_support:2; + u64 rsvd4:1; u64 max_xfer_shift:5; u64 max_batch_shift:4; u64 max_ims_mult:6; @@ -297,6 +299,9 @@ union iaa_cap_reg { #define IDXD_IAACAP_OFFSET 0x180 +#define IDXD_EVL_SIZE_MIN 0x0040 +#define IDXD_EVL_SIZE_MAX 0xffff + union msix_perm { struct { u32 rsvd:2; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 2eba8cab25a1..85644e5bde83 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1573,6 +1573,46 @@ static ssize_t iaa_cap_show(struct device *dev, } static DEVICE_ATTR_RO(iaa_cap); +static ssize_t event_log_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (!idxd->evl) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", idxd->evl->size); +} + +static ssize_t event_log_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + unsigned long val; + int rc; + + if (!idxd->evl) + return -EOPNOTSUPP; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX) + return -EINVAL; + + idxd->evl->size = val; + return count; +} +static DEVICE_ATTR_RW(event_log_size); + static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, struct idxd_device *idxd) { @@ -1603,6 +1643,13 @@ static bool idxd_device_attr_iaa_cap_invisible(struct attribute *attr, idxd->hw.version < DEVICE_VERSION_2); } +static bool idxd_device_attr_event_log_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return (attr == &dev_attr_event_log_size.attr && + !idxd->hw.gen_cap.evl_support); +} + static umode_t idxd_device_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1618,6 +1665,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj, if (idxd_device_attr_iaa_cap_invisible(attr, idxd)) return 0; + if (idxd_device_attr_event_log_size_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1644,6 +1694,7 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_cdev_major.attr, &dev_attr_cmd_status.attr, &dev_attr_iaa_cap.attr, + &dev_attr_event_log_size.attr, NULL, }; @@ -1665,6 +1716,7 @@ static void idxd_conf_device_release(struct device *dev) bitmap_free(idxd->wq_enable_map); kfree(idxd->wqs); kfree(idxd->engines); + kfree(idxd->evl); ida_free(&idxd_ida, idxd->id); bitmap_free(idxd->opcap_bmap); kfree(idxd); From 244da66cda359227d80ccb41dbcb99da40eae186 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:30 -0700 Subject: [PATCH 44/57] dmaengine: idxd: setup event log configuration Add setup of event log feature for supported device. Event log addresses error reporting that was lacking in gen 1 DSA devices where a second error event does not get reported when a first event is pending software handling. The event log allows a circular buffer that the device can push error events to. It is up to the user to create a large enough event log ring in order to capture the expected events. The evl size can be set in the device sysfs attribute. By default 64 entries are supported as minimal when event log is enabled. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-4-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/device.c | 89 +++++++++++++++++++++++++++++++++++- drivers/dma/idxd/idxd.h | 19 ++++++++ drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/registers.h | 72 ++++++++++++++++++++++++++++- drivers/dma/idxd/sysfs.c | 3 +- include/uapi/linux/idxd.h | 1 + 6 files changed, 181 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5f321f3b4242..230fe9bb56ae 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -752,6 +752,83 @@ void idxd_device_clear_state(struct idxd_device *idxd) spin_unlock(&idxd->dev_lock); } +static int idxd_device_evl_setup(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union evlcfg_reg evlcfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + void *addr; + dma_addr_t dma_addr; + int size; + struct idxd_evl *evl = idxd->evl; + + if (!evl) + return 0; + + size = evl_size(idxd); + /* + * Address needs to be page aligned. However, dma_alloc_coherent() provides + * at minimal page size aligned address. No manual alignment required. + */ + addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + memset(addr, 0, size); + + spin_lock(&evl->lock); + evl->log = addr; + evl->dma = dma_addr; + evl->log_size = size; + + memset(&evlcfg, 0, sizeof(evlcfg)); + evlcfg.bits[0] = dma_addr & GENMASK(63, 12); + evlcfg.size = evl->size; + + iowrite64(evlcfg.bits[0], idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(evlcfg.bits[1], idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 1; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + gencfg.evl_en = 1; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + spin_unlock(&evl->lock); + return 0; +} + +static void idxd_device_evl_free(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + if (!gencfg.evl_en) + return; + + spin_lock(&evl->lock); + gencfg.evl_en = 0; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 0; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); + evl->log = NULL; + evl->size = IDXD_EVL_SIZE_MIN; + spin_unlock(&evl->lock); +} + static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; @@ -1451,15 +1528,24 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) if (rc < 0) return -ENXIO; + rc = idxd_device_evl_setup(idxd); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DEV_EVL_ERR; + return rc; + } + /* Start device */ rc = idxd_device_enable(idxd); - if (rc < 0) + if (rc < 0) { + idxd_device_evl_free(idxd); return rc; + } /* Setup DMA device without channels */ rc = idxd_register_dma_device(idxd); if (rc < 0) { idxd_device_disable(idxd); + idxd_device_evl_free(idxd); idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR; return rc; } @@ -1488,6 +1574,7 @@ void idxd_device_drv_remove(struct idxd_dev *idxd_dev) idxd_device_disable(idxd); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) idxd_device_reset(idxd); + idxd_device_evl_free(idxd); } static enum idxd_dev_type dev_types[] = { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 2a71273f1822..c74681f02b18 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -262,7 +262,15 @@ struct idxd_driver_data { }; struct idxd_evl { + /* Lock to protect event log access. */ + spinlock_t lock; + void *log; + dma_addr_t dma; + /* Total size of event log = number of entries * entry size. */ + unsigned int log_size; + /* The number of entries in the event log. */ u16 size; + u16 head; }; struct idxd_device { @@ -324,6 +332,17 @@ struct idxd_device { struct idxd_evl *evl; }; +static inline unsigned int evl_ent_size(struct idxd_device *idxd) +{ + return idxd->hw.gen_cap.evl_support ? + (32 * (1 << idxd->hw.gen_cap.evl_support)) : 0; +} + +static inline unsigned int evl_size(struct idxd_device *idxd) +{ + return idxd->evl->size * evl_ent_size(idxd); +} + /* IDXD software descriptor */ struct idxd_desc { union { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 525de59df82a..f44719a11c95 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -343,6 +343,7 @@ static int idxd_init_evl(struct idxd_device *idxd) if (!evl) return -ENOMEM; + spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; idxd->evl = evl; return 0; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index ea3a499a3c3c..11bb97cf7481 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -3,6 +3,8 @@ #ifndef _IDXD_REGISTERS_H_ #define _IDXD_REGISTERS_H_ +#include + /* PCI Config */ #define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 #define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe @@ -119,7 +121,8 @@ union gencfg_reg { u32 rdbuf_limit:8; u32 rsvd:4; u32 user_int_en:1; - u32 rsvd2:19; + u32 evl_en:1; + u32 rsvd2:18; }; u32 bits; } __packed; @@ -129,7 +132,8 @@ union genctrl_reg { struct { u32 softerr_int_en:1; u32 halt_int_en:1; - u32 rsvd:30; + u32 evl_int_en:1; + u32 rsvd:29; }; u32 bits; } __packed; @@ -299,6 +303,21 @@ union iaa_cap_reg { #define IDXD_IAACAP_OFFSET 0x180 +#define IDXD_EVLCFG_OFFSET 0xe0 +union evlcfg_reg { + struct { + u64 pasid_en:1; + u64 priv:1; + u64 rsvd:10; + u64 base_addr:52; + + u64 size:16; + u64 pasid:20; + u64 rsvd2:28; + }; + u64 bits[2]; +} __packed; + #define IDXD_EVL_SIZE_MIN 0x0040 #define IDXD_EVL_SIZE_MAX 0xffff @@ -539,4 +558,53 @@ union filter_cfg { u64 val; } __packed; +struct __evl_entry { + u64 rsvd:2; + u64 desc_valid:1; + u64 wq_idx_valid:1; + u64 batch:1; + u64 fault_rw:1; + u64 priv:1; + u64 err_info_valid:1; + u64 error:8; + u64 wq_idx:8; + u64 batch_id:8; + u64 operation:8; + u64 pasid:20; + u64 rsvd2:4; + + u16 batch_idx; + u16 rsvd3; + union { + /* Invalid Flags 0x11 */ + u32 invalid_flags; + /* Invalid Int Handle 0x19 */ + /* Page fault 0x1a */ + /* Page fault 0x06, 0x1f, only operand_id */ + /* Page fault before drain or in batch, 0x26, 0x27 */ + struct { + u16 int_handle; + u16 rci:1; + u16 ims:1; + u16 rcr:1; + u16 first_err_in_batch:1; + u16 rsvd4_2:9; + u16 operand_id:3; + }; + }; + u64 fault_addr; + u64 rsvd5; +} __packed; + +struct dsa_evl_entry { + struct __evl_entry e; + struct dsa_completion_record cr; +} __packed; + +struct iax_evl_entry { + struct __evl_entry e; + u64 rsvd[4]; + struct iax_completion_record cr; +} __packed; + #endif diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 85644e5bde83..163fdfaa5022 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1605,7 +1605,8 @@ static ssize_t event_log_size_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX) + if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX || + (val * evl_ent_size(idxd) > ULONG_MAX - idxd->evl->dma)) return -EINVAL; idxd->evl->size = val; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index fc47635b57dc..5d05bf12f2bd 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -30,6 +30,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, + IDXD_SCMD_DEV_EVL_ERR = 0x80120000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 From 2f431ba908d2ef05da478d10925207728f1ff483 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:31 -0700 Subject: [PATCH 45/57] dmaengine: idxd: add interrupt handling for event log An event log interrupt is raised in the misc interrupt INTCAUSE register when an event is written by the hardware. Add basic event log processing support to the interrupt handler. The event log is a ring where the hardware owns the tail and the software owns the head. The hardware will advance the tail index when an additional event has been pushed to memory. The software will process the log entry and then advances the head. The log is full when (tail + 1) % log_size = head. The hardware will stop writing when the log is full. The user is expected to create a log size large enough to handle all the expected events. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-5-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/irq.c | 48 ++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/registers.h | 19 ++++++++++++++ include/uapi/linux/idxd.h | 1 + 3 files changed, 68 insertions(+) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 0d639303b515..52b8b7d9db22 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -217,6 +217,49 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } +static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head) +{ + struct device *dev = &idxd->pdev->dev; + u8 status; + + status = DSA_COMP_STATUS(entry_head->error); + dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, entry_head->fault_addr); +} + +static void process_evl_entries(struct idxd_device *idxd) +{ + union evl_status_reg evl_status; + unsigned int h, t; + struct idxd_evl *evl = idxd->evl; + struct __evl_entry *entry_head; + unsigned int ent_size = evl_ent_size(idxd); + u32 size; + + evl_status.bits = 0; + evl_status.int_pending = 1; + + spin_lock(&evl->lock); + /* Clear interrupt pending bit */ + iowrite32(evl_status.bits_upper32, + idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + size = idxd->evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + process_evl_entry(idxd, entry_head); + h = (h + 1) % size; + } + + evl->head = h; + evl_status.head = h; + iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + spin_unlock(&evl->lock); +} + irqreturn_t idxd_misc_thread(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; @@ -304,6 +347,11 @@ irqreturn_t idxd_misc_thread(int vec, void *data) perfmon_counter_overflow(idxd); } + if (cause & IDXD_INTC_EVL) { + val |= IDXD_INTC_EVL; + process_evl_entries(idxd); + } + val ^= cause; if (val) dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 11bb97cf7481..148db94f9373 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -168,6 +168,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_EVL 0x20 #define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 @@ -558,6 +559,24 @@ union filter_cfg { u64 val; } __packed; +#define IDXD_EVLSTATUS_OFFSET 0xf0 + +union evl_status_reg { + struct { + u32 head:16; + u32 rsvd:16; + u32 tail:16; + u32 rsvd2:14; + u32 int_pending:1; + u32 rsvd3:1; + }; + struct { + u32 bits_lower32; + u32 bits_upper32; + }; + u64 bits; +} __packed; + struct __evl_entry { u64 rsvd:2; u64 desc_valid:1; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 5d05bf12f2bd..0bc8eea18586 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -170,6 +170,7 @@ enum iax_completion_status { #define DSA_COMP_STATUS_MASK 0x7f #define DSA_COMP_STATUS_WRITE 0x80 +#define DSA_COMP_STATUS(status) ((status) & DSA_COMP_STATUS_MASK) struct dsa_hw_desc { uint32_t pasid:20; From 5fbe6503b52f5665560584f62adab5db70ac910e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:32 -0700 Subject: [PATCH 46/57] dmanegine: idxd: add debugfs for event log dump Add debugfs entry to dump the content of the event log for debugging. The function will dump all non-zero entries in the event log. It will note which entries are processed and which entries are still pending processing at the time of the dump. The entries may not always be in chronological order due to the log is a circular buffer. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-6-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/Makefile | 2 +- drivers/dma/idxd/debugfs.c | 138 +++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 9 +++ drivers/dma/idxd/init.c | 12 ++++ include/uapi/linux/idxd.h | 6 +- 5 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 drivers/dma/idxd/debugfs.c diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index a1e9f2b3a37c..dc096839ac63 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,7 +1,7 @@ ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD obj-$(CONFIG_INTEL_IDXD) += idxd.o -idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o +idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o debugfs.o idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c new file mode 100644 index 000000000000..9cfbd9b14c4c --- /dev/null +++ b/drivers/dma/idxd/debugfs.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ +#include +#include +#include +#include +#include +#include +#include +#include "idxd.h" +#include "registers.h" + +static struct dentry *idxd_debugfs_dir; + +static void dump_event_entry(struct idxd_device *idxd, struct seq_file *s, + u16 index, int *count, bool processed) +{ + struct idxd_evl *evl = idxd->evl; + struct dsa_evl_entry *entry; + struct dsa_completion_record *cr; + u64 *raw; + int i; + int evl_strides = evl_ent_size(idxd) / sizeof(u64); + + entry = (struct dsa_evl_entry *)evl->log + index; + + if (!entry->e.desc_valid) + return; + + seq_printf(s, "Event Log entry %d (real index %u) processed: %u\n", + *count, index, processed); + + seq_printf(s, "desc valid %u wq idx valid %u\n" + "batch %u fault rw %u priv %u error 0x%x\n" + "wq idx %u op %#x pasid %u batch idx %u\n" + "fault addr %#llx\n", + entry->e.desc_valid, entry->e.wq_idx_valid, + entry->e.batch, entry->e.fault_rw, entry->e.priv, + entry->e.error, entry->e.wq_idx, entry->e.operation, + entry->e.pasid, entry->e.batch_idx, entry->e.fault_addr); + + cr = &entry->cr; + seq_printf(s, "status %#x result %#x fault_info %#x bytes_completed %u\n" + "fault addr %#llx inv flags %#x\n\n", + cr->status, cr->result, cr->fault_info, cr->bytes_completed, + cr->fault_addr, cr->invalid_flags); + + raw = (u64 *)entry; + + for (i = 0; i < evl_strides; i++) + seq_printf(s, "entry[%d] = %#llx\n", i, raw[i]); + + seq_puts(s, "\n"); + *count += 1; +} + +static int debugfs_evl_show(struct seq_file *s, void *d) +{ + struct idxd_device *idxd = s->private; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg evl_status; + u16 h, t, evl_size, i; + int count = 0; + bool processed = true; + + if (!evl || !evl->log) + return 0; + + spin_lock(&evl->lock); + + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + evl_size = evl->size; + + seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", + evl_status.head, evl_status.tail, evl_status.int_pending); + + i = t; + while (1) { + i = (i + 1) % evl_size; + if (i == t) + break; + + if (processed && i == h) + processed = false; + dump_event_entry(idxd, s, i, &count, processed); + } + + spin_unlock(&evl->lock); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_evl); + +int idxd_device_init_debugfs(struct idxd_device *idxd) +{ + if (IS_ERR_OR_NULL(idxd_debugfs_dir)) + return 0; + + idxd->dbgfs_dir = debugfs_create_dir(dev_name(idxd_confdev(idxd)), idxd_debugfs_dir); + if (IS_ERR(idxd->dbgfs_dir)) + return PTR_ERR(idxd->dbgfs_dir); + + if (idxd->evl) { + idxd->dbgfs_evl_file = debugfs_create_file("event_log", 0400, + idxd->dbgfs_dir, idxd, + &debugfs_evl_fops); + if (IS_ERR(idxd->dbgfs_evl_file)) { + debugfs_remove_recursive(idxd->dbgfs_dir); + idxd->dbgfs_dir = NULL; + return PTR_ERR(idxd->dbgfs_evl_file); + } + } + + return 0; +} + +void idxd_device_remove_debugfs(struct idxd_device *idxd) +{ + debugfs_remove_recursive(idxd->dbgfs_dir); +} + +int idxd_init_debugfs(void) +{ + if (!debugfs_initialized()) + return 0; + + idxd_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (IS_ERR(idxd_debugfs_dir)) + return PTR_ERR(idxd_debugfs_dir); + return 0; +} + +void idxd_remove_debugfs(void) +{ + debugfs_remove_recursive(idxd_debugfs_dir); +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c74681f02b18..b923b90b7299 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -330,6 +330,9 @@ struct idxd_device { unsigned long *opcap_bmap; struct idxd_evl *evl; + + struct dentry *dbgfs_dir; + struct dentry *dbgfs_evl_file; }; static inline unsigned int evl_ent_size(struct idxd_device *idxd) @@ -704,4 +707,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +/* debugfs */ +int idxd_device_init_debugfs(struct idxd_device *idxd); +void idxd_device_remove_debugfs(struct idxd_device *idxd); +int idxd_init_debugfs(void); +void idxd_remove_debugfs(void); + #endif diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index f44719a11c95..e21c4b7a1d32 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -669,6 +669,10 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_dev_register; } + rc = idxd_device_init_debugfs(idxd); + if (rc) + dev_warn(dev, "IDXD debugfs failed to setup\n"); + dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); @@ -731,6 +735,7 @@ static void idxd_remove(struct pci_dev *pdev) idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); + idxd_device_remove_debugfs(idxd); irq_entry = idxd_get_ie(idxd, 0); free_irq(irq_entry->vector, irq_entry); @@ -788,6 +793,10 @@ static int __init idxd_init_module(void) if (err) goto err_cdev_register; + err = idxd_init_debugfs(); + if (err) + goto err_debugfs; + err = pci_register_driver(&idxd_pci_driver); if (err) goto err_pci_register; @@ -795,6 +804,8 @@ static int __init idxd_init_module(void) return 0; err_pci_register: + idxd_remove_debugfs(); +err_debugfs: idxd_cdev_remove(); err_cdev_register: idxd_driver_unregister(&idxd_user_drv); @@ -815,5 +826,6 @@ static void __exit idxd_exit_module(void) pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); perfmon_exit(); + idxd_remove_debugfs(); } module_exit(idxd_exit_module); diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 0bc8eea18586..e86199d09a91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -311,7 +311,8 @@ struct dsa_completion_record { uint8_t result; uint8_t dif_status; }; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; union { @@ -368,7 +369,8 @@ struct dsa_raw_completion_record { struct iax_completion_record { volatile uint8_t status; uint8_t error_code; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; uint32_t invalid_flags; From 2f30decd2f23a376d2ed73dfe4c601421edf501a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:33 -0700 Subject: [PATCH 47/57] dmaengine: idxd: add per DSA wq workqueue for processing cr faults Add a workqueue for user submitted completion record fault processing. The workqueue creation and destruction lifetime will be tied to the user sub-driver since it will only be used when the wq is a user type. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-7-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 11 +++++++++++ drivers/dma/idxd/idxd.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 674bfefca088..cbe29e1a6a44 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -330,6 +330,13 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) } mutex_lock(&wq->wq_lock); + + wq->wq = create_workqueue(dev_name(wq_confdev(wq))); + if (!wq->wq) { + rc = -ENOMEM; + goto wq_err; + } + wq->type = IDXD_WQT_USER; rc = drv_enable_wq(wq); if (rc < 0) @@ -348,7 +355,9 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) err_cdev: drv_disable_wq(wq); err: + destroy_workqueue(wq->wq); wq->type = IDXD_WQT_NONE; +wq_err: mutex_unlock(&wq->wq_lock); return rc; } @@ -361,6 +370,8 @@ static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) idxd_wq_del_cdev(wq); drv_disable_wq(wq); wq->type = IDXD_WQT_NONE; + destroy_workqueue(wq->wq); + wq->wq = NULL; mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index b923b90b7299..6e56361ae658 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -185,6 +185,7 @@ struct idxd_wq { struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; + struct workqueue_struct *wq; struct idxd_device *idxd; int id; struct idxd_irq_entry ie; From c2f156bf168fb42cd6ecd0a8e2204dbe542b8516 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:34 -0700 Subject: [PATCH 48/57] dmaengine: idxd: create kmem cache for event log fault items Add a kmem cache per device for allocating event log fault context. The context allows an event log entry to be copied and passed to a software workqueue to be processed. Due to each device can have different sized event log entry depending on device type, it's not possible to have a global kmem cache. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-8-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/idxd.h | 10 ++++++++++ drivers/dma/idxd/init.c | 9 +++++++++ drivers/dma/idxd/sysfs.c | 1 + 3 files changed, 20 insertions(+) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 6e56361ae658..c5d99c179902 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -274,6 +274,15 @@ struct idxd_evl { u16 head; }; +struct idxd_evl_fault { + struct work_struct work; + struct idxd_wq *wq; + u8 status; + + /* make this last member always */ + struct __evl_entry entry[]; +}; + struct idxd_device { struct idxd_dev idxd_dev; struct idxd_driver_data *data; @@ -331,6 +340,7 @@ struct idxd_device { unsigned long *opcap_bmap; struct idxd_evl *evl; + struct kmem_cache *evl_cache; struct dentry *dbgfs_dir; struct dentry *dbgfs_evl_file; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index e21c4b7a1d32..7828ac2d31aa 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -345,6 +345,15 @@ static int idxd_init_evl(struct idxd_device *idxd) spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; + + idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)), + sizeof(struct idxd_evl_fault) + evl_ent_size(idxd), + 0, 0, NULL); + if (!idxd->evl_cache) { + kfree(evl); + return -ENOMEM; + } + idxd->evl = evl; return 0; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 163fdfaa5022..8b9dfa0d2b99 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1718,6 +1718,7 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); kfree(idxd->evl); + kmem_cache_destroy(idxd->evl_cache); ida_free(&idxd_ida, idxd->id); bitmap_free(idxd->opcap_bmap); kfree(idxd); From b022f59725f0ae846191abbd6d2e611d7f60f826 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 7 Apr 2023 13:31:35 -0700 Subject: [PATCH 49/57] dmaengine: idxd: add idxd_copy_cr() to copy user completion record during page fault handling Define idxd_copy_cr() to copy completion record to fault address in user address that is found by work queue (wq) and PASID. It will be used to write the user's completion record that the hardware device is not able to write due to user completion record page fault. An xarray is added to associate the PASID and mm with the struct idxd_user_context so mm can be found by PASID and wq. It is called when handling the completion record fault in a kernel thread context. Switch to the mm using kthread_use_vm() and copy the completion record to the mm via copy_to_user(). Once the copy is completed, switch back to the current mm using kthread_unuse_mm(). Suggested-by: Christoph Hellwig Suggested-by: Jason Gunthorpe Suggested-by: Tony Luck Tested-by: Tony Zhu Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20230407203143.2189681-9-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 107 +++++++++++++++++++++++++++++++++++++-- drivers/dma/idxd/idxd.h | 6 +++ drivers/dma/idxd/init.c | 2 + drivers/dma/idxd/sysfs.c | 1 + 4 files changed, 111 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index cbe29e1a6a44..8b8a0a0fb054 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include "registers.h" #include "idxd.h" @@ -34,6 +36,7 @@ struct idxd_user_context { struct idxd_wq *wq; struct task_struct *task; unsigned int pasid; + struct mm_struct *mm; unsigned int flags; struct iommu_sva *sva; }; @@ -68,6 +71,19 @@ static inline struct idxd_wq *inode_wq(struct inode *inode) return idxd_cdev->wq; } +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx) +{ + struct idxd_wq *wq = ctx->wq; + void *ptr; + + mutex_lock(&wq->uc_lock); + ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL); + if (ptr != (void *)ctx) + dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n", + ctx->pasid); + mutex_unlock(&wq->uc_lock); +} + static int idxd_cdev_open(struct inode *inode, struct file *filp) { struct idxd_user_context *ctx; @@ -108,20 +124,26 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) pasid = iommu_sva_get_pasid(sva); if (pasid == IOMMU_PASID_INVALID) { - iommu_sva_unbind_device(sva); rc = -EINVAL; - goto failed; + goto failed_get_pasid; } ctx->sva = sva; ctx->pasid = pasid; + ctx->mm = current->mm; + + mutex_lock(&wq->uc_lock); + rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL); + mutex_unlock(&wq->uc_lock); + if (rc < 0) + dev_warn(dev, "PASID entry already exist in xarray.\n"); if (wq_dedicated(wq)) { rc = idxd_wq_set_pasid(wq, pasid); if (rc < 0) { iommu_sva_unbind_device(sva); dev_err(dev, "wq set pasid failed: %d\n", rc); - goto failed; + goto failed_set_pasid; } } } @@ -130,7 +152,13 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) mutex_unlock(&wq->wq_lock); return 0; - failed: +failed_set_pasid: + if (device_user_pasid_enabled(idxd)) + idxd_xa_pasid_remove(ctx); +failed_get_pasid: + if (device_user_pasid_enabled(idxd)) + iommu_sva_unbind_device(sva); +failed: mutex_unlock(&wq->wq_lock); kfree(ctx); return rc; @@ -161,8 +189,10 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) } } - if (ctx->sva) + if (ctx->sva) { iommu_sva_unbind_device(ctx->sva); + idxd_xa_pasid_remove(ctx); + } kfree(ctx); mutex_lock(&wq->wq_lock); idxd_wq_put(wq); @@ -418,3 +448,70 @@ void idxd_cdev_remove(void) ida_destroy(&ictx[i].minor_ida); } } + +/** + * idxd_copy_cr - copy completion record to user address space found by wq and + * PASID + * @wq: work queue + * @pasid: PASID + * @addr: user fault address to write + * @cr: completion record + * @len: number of bytes to copy + * + * This is called by a work that handles completion record fault. + * + * Return: number of bytes copied. + */ +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *cr, int len) +{ + struct device *dev = &wq->idxd->pdev->dev; + int left = len, status_size = 1; + struct idxd_user_context *ctx; + struct mm_struct *mm; + + mutex_lock(&wq->uc_lock); + + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + dev_warn(dev, "No user context\n"); + goto out; + } + + mm = ctx->mm; + /* + * The completion record fault handling work is running in kernel + * thread context. It temporarily switches to the mm to copy cr + * to addr in the mm. + */ + kthread_use_mm(mm); + left = copy_to_user((void __user *)addr + status_size, cr + status_size, + len - status_size); + /* + * Copy status only after the rest of completion record is copied + * successfully so that the user gets the complete completion record + * when a non-zero status is polled. + */ + if (!left) { + u8 status; + + /* + * Ensure that the completion record's status field is written + * after the rest of the completion record has been written. + * This ensures that the user receives the correct completion + * record information once polling for a non-zero status. + */ + wmb(); + status = *(u8 *)cr; + if (put_user(status, (u8 __user *)addr)) + left += status_size; + } else { + left += status_size; + } + kthread_unuse_mm(mm); + +out: + mutex_unlock(&wq->uc_lock); + + return len - left; +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c5d99c179902..b3f9a12adce2 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -215,6 +215,10 @@ struct idxd_wq { char name[WQ_NAME_SIZE + 1]; u64 max_xfer_bytes; u32 max_batch_size; + + /* Lock to protect upasid_xa access. */ + struct mutex uc_lock; + struct xarray upasid_xa; }; struct idxd_engine { @@ -702,6 +706,8 @@ void idxd_cdev_remove(void); int idxd_cdev_get_major(struct idxd_device *idxd); int idxd_wq_add_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq); +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *buf, int len); /* perfmon */ #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 7828ac2d31aa..e6faff58733d 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -199,6 +199,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } + mutex_init(&wq->uc_lock); + xa_init(&wq->upasid_xa); idxd->wqs[i] = wq; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 8b9dfa0d2b99..465d2e7627e4 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1292,6 +1292,7 @@ static void idxd_conf_wq_release(struct device *dev) bitmap_free(wq->opcap_bmap); kfree(wq->wqcfg); + xa_destroy(&wq->upasid_xa); kfree(wq); } From c40bd7d9737bdcfb02d42765bc6c59b338151123 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:36 -0700 Subject: [PATCH 50/57] dmaengine: idxd: process user page faults for completion record DSA supports page fault handling through PRS. However, the DMA engine that's processing the descriptor is blocked until the PRS response is received. Other workqueues sharing the engine are also blocked. Page fault handing by the driver with PRS disabled can be used to mitigate the stalling. With PRS disabled while ATS remain enabled, DSA handles page faults on a completion record by reporting an event in the event log. In this instance, the descriptor is completed and the event log contains the completion record address and the contents of the completion record. Add support to the event log handling code to fault in the completion record and copy the content of the completion record to user memory. A bitmap is introduced to keep track of discarded event log entries. When the user process initiates ->release() of the char device, it no longer is interested in any remaining event log entries tied to the relevant wq and PASID. The driver will mark the event log entry index in the bitmap. Upon encountering the entries during processing, the event log handler will just clear the bitmap bit and skip the entry rather than attempt to process the event log entry. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-10-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 30 ++++++++++++++ drivers/dma/idxd/device.c | 22 +++++++++- drivers/dma/idxd/idxd.h | 2 + drivers/dma/idxd/init.c | 2 + drivers/dma/idxd/irq.c | 87 ++++++++++++++++++++++++++++++++++++--- include/uapi/linux/idxd.h | 1 + 6 files changed, 137 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 8b8a0a0fb054..0a51c33198f6 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -164,6 +164,35 @@ failed: return rc; } +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg status; + u16 h, t, size; + int ent_size = evl_ent_size(idxd); + struct __evl_entry *entry_head; + + if (!evl) + return; + + spin_lock(&evl->lock); + status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = status.tail; + h = evl->head; + size = evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id) + set_bit(h, evl->bmap); + h = (h + 1) % size; + } + spin_unlock(&evl->lock); + + drain_workqueue(wq->wq); +} + static int idxd_cdev_release(struct inode *node, struct file *filep) { struct idxd_user_context *ctx = filep->private_data; @@ -190,6 +219,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) } if (ctx->sva) { + idxd_cdev_evl_drain_pasid(wq, ctx->pasid); iommu_sva_unbind_device(ctx->sva); idxd_xa_pasid_remove(ctx); } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 230fe9bb56ae..fd97b2b58734 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -762,18 +762,29 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) dma_addr_t dma_addr; int size; struct idxd_evl *evl = idxd->evl; + unsigned long *bmap; + int rc; if (!evl) return 0; size = evl_size(idxd); + + bmap = bitmap_zalloc(size, GFP_KERNEL); + if (!bmap) { + rc = -ENOMEM; + goto err_bmap; + } + /* * Address needs to be page aligned. However, dma_alloc_coherent() provides * at minimal page size aligned address. No manual alignment required. */ addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); - if (!addr) - return -ENOMEM; + if (!addr) { + rc = -ENOMEM; + goto err_alloc; + } memset(addr, 0, size); @@ -781,6 +792,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) evl->log = addr; evl->dma = dma_addr; evl->log_size = size; + evl->bmap = bmap; memset(&evlcfg, 0, sizeof(evlcfg)); evlcfg.bits[0] = dma_addr & GENMASK(63, 12); @@ -799,6 +811,11 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) spin_unlock(&evl->lock); return 0; + +err_alloc: + bitmap_free(bmap); +err_bmap: + return rc; } static void idxd_device_evl_free(struct idxd_device *idxd) @@ -824,6 +841,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); + bitmap_free(evl->bmap); evl->log = NULL; evl->size = IDXD_EVL_SIZE_MIN; spin_unlock(&evl->lock); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index b3f9a12adce2..3963c83165a6 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -264,6 +264,7 @@ struct idxd_driver_data { struct device_type *dev_type; int compl_size; int align; + int evl_cr_off; }; struct idxd_evl { @@ -276,6 +277,7 @@ struct idxd_evl { /* The number of entries in the event log. */ u16 size; u16 head; + unsigned long *bmap; }; struct idxd_evl_fault { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index e6faff58733d..18344593b83f 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -46,6 +46,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct dsa_completion_record), .align = 32, .dev_type = &dsa_device_type, + .evl_cr_off = offsetof(struct dsa_evl_entry, cr), }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -53,6 +54,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct iax_completion_record), .align = 64, .dev_type = &iax_device_type, + .evl_cr_off = offsetof(struct iax_evl_entry, cr), }, }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 52b8b7d9db22..96983975f974 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include "../dmaengine.h" #include "idxd.h" @@ -217,14 +219,89 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } -static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head) +static void idxd_evl_fault_work(struct work_struct *work) +{ + struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work); + struct idxd_wq *wq = fault->wq; + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + struct __evl_entry *entry_head = fault->entry; + void *cr = (void *)entry_head + idxd->data->evl_cr_off; + int cr_size = idxd->data->compl_size, copied; + + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + case DSA_COMP_DRAIN_EVL: + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. + */ + copied = idxd_copy_cr(wq, entry_head->pasid, + entry_head->fault_addr, + cr, cr_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + if (copied != cr_size) { + dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n", + cr_size, copied); + } + break; + default: + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", + DSA_COMP_STATUS(entry_head->error)); + break; + } + + kmem_cache_free(idxd->evl_cache, fault); +} + +static void process_evl_entry(struct idxd_device *idxd, + struct __evl_entry *entry_head, unsigned int index) { struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; u8 status; - status = DSA_COMP_STATUS(entry_head->error); - dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", - status, entry_head->operation, entry_head->fault_addr); + if (test_bit(index, evl->bmap)) { + clear_bit(index, evl->bmap); + } else { + status = DSA_COMP_STATUS(entry_head->error); + + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) { + struct idxd_evl_fault *fault; + int ent_size = evl_ent_size(idxd); + + if (entry_head->rci) + dev_dbg(dev, "Completion Int Req set, ignoring!\n"); + + if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL) + return; + + fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC); + if (fault) { + struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx]; + + fault->wq = wq; + fault->status = status; + memcpy(&fault->entry, entry_head, ent_size); + INIT_WORK(&fault->work, idxd_evl_fault_work); + queue_work(wq->wq, &fault->work); + } else { + dev_warn(dev, "Failed to service fault work.\n"); + } + } else { + dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, + entry_head->fault_addr); + } + } } static void process_evl_entries(struct idxd_device *idxd) @@ -250,7 +327,7 @@ static void process_evl_entries(struct idxd_device *idxd) while (h != t) { entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); - process_evl_entry(idxd, entry_head); + process_evl_entry(idxd, entry_head, h); h = (h + 1) % size; } diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e86199d09a91..4b584d5afd87 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -135,6 +135,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR1, DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, + DSA_COMP_DRAIN_EVL = 0x26, }; enum iax_completion_status { From 6926987185a3ae92c31b99ce1bfdfb04e95057c0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:37 -0700 Subject: [PATCH 51/57] dmaengine: idxd: add descs_completed field for completion record The descs_completed field for a completion record is part of a batch descriptor completion record. It takes the same location as bytes_completed in a normal descriptor field. Add to expose to user. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-11-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- include/uapi/linux/idxd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 4b584d5afd87..76ad71bf751e 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -314,7 +314,10 @@ struct dsa_completion_record { }; uint8_t fault_info; uint8_t rsvd; - uint32_t bytes_completed; + union { + uint32_t bytes_completed; + uint32_t descs_completed; + }; uint64_t fault_addr; union { /* common record */ From 2442b7473ad03671378d2d95651bd6bbe09a0943 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:38 -0700 Subject: [PATCH 52/57] dmaengine: idxd: process batch descriptor completion record faults Add event log processing for faulting of user batch descriptor completion record. When encountering an event log entry for a page fault on a completion record, the driver is expected to do the following: 1. If the "first error in batch" bit in event log entry error info is set, discard any previously recorded errors associated with the "batch identifier". 2. Fix the page fault according to the fault address in the event log. If successful, write the completion record to the fault address in user space. 3. If an error is encountered while writing the completion record and it is associated to a descriptor in the batch, the driver associates the error with the batch identifier of the event log entry and tracks it until the event log entry for the corresponding batch desc is encountered. While processing an event log entry for a batch descriptor with error indicating that one or more descs in the batch had event log entries, the driver will do the following before writing the batch completion record: 1. If the status field of the completion record is 0x1, the driver will change it to error code 0x5 (one or more operations in batch completed with status not successful) and changes the result field to 1. 2. If the status is error code 0x6 (page fault on batch descriptor list address), change the result field to 1. 3. If status is any other value, the completion record is not changed. 4. Clear the recorded error in preparation for next batch with same batch identifier. The result field is for user software to determine whether to set the "Batch Error" flag bit in the descriptor for continuation of partial batch descriptor completion. See DSA spec 2.0 for additional information. If no error has been recorded for the batch, the batch completion record is written to user space as is. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-12-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/idxd.h | 3 ++ drivers/dma/idxd/init.c | 4 ++ drivers/dma/idxd/irq.c | 91 ++++++++++++++++++++++++++---------- drivers/dma/idxd/registers.h | 4 +- include/uapi/linux/idxd.h | 1 + 5 files changed, 78 insertions(+), 25 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 3963c83165a6..4c4baa80c731 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -265,6 +265,8 @@ struct idxd_driver_data { int compl_size; int align; int evl_cr_off; + int cr_status_off; + int cr_result_off; }; struct idxd_evl { @@ -278,6 +280,7 @@ struct idxd_evl { u16 size; u16 head; unsigned long *bmap; + bool batch_fail[IDXD_MAX_BATCH_IDENT]; }; struct idxd_evl_fault { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 18344593b83f..92f60d364392 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -47,6 +47,8 @@ static struct idxd_driver_data idxd_driver_data[] = { .align = 32, .dev_type = &dsa_device_type, .evl_cr_off = offsetof(struct dsa_evl_entry, cr), + .cr_status_off = offsetof(struct dsa_completion_record, status), + .cr_result_off = offsetof(struct dsa_completion_record, result), }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -55,6 +57,8 @@ static struct idxd_driver_data idxd_driver_data[] = { .align = 64, .dev_type = &iax_device_type, .evl_cr_off = offsetof(struct iax_evl_entry, cr), + .cr_status_off = offsetof(struct iax_completion_record, status), + .cr_result_off = offsetof(struct iax_completion_record, error_code), }, }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 96983975f974..c660d63a3eb8 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -225,37 +225,79 @@ static void idxd_evl_fault_work(struct work_struct *work) struct idxd_wq *wq = fault->wq; struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; struct __evl_entry *entry_head = fault->entry; void *cr = (void *)entry_head + idxd->data->evl_cr_off; - int cr_size = idxd->data->compl_size, copied; + int cr_size = idxd->data->compl_size; + u8 *status = (u8 *)cr + idxd->data->cr_status_off; + u8 *result = (u8 *)cr + idxd->data->cr_result_off; + int copied, copy_size; + bool *bf; switch (fault->status) { case DSA_COMP_CRA_XLAT: - case DSA_COMP_DRAIN_EVL: - /* - * Copy completion record to fault_addr in user address space - * that is found by wq and PASID. - */ - copied = idxd_copy_cr(wq, entry_head->pasid, - entry_head->fault_addr, - cr, cr_size); - /* - * The task that triggered the page fault is unknown currently - * because multiple threads may share the user address - * space or the task exits already before this fault. - * So if the copy fails, SIGSEGV can not be sent to the task. - * Just print an error for the failure. The user application - * waiting for the completion record will time out on this - * failure. - */ - if (copied != cr_size) { - dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n", - cr_size, copied); + if (entry_head->batch && entry_head->first_err_in_batch) + evl->batch_fail[entry_head->batch_id] = false; + + copy_size = cr_size; + break; + case DSA_COMP_BATCH_EVL_ERR: + bf = &evl->batch_fail[entry_head->batch_id]; + + copy_size = entry_head->rcr || *bf ? cr_size : 0; + if (*bf) { + if (*status == DSA_COMP_SUCCESS) + *status = DSA_COMP_BATCH_FAIL; + *result = 1; + *bf = false; } break; + case DSA_COMP_DRAIN_EVL: + copy_size = cr_size; + break; default: - dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", - DSA_COMP_STATUS(entry_head->error)); + copy_size = 0; + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status); + break; + } + + if (copy_size == 0) + return; + + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. + */ + copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr, + cr, copy_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + if (copied != copy_size) { + dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", + copy_size, copied); + if (entry_head->batch) + evl->batch_fail[entry_head->batch_id] = true; + } + break; + case DSA_COMP_BATCH_EVL_ERR: + if (copied != copy_size) { + dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", + copy_size, copied); + } + break; + case DSA_COMP_DRAIN_EVL: + if (copied != copy_size) + dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n", + copy_size, copied); break; } @@ -274,7 +316,8 @@ static void process_evl_entry(struct idxd_device *idxd, } else { status = DSA_COMP_STATUS(entry_head->error); - if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) { + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL || + status == DSA_COMP_BATCH_EVL_ERR) { struct idxd_evl_fault *fault; int ent_size = evl_ent_size(idxd); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 148db94f9373..9f3959d001b6 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -35,7 +35,7 @@ union gen_cap_reg { u64 drain_readback:1; u64 rsvd2:3; u64 evl_support:2; - u64 rsvd4:1; + u64 batch_continuation:1; u64 max_xfer_shift:5; u64 max_batch_shift:4; u64 max_ims_mult:6; @@ -577,6 +577,8 @@ union evl_status_reg { u64 bits; } __packed; +#define IDXD_MAX_BATCH_IDENT 256 + struct __evl_entry { u64 rsvd:2; u64 desc_valid:1; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 76ad71bf751e..606b52e88ce3 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -136,6 +136,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, DSA_COMP_DRAIN_EVL = 0x26, + DSA_COMP_BATCH_EVL_ERR, }; enum iax_completion_status { From fecae134ee10b7de69461c197450f7c05677e733 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:39 -0700 Subject: [PATCH 53/57] dmaengine: idxd: add per file user counters for completion record faults Add counters per opened file for the char device in order to keep track how many completion record faults occurred and how many of those faults failed the writeback by the driver after attempt to fault in the page. The counters are managed by xarray that associates the PASID with struct idxd_user_context. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-13-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 18 ++++++++++++++++++ drivers/dma/idxd/idxd.h | 7 +++++++ drivers/dma/idxd/irq.c | 4 ++++ 3 files changed, 29 insertions(+) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 0a51c33198f6..5c8e964e671b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -39,6 +39,7 @@ struct idxd_user_context { struct mm_struct *mm; unsigned int flags; struct iommu_sva *sva; + u64 counters[COUNTER_MAX]; }; static void idxd_cdev_dev_release(struct device *dev) @@ -84,6 +85,23 @@ static void idxd_xa_pasid_remove(struct idxd_user_context *ctx) mutex_unlock(&wq->uc_lock); } +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index) +{ + struct idxd_user_context *ctx; + + if (index >= COUNTER_MAX) + return; + + mutex_lock(&wq->uc_lock); + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + mutex_unlock(&wq->uc_lock); + return; + } + ctx->counters[index]++; + mutex_unlock(&wq->uc_lock); +} + static int idxd_cdev_open(struct inode *inode, struct file *filp) { struct idxd_user_context *ctx; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 4c4baa80c731..9fb26d017285 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -127,6 +127,12 @@ struct idxd_pmu { #define IDXD_MAX_PRIORITY 0xf +enum { + COUNTER_FAULTS = 0, + COUNTER_FAULT_FAILS, + COUNTER_MAX +}; + enum idxd_wq_state { IDXD_WQ_DISABLED = 0, IDXD_WQ_ENABLED, @@ -713,6 +719,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq); int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, void *buf, int len); +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index); /* perfmon */ #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index c660d63a3eb8..f4b0f59c95ba 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -240,6 +240,7 @@ static void idxd_evl_fault_work(struct work_struct *work) evl->batch_fail[entry_head->batch_id] = false; copy_size = cr_size; + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); break; case DSA_COMP_BATCH_EVL_ERR: bf = &evl->batch_fail[entry_head->batch_id]; @@ -251,6 +252,7 @@ static void idxd_evl_fault_work(struct work_struct *work) *result = 1; *bf = false; } + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); break; case DSA_COMP_DRAIN_EVL: copy_size = cr_size; @@ -282,6 +284,7 @@ static void idxd_evl_fault_work(struct work_struct *work) switch (fault->status) { case DSA_COMP_CRA_XLAT: if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", copy_size, copied); if (entry_head->batch) @@ -290,6 +293,7 @@ static void idxd_evl_fault_work(struct work_struct *work) break; case DSA_COMP_BATCH_EVL_ERR: if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", copy_size, copied); } From e6fd6d7e5f0fe4a17a08e892afb5db800e7794ec Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:40 -0700 Subject: [PATCH 54/57] dmaengine: idxd: add a device to represent the file opened Embed a struct device for the user file context in order to export sysfs attributes related with the opened file. Tie the lifetime of the file context to the device. The sysfs entry will be added under the char device. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-14-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 119 ++++++++++++++++++++++++++++++++-------- drivers/dma/idxd/idxd.h | 2 + 2 files changed, 97 insertions(+), 24 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 5c8e964e671b..e07411053e21 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -23,6 +23,13 @@ struct idxd_cdev_context { struct ida minor_ida; }; +/* + * Since user file names are global in DSA devices, define their ida's as + * global to avoid conflict file names. + */ +static DEFINE_IDA(file_ida); +static DEFINE_MUTEX(ida_lock); + /* * ictx is an array based off of accelerator types. enum idxd_type * is used as index @@ -39,7 +46,60 @@ struct idxd_user_context { struct mm_struct *mm; unsigned int flags; struct iommu_sva *sva; + struct idxd_dev idxd_dev; u64 counters[COUNTER_MAX]; + int id; +}; + +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid); +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx); + +static inline struct idxd_user_context *dev_to_uctx(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_user_context, idxd_dev); +} + +static void idxd_file_dev_release(struct device *dev) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + struct idxd_device *idxd = wq->idxd; + int rc; + + mutex_lock(&ida_lock); + ida_free(&file_ida, ctx->id); + mutex_unlock(&ida_lock); + + /* Wait for in-flight operations to complete. */ + if (wq_shared(wq)) { + idxd_device_drain_pasid(idxd, ctx->pasid); + } else { + if (device_user_pasid_enabled(idxd)) { + /* The wq disable in the disable pasid function will drain the wq */ + rc = idxd_wq_disable_pasid(wq); + if (rc < 0) + dev_err(dev, "wq disable pasid failed.\n"); + } else { + idxd_wq_drain(wq); + } + } + + if (ctx->sva) { + idxd_cdev_evl_drain_pasid(wq, ctx->pasid); + iommu_sva_unbind_device(ctx->sva); + idxd_xa_pasid_remove(ctx); + } + kfree(ctx); + mutex_lock(&wq->wq_lock); + idxd_wq_put(wq); + mutex_unlock(&wq->wq_lock); +} + +static struct device_type idxd_cdev_file_type = { + .name = "idxd_file", + .release = idxd_file_dev_release, }; static void idxd_cdev_dev_release(struct device *dev) @@ -107,10 +167,11 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) struct idxd_user_context *ctx; struct idxd_device *idxd; struct idxd_wq *wq; - struct device *dev; + struct device *dev, *fdev; int rc = 0; struct iommu_sva *sva; unsigned int pasid; + struct idxd_cdev *idxd_cdev; wq = inode_wq(inode); idxd = wq->idxd; @@ -166,10 +227,41 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) } } + idxd_cdev = wq->idxd_cdev; + mutex_lock(&ida_lock); + ctx->id = ida_alloc(&file_ida, GFP_KERNEL); + mutex_unlock(&ida_lock); + if (ctx->id < 0) { + dev_warn(dev, "ida alloc failure\n"); + goto failed_ida; + } + ctx->idxd_dev.type = IDXD_DEV_CDEV_FILE; + fdev = user_ctx_dev(ctx); + device_initialize(fdev); + fdev->parent = cdev_dev(idxd_cdev); + fdev->bus = &dsa_bus_type; + fdev->type = &idxd_cdev_file_type; + + rc = dev_set_name(fdev, "file%d", ctx->id); + if (rc < 0) { + dev_warn(dev, "set name failure\n"); + goto failed_dev_name; + } + + rc = device_add(fdev); + if (rc < 0) { + dev_warn(dev, "file device add failure\n"); + goto failed_dev_add; + } + idxd_wq_get(wq); mutex_unlock(&wq->wq_lock); return 0; +failed_dev_add: +failed_dev_name: + put_device(fdev); +failed_ida: failed_set_pasid: if (device_user_pasid_enabled(idxd)) idxd_xa_pasid_remove(ctx); @@ -217,34 +309,12 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) struct idxd_wq *wq = ctx->wq; struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; - int rc; dev_dbg(dev, "%s called\n", __func__); filep->private_data = NULL; - /* Wait for in-flight operations to complete. */ - if (wq_shared(wq)) { - idxd_device_drain_pasid(idxd, ctx->pasid); - } else { - if (device_user_pasid_enabled(idxd)) { - /* The wq disable in the disable pasid function will drain the wq */ - rc = idxd_wq_disable_pasid(wq); - if (rc < 0) - dev_err(dev, "wq disable pasid failed.\n"); - } else { - idxd_wq_drain(wq); - } - } + device_unregister(user_ctx_dev(ctx)); - if (ctx->sva) { - idxd_cdev_evl_drain_pasid(wq, ctx->pasid); - iommu_sva_unbind_device(ctx->sva); - idxd_xa_pasid_remove(ctx); - } - kfree(ctx); - mutex_lock(&wq->wq_lock); - idxd_wq_put(wq); - mutex_unlock(&wq->wq_lock); return 0; } @@ -375,6 +445,7 @@ void idxd_wq_del_cdev(struct idxd_wq *wq) struct idxd_cdev *idxd_cdev; idxd_cdev = wq->idxd_cdev; + ida_destroy(&file_ida); wq->idxd_cdev = NULL; cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); put_device(cdev_dev(idxd_cdev)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 9fb26d017285..bd544eb2ddcb 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -32,6 +32,7 @@ enum idxd_dev_type { IDXD_DEV_GROUP, IDXD_DEV_ENGINE, IDXD_DEV_CDEV, + IDXD_DEV_CDEV_FILE, IDXD_DEV_MAX_TYPE, }; @@ -405,6 +406,7 @@ enum idxd_completion_status { #define engine_confdev(engine) &engine->idxd_dev.conf_dev #define group_confdev(group) &group->idxd_dev.conf_dev #define cdev_dev(cdev) &cdev->idxd_dev.conf_dev +#define user_ctx_dev(ctx) (&(ctx)->idxd_dev.conf_dev) #define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev) #define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev) From 244009b07e7d0728726f266cc3485d7fd400d0d5 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:41 -0700 Subject: [PATCH 55/57] dmaengine: idxd: expose fault counters to sysfs Expose cr_faults and cr_fault_failures counters to the user space. This allows a user app to keep track of how many fault the application is causing with the completion record (CR) and also the number of failures of the CR writeback. Having a high number of cr_fault_failures is bad as the app is submitting descriptors with the CR addresses that are bad. User monitoring daemon may want to consider killing the application as it may be malicious and attempting to flood the device event log. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-15-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 17 +++++++ drivers/dma/idxd/cdev.c | 46 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index e01916611452..73ab86196a41 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -318,3 +318,20 @@ Description: Allows control of the number of batch descriptors that can be 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of the max value). It's visible only on platforms that support the capability. + +What: /sys/bus/dsa/devices/wq./dsa\!wq./file/cr_faults +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Show the number of Completion Record (CR) faults this application + has caused. + +What: /sys/bus/dsa/devices/wq./dsa\!wq./file/cr_fault_failures +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Show the number of Completion Record (CR) faults failures that this + application has caused. The failure counter is incremented when the + driver cannot fault in the address for the CR. Typically this is caused + by a bad address programmed in the submitted descriptor or a malicious + submitter is using bad CR address on purpose. diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index e07411053e21..2fb905f2545b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -61,6 +61,51 @@ static inline struct idxd_user_context *dev_to_uctx(struct device *dev) return container_of(idxd_dev, struct idxd_user_context, idxd_dev); } +static ssize_t cr_faults_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULTS]); +} +static DEVICE_ATTR_RO(cr_faults); + +static ssize_t cr_fault_failures_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULT_FAILS]); +} +static DEVICE_ATTR_RO(cr_fault_failures); + +static struct attribute *cdev_file_attributes[] = { + &dev_attr_cr_faults.attr, + &dev_attr_cr_fault_failures.attr, + NULL +}; + +static umode_t cdev_file_attr_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + + if (!wq_pasid_enabled(wq)) + return 0; + + return a->mode; +} + +static const struct attribute_group cdev_file_attribute_group = { + .attrs = cdev_file_attributes, + .is_visible = cdev_file_attr_visible, +}; + +static const struct attribute_group *cdev_file_attribute_groups[] = { + &cdev_file_attribute_group, + NULL +}; + static void idxd_file_dev_release(struct device *dev) { struct idxd_user_context *ctx = dev_to_uctx(dev); @@ -100,6 +145,7 @@ static void idxd_file_dev_release(struct device *dev) static struct device_type idxd_cdev_file_type = { .name = "idxd_file", .release = idxd_file_dev_release, + .groups = cdev_file_attribute_groups, }; static void idxd_cdev_dev_release(struct device *dev) From a62b8f87c770fa4109ce515e4d8a0d4701a4ca5f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:42 -0700 Subject: [PATCH 56/57] dmaengine: idxd: add pid to exported sysfs attribute for opened file Provide the pid of the application for the opened file. This allows the monitor daemon to easily correlate which app opened the file and easily kill the app by pid if that is desired action. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-16-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- Documentation/ABI/stable/sysfs-driver-dma-idxd | 8 ++++++++ drivers/dma/idxd/cdev.c | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 73ab86196a41..5d0df57f5298 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -335,3 +335,11 @@ Description: Show the number of Completion Record (CR) faults failures that this driver cannot fault in the address for the CR. Typically this is caused by a bad address programmed in the submitted descriptor or a malicious submitter is using bad CR address on purpose. + +What: /sys/bus/dsa/devices/wq./dsa\!wq./file/pid +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Show the process id of the application that opened the file. This is + helpful information for a monitor daemon that wants to kill the + application that opened the file. diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 2fb905f2545b..ecbf67c2ad2b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -49,6 +49,7 @@ struct idxd_user_context { struct idxd_dev idxd_dev; u64 counters[COUNTER_MAX]; int id; + pid_t pid; }; static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid); @@ -78,9 +79,18 @@ static ssize_t cr_fault_failures_show(struct device *dev, } static DEVICE_ATTR_RO(cr_fault_failures); +static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%u\n", ctx->pid); +} +static DEVICE_ATTR_RO(pid); + static struct attribute *cdev_file_attributes[] = { &dev_attr_cr_faults.attr, &dev_attr_cr_fault_failures.attr, + &dev_attr_pid.attr, NULL }; @@ -238,6 +248,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) ctx->wq = wq; filp->private_data = ctx; + ctx->pid = current->pid; if (device_user_pasid_enabled(idxd)) { sva = iommu_sva_bind_device(dev, current->mm); From f2dc327131b5cbb2cbb467cec23836f2e9d4cf46 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2023 13:31:43 -0700 Subject: [PATCH 57/57] dmaengine: idxd: add per wq PRS disable Add sysfs knob for per wq Page Request Service disable. This knob disables PRS support for the specific wq. When this bit is set, it also overrides the wq's block on fault enabling. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20230407203143.2189681-17-fenghua.yu@intel.com Signed-off-by: Vinod Koul --- .../ABI/stable/sysfs-driver-dma-idxd | 10 ++++ drivers/dma/idxd/device.c | 6 +- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 5 +- drivers/dma/idxd/sysfs.c | 57 ++++++++++++++++++- 5 files changed, 74 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 5d0df57f5298..534b7a3d59fc 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -235,6 +235,16 @@ Contact: dmaengine@vger.kernel.org Description: Indicate whether ATS disable is turned on for the workqueue. 0 indicates ATS is on, and 1 indicates ATS is off for the workqueue. +What: /sys/bus/dsa/devices/wq./prs_disable +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Controls whether PRS disable is turned on for the workqueue. + 0 indicates PRS is on, and 1 indicates PRS is off for the + workqueue. This option overrides block_on_fault attribute + if set. It's visible only on platforms that support the + capability. + What: /sys/bus/dsa/devices/wq./occupancy Date May 25, 2021 KernelVersion: 5.14.0 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fd97b2b58734..3c80b9681c72 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -967,12 +967,16 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->priority = wq->priority; if (idxd->hw.gen_cap.block_on_fault && - test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)) + test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags) && + !test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) wq->wqcfg->bof = 1; if (idxd->hw.wq_cap.wq_ats_support) wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + if (idxd->hw.wq_cap.wq_prs_support) + wq->wqcfg->wq_prs_disable = test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index bd544eb2ddcb..e44b1d45ccd5 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -143,6 +143,7 @@ enum idxd_wq_flag { WQ_FLAG_DEDICATED = 0, WQ_FLAG_BLOCK_ON_FAULT, WQ_FLAG_ATS_DISABLE, + WQ_FLAG_PRS_DISABLE, }; enum idxd_wq_type { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 9f3959d001b6..7b54a3939ea1 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -59,7 +59,8 @@ union wq_cap_reg { u64 occupancy:1; u64 occupancy_int:1; u64 op_config:1; - u64 rsvd3:9; + u64 wq_prs_support:1; + u64 rsvd4:8; }; u64 bits; } __packed; @@ -371,7 +372,7 @@ union wqcfg { u32 mode:1; /* shared or dedicated */ u32 bof:1; /* block on fault */ u32 wq_ats_disable:1; - u32 rsvd2:1; + u32 wq_prs_disable:1; u32 priority:4; u32 pasid:20; u32 pasid_en:1; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 465d2e7627e4..293739ac5596 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -822,10 +822,14 @@ static ssize_t wq_block_on_fault_store(struct device *dev, if (rc < 0) return rc; - if (bof) + if (bof) { + if (test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) + return -EOPNOTSUPP; + set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); - else + } else { clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } return count; } @@ -1109,6 +1113,44 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute static struct device_attribute dev_attr_wq_ats_disable = __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store); +static ssize_t wq_prs_disable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)); +} + +static ssize_t wq_prs_disable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + bool prs_dis; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (!idxd->hw.wq_cap.wq_prs_support) + return -EOPNOTSUPP; + + rc = kstrtobool(buf, &prs_dis); + if (rc < 0) + return rc; + + if (prs_dis) { + set_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + /* when PRS is disabled, BOF needs to be off as well */ + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } else { + clear_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + } + return count; +} + +static struct device_attribute dev_attr_wq_prs_disable = + __ATTR(prs_disable, 0644, wq_prs_disable_show, wq_prs_disable_store); + static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_wq *wq = confdev_to_wq(dev); @@ -1239,6 +1281,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_transfer_size.attr, &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, + &dev_attr_wq_prs_disable.attr, &dev_attr_wq_occupancy.attr, &dev_attr_wq_enqcmds_retries.attr, &dev_attr_wq_op_config.attr, @@ -1260,6 +1303,13 @@ static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr, idxd->data->type == IDXD_TYPE_IAX; } +static bool idxd_wq_attr_wq_prs_disable_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_wq_prs_disable.attr && + !idxd->hw.wq_cap.wq_prs_support; +} + static umode_t idxd_wq_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1273,6 +1323,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj, if (idxd_wq_attr_max_batch_size_invisible(attr, idxd)) return 0; + if (idxd_wq_attr_wq_prs_disable_invisible(attr, idxd)) + return 0; + return attr->mode; }