From 54dae6d5d3f091568322c4a69ddd3459a81545ad Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 12 Nov 2023 19:45:22 +0100 Subject: [PATCH 01/16] dt-bindings: iommu: arm,smmu: document clocks for the SM8350 GPU SMMU Document the clocks for Qualcomm SM8350 Adreno GPU SMMU, already used in DTS: sm8350-hdk.dtb: iommu@3da0000: clock-names: False schema does not allow ['bus', 'iface', 'ahb', 'hlos1_vote_gpu_smmu', 'cx_gmu', 'hub_cx_int', 'hub_aon'] Signed-off-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20231112184522.3759-1-krzysztof.kozlowski@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index aa9e1c0895a5..e89f8a5c44e4 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -429,6 +429,30 @@ allOf: - description: interface clock required to access smmu's registers through the TCU's programming interface. + - if: + properties: + compatible: + items: + - enum: + - qcom,sm8350-smmu-500 + - const: qcom,adreno-smmu + - const: qcom,smmu-500 + - const: arm,mmu-500 + then: + properties: + clock-names: + items: + - const: bus + - const: iface + - const: ahb + - const: hlos1_vote_gpu_smmu + - const: cx_gmu + - const: hub_cx_int + - const: hub_aon + clocks: + minItems: 7 + maxItems: 7 + - if: properties: compatible: @@ -472,7 +496,6 @@ allOf: - qcom,sdx65-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 then: From 61683b47df44fa7efd4039676436e711f7e31c70 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 28 Nov 2023 09:41:23 +0100 Subject: [PATCH 02/16] dt-bindings: iommu: arm,smmu: document the SM8650 System MMU Document the System MMU on the SM8650 Platform. Acked-by: Rob Herring Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20231128-topic-sm8650-upstream-bindings-smmu-v2-1-d3fbcaf1ea92@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index e89f8a5c44e4..b4b76437369a 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -56,6 +56,7 @@ properties: - qcom,sm8350-smmu-500 - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 + - qcom,sm8650-smmu-500 - const: qcom,smmu-500 - const: arm,mmu-500 @@ -498,6 +499,7 @@ allOf: - qcom,sm6375-smmu-500 - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 + - qcom,sm8650-smmu-500 then: properties: clock-names: false From fa27b35c9102e3e2b75455517f7091b294cc0552 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Fri, 24 Nov 2023 15:36:05 +0530 Subject: [PATCH 03/16] dt-bindings: arm-smmu: Add compatible for X1E80100 SoC Add the SoC specific compatible for X1E80100 implementing arm,mmu-500. Signed-off-by: Rajendra Nayak Co-developed-by: Sibi Sankar Signed-off-by: Sibi Sankar Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231124100608.29964-3-quic_sibis@quicinc.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index b4b76437369a..a5adfba93924 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -57,6 +57,7 @@ properties: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 - const: qcom,smmu-500 - const: arm,mmu-500 @@ -500,6 +501,7 @@ allOf: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 then: properties: clock-names: false From 4fff78dc2490fc0c67669c610dbc42921dd23a1c Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 4 Dec 2023 13:55:20 +0100 Subject: [PATCH 04/16] dt-bindings: arm-smmu: Document SM8[45]50 GPU SMMU SM8450 and SM8550 both use a Qualcomm-modified MMU500 for their GPU. In both cases, it requires a set of clocks to be enabled. Describe that. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231127-topic-a7xx_dt-v2-1-2a437588e563@linaro.org Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.yaml | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index a5adfba93924..a4042ae24770 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -91,6 +91,8 @@ properties: - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 - qcom,sm8350-smmu-500 + - qcom,sm8450-smmu-500 + - qcom,sm8550-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 - const: arm,mmu-500 @@ -479,6 +481,50 @@ allOf: - description: Voter clock required for HLOS SMMU access - description: Interface clock required for register access + - if: + properties: + compatible: + const: qcom,sm8450-smmu-500 + then: + properties: + clock-names: + items: + - const: gmu + - const: hub + - const: hlos + - const: bus + - const: iface + - const: ahb + + clocks: + items: + - description: GMU clock + - description: GPU HUB clock + - description: HLOS vote clock + - description: GPU memory bus clock + - description: GPU SNoC bus clock + - description: GPU AHB clock + + - if: + properties: + compatible: + const: qcom,sm8550-smmu-500 + then: + properties: + clock-names: + items: + - const: hlos + - const: bus + - const: iface + - const: ahb + + clocks: + items: + - description: HLOS vote clock + - description: GPU memory bus clock + - description: GPU SNoC bus clock + - description: GPU AHB clock + # Disallow clocks for all other platforms with specific compatibles - if: properties: @@ -498,8 +544,6 @@ allOf: - qcom,sdx65-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - - qcom,sm8450-smmu-500 - - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 - qcom,x1e80100-smmu-500 then: From afc95681c3068956fed1241a1ff1612c066c75ac Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 10 Dec 2023 10:06:53 -0800 Subject: [PATCH 05/16] iommu/arm-smmu-qcom: Add missing GMU entry to match table In some cases the firmware expects cbndx 1 to be assigned to the GMU, so we also want the default domain for the GMU to be an identy domain. This way it does not get a context bank assigned. Without this, both of_dma_configure() and drm/msm's iommu_domain_attach() will trigger allocating and configuring a context bank. So GMU ends up attached to both cbndx 1 and later cbndx 2. This arrangement seemingly confounds and surprises the firmware if the GPU later triggers a translation fault, resulting (on sc8280xp / lenovo x13s, at least) in the SMMU getting wedged and the GPU stuck without memory access. Cc: stable@vger.kernel.org Signed-off-by: Rob Clark Tested-by: Johan Hovold Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20231210180655.75542-1-robdclark@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 549ae4dba3a6..d326fa230b96 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -243,6 +243,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,adreno" }, + { .compatible = "qcom,adreno-gmu" }, { .compatible = "qcom,mdp4" }, { .compatible = "qcom,mdss" }, { .compatible = "qcom,sc7180-mdss" }, From 28af105cb65043f20a16c5e2519d66390b1f9bb9 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 29 Nov 2023 15:44:02 +0100 Subject: [PATCH 06/16] iommu/arm-smmu-qcom: Add QCM2290 MDSS compatible Add the QCM2290 MDSS compatible to clients compatible list, as it also needs the workarounds. Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20231125-topic-rb1_feat-v3-5-4cbb567743bb@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index d326fa230b96..8b04ece00420 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -246,6 +246,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,adreno-gmu" }, { .compatible = "qcom,mdp4" }, { .compatible = "qcom,mdss" }, + { .compatible = "qcom,qcm2290-mdss" }, { .compatible = "qcom,sc7180-mdss" }, { .compatible = "qcom,sc7180-mss-pil" }, { .compatible = "qcom,sc7280-mdss" }, From 268dd4edb748a1e2f298b04854681453df6e77a2 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Wed, 11 Oct 2023 19:57:26 +0200 Subject: [PATCH 07/16] iommu/qcom: restore IOMMU state if needed If the IOMMU has a power domain then some state will be lost in qcom_iommu_suspend and TZ will reset device if we don't call qcom_scm_restore_sec_cfg before accessing it again. Signed-off-by: Vladimir Lypak [luca@z3ntu.xyz: reword commit message a bit] Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20231011-msm8953-iommu-restore-v1-1-48a0c93809a2@z3ntu.xyz Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 97b2122032b2..67abeb02cf53 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -900,8 +900,16 @@ static void qcom_iommu_device_remove(struct platform_device *pdev) static int __maybe_unused qcom_iommu_resume(struct device *dev) { struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev); + int ret; - return clk_bulk_prepare_enable(CLK_NUM, qcom_iommu->clks); + ret = clk_bulk_prepare_enable(CLK_NUM, qcom_iommu->clks); + if (ret < 0) + return ret; + + if (dev->pm_domain) + return qcom_scm_restore_sec_cfg(qcom_iommu->sec_id, 0); + + return ret; } static int __maybe_unused qcom_iommu_suspend(struct device *dev) From b41932f544586e606a7493cbc6dfb3c97c6b44d7 Mon Sep 17 00:00:00 2001 From: Wenkai Lin Date: Wed, 6 Dec 2023 08:57:27 +0800 Subject: [PATCH 08/16] iommu/arm-smmu-v3: disable stall for quiet_cd In the stall model, invalid transactions were expected to be stalled and aborted by the IOPF handler. However, when killing a test case with a huge amount of data, the accelerator streamline can not stop until all data is consumed even if the page fault handler reports errors. As a result, the kill may take a long time, about 10 seconds with numerous iopf interrupts. So disable stall for quiet_cd in the non-force stall model, since force stall model (STALL_MODEL==0b10) requires CD.S must be 1. Signed-off-by: Zhangfei Gao Signed-off-by: Wenkai Lin Suggested-by: Jean-Philippe Brucker Reviewed-by: Jason Gunthorpe Reviewed-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20231206005727.46150-1-zhangfei.gao@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 7445454c2af2..7086e5fa41ff 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1063,6 +1063,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, bool cd_live; __le64 *cdptr; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; + struct arm_smmu_device *smmu = master->smmu; if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; @@ -1077,6 +1078,8 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, if (!cd) { /* (5) */ val = 0; } else if (cd == &quiet_cd) { /* (4) */ + if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) + val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R); val |= CTXDESC_CD_0_TCR_EPD0; } else if (cd_live) { /* (3) */ val &= ~CTXDESC_CD_0_ASID; From 57b89048874c9edd4a17f34c126b4a4188b7b444 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Dec 2023 15:14:33 -0400 Subject: [PATCH 09/16] iommu/arm-smmu-v3: Add a type for the STE Instead of passing a naked __le16 * around to represent a STE wrap it in a "struct arm_smmu_ste" with an array of the correct size. This makes it much clearer which functions will comprise the "STE API". Reviewed-by: Moritz Fischer Reviewed-by: Michael Shavit Reviewed-by: Eric Auger Reviewed-by: Nicolin Chen Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 59 ++++++++++----------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 ++- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 7086e5fa41ff..ad1e055e420d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1252,7 +1252,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) } static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, - __le64 *dst) + struct arm_smmu_ste *dst) { /* * This is hideously complicated, but we only really care about @@ -1270,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, * 2. Write everything apart from dword 0, sync, write dword 0, sync * 3. Update Config, sync */ - u64 val = le64_to_cpu(dst[0]); + u64 val = le64_to_cpu(dst->data[0]); bool ste_live = false; struct arm_smmu_device *smmu = NULL; struct arm_smmu_ctx_desc_cfg *cd_table = NULL; @@ -1328,10 +1328,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, else val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); - dst[0] = cpu_to_le64(val); - dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + dst->data[0] = cpu_to_le64(val); + dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); - dst[2] = 0; /* Nuke the VMID */ + dst->data[2] = 0; /* Nuke the VMID */ /* * The SMMU can perform negative caching, so we must sync * the STE regardless of whether the old value was live. @@ -1346,7 +1346,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; BUG_ON(ste_live); - dst[1] = cpu_to_le64( + dst->data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | @@ -1355,7 +1355,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled) - dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); + dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | @@ -1365,7 +1365,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (s2_cfg) { BUG_ON(ste_live); - dst[2] = cpu_to_le64( + dst->data[2] = cpu_to_le64( FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) | FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) | #ifdef __BIG_ENDIAN @@ -1374,18 +1374,18 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R); - dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); + dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK); val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS); } if (master->ats_enabled) - dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, + dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS)); arm_smmu_sync_ste_for_sid(smmu, sid); /* See comment in arm_smmu_write_ctx_desc() */ - WRITE_ONCE(dst[0], cpu_to_le64(val)); + WRITE_ONCE(dst->data[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ @@ -1393,7 +1393,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); } -static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool force) +static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab, + unsigned int nent, bool force) { unsigned int i; u64 val = STRTAB_STE_0_V; @@ -1404,11 +1405,11 @@ static void arm_smmu_init_bypass_stes(__le64 *strtab, unsigned int nent, bool fo val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); for (i = 0; i < nent; ++i) { - strtab[0] = cpu_to_le64(val); - strtab[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, - STRTAB_STE_1_SHCFG_INCOMING)); - strtab[2] = 0; - strtab += STRTAB_STE_DWORDS; + strtab->data[0] = cpu_to_le64(val); + strtab->data[1] = cpu_to_le64(FIELD_PREP( + STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); + strtab->data[2] = 0; + strtab++; } } @@ -2212,26 +2213,23 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) return 0; } -static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) +static struct arm_smmu_ste * +arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) { - __le64 *step; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - struct arm_smmu_strtab_l1_desc *l1_desc; - int idx; + unsigned int idx1, idx2; /* Two-level walk */ - idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; - l1_desc = &cfg->l1_desc[idx]; - idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS; - step = &l1_desc->l2ptr[idx]; + idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS; + idx2 = sid & ((1 << STRTAB_SPLIT) - 1); + return &cfg->l1_desc[idx1].l2ptr[idx2]; } else { /* Simple linear lookup */ - step = &cfg->strtab[sid * STRTAB_STE_DWORDS]; + return (struct arm_smmu_ste *)&cfg + ->strtab[sid * STRTAB_STE_DWORDS]; } - - return step; } static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) @@ -2241,7 +2239,8 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master) for (i = 0; i < master->num_streams; ++i) { u32 sid = master->streams[i].id; - __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + struct arm_smmu_ste *step = + arm_smmu_get_step_for_sid(smmu, sid); /* Bridged PCI devices may end up with duplicated IDs */ for (j = 0; j < i; j++) @@ -3772,7 +3771,7 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); list_for_each_entry(e, &rmr_list, list) { - __le64 *step; + struct arm_smmu_ste *step; struct iommu_iort_rmr_data *rmr; int ret, i; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 961205ba86d2..03f9e526cbd9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -206,6 +206,11 @@ #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6) #define STRTAB_STE_DWORDS 8 + +struct arm_smmu_ste { + __le64 data[STRTAB_STE_DWORDS]; +}; + #define STRTAB_STE_0_V (1UL << 0) #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1) #define STRTAB_STE_0_CFG_ABORT 0 @@ -571,7 +576,7 @@ struct arm_smmu_priq { struct arm_smmu_strtab_l1_desc { u8 span; - __le64 *l2ptr; + struct arm_smmu_ste *l2ptr; dma_addr_t l2ptr_dma; }; From 12a48fe90d0919e4e594815122403f793a090803 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Dec 2023 15:14:34 -0400 Subject: [PATCH 10/16] iommu/arm-smmu-v3: Master cannot be NULL in arm_smmu_write_strtab_ent() The only caller is arm_smmu_install_ste_for_dev() which never has a NULL master. Remove the confusing if. Reviewed-by: Moritz Fischer Reviewed-by: Michael Shavit Reviewed-by: Eric Auger Reviewed-by: Nicolin Chen Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ad1e055e420d..46b2555b7c5a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1272,10 +1272,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, */ u64 val = le64_to_cpu(dst->data[0]); bool ste_live = false; - struct arm_smmu_device *smmu = NULL; + struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = NULL; struct arm_smmu_s2_cfg *s2_cfg = NULL; - struct arm_smmu_domain *smmu_domain = NULL; + struct arm_smmu_domain *smmu_domain = master->domain; struct arm_smmu_cmdq_ent prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, .prefetch = { @@ -1283,11 +1283,6 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, }, }; - if (master) { - smmu_domain = master->domain; - smmu = master->smmu; - } - if (smmu_domain) { switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: From 9fde008337d3be8afa7b3b42c1787aac4b6853c9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Dec 2023 15:14:35 -0400 Subject: [PATCH 11/16] iommu/arm-smmu-v3: Remove ARM_SMMU_DOMAIN_NESTED Currently this is exactly the same as ARM_SMMU_DOMAIN_S2, so just remove it. The ongoing work to add nesting support through iommufd will do something a little different. Reviewed-by: Moritz Fischer Reviewed-by: Eric Auger Reviewed-by: Nicolin Chen Tested-by: Shameer Kolothum Tested-by: Nicolin Chen Signed-off-by: Jason Gunthorpe Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 46b2555b7c5a..5387c90b3293 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1289,7 +1289,6 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, cd_table = &master->cd_table; break; case ARM_SMMU_DOMAIN_S2: - case ARM_SMMU_DOMAIN_NESTED: s2_cfg = &smmu_domain->s2_cfg; break; default: @@ -2170,7 +2169,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) fmt = ARM_64_LPAE_S1; finalise_stage_fn = arm_smmu_domain_finalise_s1; break; - case ARM_SMMU_DOMAIN_NESTED: case ARM_SMMU_DOMAIN_S2: ias = smmu->ias; oas = smmu->oas; @@ -2739,7 +2737,7 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain) if (smmu_domain->smmu) ret = -EPERM; else - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + smmu_domain->stage = ARM_SMMU_DOMAIN_S2; mutex_unlock(&smmu_domain->init_mutex); return ret; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 03f9e526cbd9..27ddf1acd12c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -715,7 +715,6 @@ struct arm_smmu_master { enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, - ARM_SMMU_DOMAIN_NESTED, ARM_SMMU_DOMAIN_BYPASS, }; From ff0f802974136c7f4c576da227565dc27cc1c69d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 17 Oct 2023 15:11:40 -0300 Subject: [PATCH 12/16] iommu/arm-smmu: Reorganize arm_smmu_domain_add_master() Make arm_smmu_domain_add_master() not use the smmu_domain to detect the s2cr configuration, instead pass it in as a parameter. It always returns zero so make it return void. Since it no longer really does anything to do with a domain call it arm_smmu_master_install_s2crs(). This is done to make the next two patches able to re-use this code without forcing the creation of a struct arm_smmu_domain. Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v2-c86cc8c2230e+160bb-smmu_newapi_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index d6d1a2a55cc0..e2ec1fe14ed4 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1081,21 +1081,14 @@ static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg, mutex_unlock(&smmu->stream_map_mutex); } -static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master_cfg *cfg, - struct iommu_fwspec *fwspec) +static void arm_smmu_master_install_s2crs(struct arm_smmu_master_cfg *cfg, + enum arm_smmu_s2cr_type type, + u8 cbndx, struct iommu_fwspec *fwspec) { - struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_device *smmu = cfg->smmu; struct arm_smmu_s2cr *s2cr = smmu->s2crs; - u8 cbndx = smmu_domain->cfg.cbndx; - enum arm_smmu_s2cr_type type; int i, idx; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) - type = S2CR_TYPE_BYPASS; - else - type = S2CR_TYPE_TRANS; - for_each_cfg_sme(cfg, fwspec, i, idx) { if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx) continue; @@ -1105,7 +1098,6 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, s2cr[idx].cbndx = cbndx; arm_smmu_write_s2cr(smmu, idx); } - return 0; } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -1153,7 +1145,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) } /* Looks ok, so add the device to the domain */ - ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec); + arm_smmu_master_install_s2crs(cfg, + smmu_domain->stage == + ARM_SMMU_DOMAIN_BYPASS ? + S2CR_TYPE_BYPASS : + S2CR_TYPE_TRANS, + smmu_domain->cfg.cbndx, fwspec); /* * Setup an autosuspend delay to avoid bouncing runpm state. From 22bb7b41476a1b8dc282dc5b3ca5470090b52e46 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 17 Oct 2023 15:11:41 -0300 Subject: [PATCH 13/16] iommu/arm-smmu: Convert to a global static identity domain Create a global static identity domain with it's own arm_smmu_attach_dev_identity() that simply calls arm_smmu_master_install_s2crs() with the identity parameters. This is done by giving the attach path for identity its own unique implementation that simply calls arm_smmu_master_install_s2crs(). Remove ARM_SMMU_DOMAIN_BYPASS and all checks of IOMMU_DOMAIN_IDENTITY. Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v2-c86cc8c2230e+160bb-smmu_newapi_jgg@nvidia.com [will: Move duplicated autosuspend logic into a helper function] Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 81 ++++++++++++++++++--------- drivers/iommu/arm/arm-smmu/arm-smmu.h | 1 - 2 files changed, 53 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index e2ec1fe14ed4..dec912c27141 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -82,6 +82,23 @@ static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) pm_runtime_put_autosuspend(smmu->dev); } +static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu) +{ + /* + * Setup an autosuspend delay to avoid bouncing runpm state. + * Otherwise, if a driver for a suspended consumer device + * unmaps buffers, it will runpm resume/suspend for each one. + * + * For example, when used by a GPU device, when an application + * or game exits, it can trigger unmapping 100s or 1000s of + * buffers. With a runpm cycle for each buffer, that adds up + * to 5-10sec worth of reprogramming the context bank, while + * the system appears to be locked up to the user. + */ + pm_runtime_set_autosuspend_delay(smmu->dev, 20); + pm_runtime_use_autosuspend(smmu->dev); +} + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -624,12 +641,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (smmu_domain->smmu) goto out_unlock; - if (domain->type == IOMMU_DOMAIN_IDENTITY) { - smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; - smmu_domain->smmu = smmu; - goto out_unlock; - } - /* * Mapping the requested stage onto what we support is surprisingly * complicated, mainly because the spec allows S1+S2 SMMUs without @@ -825,7 +836,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) struct arm_smmu_cfg *cfg = &smmu_domain->cfg; int ret, irq; - if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) + if (!smmu) return; ret = arm_smmu_rpm_get(smmu); @@ -854,7 +865,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) { + if (type != IOMMU_DOMAIN_UNMANAGED) { if (using_legacy_binding || type != IOMMU_DOMAIN_DMA) return NULL; } @@ -1145,32 +1156,45 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) } /* Looks ok, so add the device to the domain */ - arm_smmu_master_install_s2crs(cfg, - smmu_domain->stage == - ARM_SMMU_DOMAIN_BYPASS ? - S2CR_TYPE_BYPASS : - S2CR_TYPE_TRANS, + arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS, smmu_domain->cfg.cbndx, fwspec); - - /* - * Setup an autosuspend delay to avoid bouncing runpm state. - * Otherwise, if a driver for a suspended consumer device - * unmaps buffers, it will runpm resume/suspend for each one. - * - * For example, when used by a GPU device, when an application - * or game exits, it can trigger unmapping 100s or 1000s of - * buffers. With a runpm cycle for each buffer, that adds up - * to 5-10sec worth of reprogramming the context bank, while - * the system appears to be locked up to the user. - */ - pm_runtime_set_autosuspend_delay(smmu->dev, 20); - pm_runtime_use_autosuspend(smmu->dev); - + arm_smmu_rpm_use_autosuspend(smmu); rpm_put: arm_smmu_rpm_put(smmu); return ret; } +static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct arm_smmu_device *smmu; + int ret; + + if (!cfg) + return -ENODEV; + smmu = cfg->smmu; + + ret = arm_smmu_rpm_get(smmu); + if (ret < 0) + return ret; + + arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_BYPASS, 0, fwspec); + arm_smmu_rpm_use_autosuspend(smmu); + arm_smmu_rpm_put(smmu); + return 0; +} + +static const struct iommu_domain_ops arm_smmu_identity_ops = { + .attach_dev = arm_smmu_attach_dev_identity, +}; + +static struct iommu_domain arm_smmu_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &arm_smmu_identity_ops, +}; + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -1557,6 +1581,7 @@ static int arm_smmu_def_domain_type(struct device *dev) } static struct iommu_ops arm_smmu_ops = { + .identity_domain = &arm_smmu_identity_domain, .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, .probe_device = arm_smmu_probe_device, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 703fd5817ec1..836ed6799a80 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -361,7 +361,6 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, ARM_SMMU_DOMAIN_NESTED, - ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { From bbbf11eea38c0cb167edc5ce5fe76324e87ad074 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 17 Oct 2023 15:11:42 -0300 Subject: [PATCH 14/16] iommu/arm-smmu: Implement IOMMU_DOMAIN_BLOCKED Using the same design as IDENTITY setup a S2CR_TYPE_FAULT s2cr for the device. Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v2-c86cc8c2230e+160bb-smmu_newapi_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 28 ++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index dec912c27141..c76b68a296bc 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1164,8 +1164,8 @@ rpm_put: return ret; } -static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, - struct device *dev) +static int arm_smmu_attach_dev_type(struct device *dev, + enum arm_smmu_s2cr_type type) { struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); @@ -1180,12 +1180,18 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, if (ret < 0) return ret; - arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_BYPASS, 0, fwspec); + arm_smmu_master_install_s2crs(cfg, type, 0, fwspec); arm_smmu_rpm_use_autosuspend(smmu); arm_smmu_rpm_put(smmu); return 0; } +static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + return arm_smmu_attach_dev_type(dev, S2CR_TYPE_BYPASS); +} + static const struct iommu_domain_ops arm_smmu_identity_ops = { .attach_dev = arm_smmu_attach_dev_identity, }; @@ -1195,6 +1201,21 @@ static struct iommu_domain arm_smmu_identity_domain = { .ops = &arm_smmu_identity_ops, }; +static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, + struct device *dev) +{ + return arm_smmu_attach_dev_type(dev, S2CR_TYPE_FAULT); +} + +static const struct iommu_domain_ops arm_smmu_blocked_ops = { + .attach_dev = arm_smmu_attach_dev_blocked, +}; + +static struct iommu_domain arm_smmu_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &arm_smmu_blocked_ops, +}; + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -1582,6 +1603,7 @@ static int arm_smmu_def_domain_type(struct device *dev) static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, + .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, .probe_device = arm_smmu_probe_device, From e0976331ad114af8e379e18483c346c6c79ca858 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 17 Oct 2023 15:11:43 -0300 Subject: [PATCH 15/16] iommu/arm-smmu: Pass arm_smmu_domain to internal functions Keep the types consistent, all the callers of these functions already have obtained a struct arm_smmu_domain, don't needlessly go to/from an iommu_domain through the internal call chains. Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v2-c86cc8c2230e+160bb-smmu_newapi_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index c76b68a296bc..664d53dfb3bd 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -409,8 +409,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { u32 fsr, fsynr, cbfrsynra; unsigned long iova; - struct iommu_domain *domain = dev; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_domain *smmu_domain = dev; struct arm_smmu_device *smmu = smmu_domain->smmu; int idx = smmu_domain->cfg.cbndx; int ret; @@ -423,7 +422,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); - ret = report_iommu_fault(domain, NULL, iova, + ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) @@ -624,7 +623,7 @@ static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain, return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks); } -static int arm_smmu_init_domain_context(struct iommu_domain *domain, +static int arm_smmu_init_domain_context(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu, struct device *dev) { @@ -633,7 +632,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, struct io_pgtable_ops *pgtbl_ops; struct io_pgtable_cfg pgtbl_cfg; enum io_pgtable_fmt fmt; - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct iommu_domain *domain = &smmu_domain->domain; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; irqreturn_t (*context_fault)(int irq, void *dev); @@ -807,8 +806,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, else context_fault = arm_smmu_context_fault; - ret = devm_request_irq(smmu->dev, irq, context_fault, - IRQF_SHARED, "arm-smmu-context-fault", domain); + ret = devm_request_irq(smmu->dev, irq, context_fault, IRQF_SHARED, + "arm-smmu-context-fault", smmu_domain); if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", cfg->irptndx, irq); @@ -829,9 +828,8 @@ out_unlock: return ret; } -static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) +static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; int ret, irq; @@ -852,7 +850,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) { irq = smmu->irqs[cfg->irptndx]; - devm_free_irq(smmu->dev, irq, domain); + devm_free_irq(smmu->dev, irq, smmu_domain); } free_io_pgtable_ops(smmu_domain->pgtbl_ops); @@ -892,7 +890,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) * Free the domain resources. We assume that all devices have * already been detached. */ - arm_smmu_destroy_domain_context(domain); + arm_smmu_destroy_domain_context(smmu_domain); kfree(smmu_domain); } @@ -1142,7 +1140,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return ret; /* Ensure that the domain is finalised */ - ret = arm_smmu_init_domain_context(domain, smmu, dev); + ret = arm_smmu_init_domain_context(smmu_domain, smmu, dev); if (ret < 0) goto rpm_put; From 9b3febc3a3da7fcd81ece10614b7fd6c729ba8b4 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 17 Oct 2023 15:11:44 -0300 Subject: [PATCH 16/16] iommu/arm-smmu: Convert to domain_alloc_paging() Now that the BLOCKED and IDENTITY behaviors are managed with their own domains change to the domain_alloc_paging() op. The check for using_legacy_binding is now redundant, arm_smmu_def_domain_type() always returns IOMMU_DOMAIN_IDENTITY for this mode, so the core code will never attempt to create a DMA domain in the first place. Since commit a4fdd9762272 ("iommu: Use flush queue capability") the core code only passes in IDENTITY/BLOCKED/UNMANAGED/DMA domain types. It will not pass in IDENTITY or BLOCKED if the global statics exist, so the test for DMA is also redundant now too. Call arm_smmu_init_domain_context() early if a dev is available. Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v2-c86cc8c2230e+160bb-smmu_newapi_jgg@nvidia.com [will: Simplify arm_smmu_domain_alloc_paging() since 'cfg' cannot be NULL] Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 664d53dfb3bd..b0a6b367d8a2 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -859,14 +859,10 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain) arm_smmu_rpm_put(smmu); } -static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) +static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED) { - if (using_legacy_binding || type != IOMMU_DOMAIN_DMA) - return NULL; - } /* * Allocate the domain and initialise some of its data structures. * We can't really do anything meaningful until we've added a @@ -879,6 +875,15 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) mutex_init(&smmu_domain->init_mutex); spin_lock_init(&smmu_domain->cb_lock); + if (dev) { + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + + if (arm_smmu_init_domain_context(smmu_domain, cfg->smmu, dev)) { + kfree(smmu_domain); + return NULL; + } + } + return &smmu_domain->domain; } @@ -1603,7 +1608,7 @@ static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, - .domain_alloc = arm_smmu_domain_alloc, + .domain_alloc_paging = arm_smmu_domain_alloc_paging, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .probe_finalize = arm_smmu_probe_finalize,