diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index aa3bd3968290..ceb8f9ef4bad 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -378,7 +378,16 @@
 
 #define CMDQ_SYNC_0_CS_SHIFT		12
 #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ		(1UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT		22
+#define CMDQ_SYNC_0_MSH_ISH		(3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT	24
+#define CMDQ_SYNC_0_MSIATTR_OIWB	(0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT	32
+#define CMDQ_SYNC_0_MSIDATA_MASK	0xffffffffUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT	0
+#define CMDQ_SYNC_1_MSIADDR_MASK	0xffffffffffffcUL
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS			4
@@ -410,6 +419,7 @@
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
 #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US	1000000 /* 1s! */
+#define ARM_SMMU_SYNC_TIMEOUT_US	1000000 /* 1s! */
 
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
@@ -496,6 +506,10 @@ struct arm_smmu_cmdq_ent {
 		} pri;
 
 		#define CMDQ_OP_CMD_SYNC	0x46
+		struct {
+			u32			msidata;
+			u64			msiaddr;
+		} sync;
 	};
 };
 
@@ -609,6 +623,7 @@ struct arm_smmu_device {
 
 	int				gerr_irq;
 	int				combined_irq;
+	atomic_t			sync_nr;
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -627,6 +642,8 @@ struct arm_smmu_device {
 
 	struct arm_smmu_strtab_cfg	strtab_cfg;
 
+	u32				sync_count;
+
 	/* IOMMU core code handle */
 	struct iommu_device		iommu;
 };
@@ -871,7 +888,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		}
 		break;
 	case CMDQ_OP_CMD_SYNC:
-		cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		if (ent->sync.msiaddr)
+			cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+		else
+			cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+		cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		break;
 	default:
 		return -ENOENT;
@@ -957,21 +980,44 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
+/*
+ * The difference between val and sync_idx is bounded by the maximum size of
+ * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ */
+static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
+	u32 val = smp_cond_load_acquire(&smmu->sync_count,
+					(int)(VAL - sync_idx) >= 0 ||
+					!ktime_before(ktime_get(), timeout));
+
+	return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 {
 	u64 cmd[CMDQ_ENT_DWORDS];
 	unsigned long flags;
 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+	bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+		   (smmu->features & ARM_SMMU_FEAT_COHERENCY);
 	struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
 	int ret;
 
+	if (msi) {
+		ent.sync.msidata = atomic_inc_return_relaxed(&smmu->sync_nr);
+		ent.sync.msiaddr = virt_to_phys(&smmu->sync_count);
+	}
 	arm_smmu_cmdq_build_cmd(cmd, &ent);
 
 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
 	arm_smmu_cmdq_insert_cmd(smmu, cmd);
-	ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
+	if (!msi)
+		ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 
+	if (msi)
+		ret = arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
 	if (ret)
 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 }
@@ -2159,6 +2205,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
 	int ret;
 
+	atomic_set(&smmu->sync_nr, 0);
 	ret = arm_smmu_init_queues(smmu);
 	if (ret)
 		return ret;
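
A note on the completion check in arm_smmu_sync_poll_msi(): sync_nr is a free-running
32-bit sequence number, and the SMMU's MSI write deposits the matching value in
sync_count, so the comparison must stay correct across 32-bit wraparound. Hence the
signed-difference test (int)(val - sync_idx) >= 0 rather than a plain val >= sync_idx;
as the comment in the patch says, the distance between the two values is bounded by
the maximum queue size (2^20 entries), far inside the +/-2^31 window the idiom
tolerates. Below is a minimal standalone sketch of the same idiom, with hypothetical
names (seq_reached is not part of the patch):

#include <assert.h>
#include <stdint.h>

/*
 * Nonzero once "cur" has reached or passed "target", even if the 32-bit
 * counter wrapped in between. Correct while the true distance stays below
 * 2^31; in the patch it is at most 2^20, the maximum SMMUv3 queue size.
 */
static int seq_reached(uint32_t cur, uint32_t target)
{
	return (int32_t)(cur - target) >= 0;
}

int main(void)
{
	assert(seq_reached(5, 3));		/* simple case */
	assert(!seq_reached(3, 5));		/* not yet there */
	assert(seq_reached(2, 0xfffffffeu));	/* counter wrapped past target */
	assert(!seq_reached(0xfffffffeu, 2));	/* wrapped target, still waiting */
	return 0;
}

The sketch only illustrates the arithmetic; in the real function,
smp_cond_load_acquire() additionally supplies the acquire ordering that keeps
later reads from being reordered before the observed completion.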