IOMMU Fixes for Linux v3.14-rc3

The fixes are only for the ARM-SMMU driver. Here is the summary from
 Will Deacon:
 
   - Andreas Herrmann took the driver for a run with a real SATA
     controller, which caused the new mutex-based locking to explode
     since we require mappings in atomic context
 
   - Yifan fixed an issue with the page table creation, which then caused
     breakages with the way in which we flush descriptors out to the
     table walker
 
   - I ran the driver on a system where the SMMU is hooked into a
     coherent interconnect for table walks, and noticed a shareability
     mismatch between the CPU and the SMMU
 
 These issues are all fixed here and have been tested on both arm and
 arm64 based systems.
 
 Besides that I put a fix on-top to make the spinlock irq-safe, so that the
 code-paths can be used in the DMA-API.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJTB1qJAAoJECvwRC2XARrjnkUQAJ+JxX2FIieZ9/ytg50PbB91
 GvYn9nJWSvHLgP1gsCVa37UAw0F3x9YcD/063AY3V77augz8wo/MaX+JqmGO1Q5Y
 FnsrVqtpAQ4Rgd1fnhSVdNnWYlk26BHW1V7pb6Er2HVIyvhWoZL/8IWrC4sJ66WO
 7YiyPnh+MT6bCAkioVW9qIDyUhiO49ZemdL6Z5H1BgKOwctwmIdGF/apkI2Og8+J
 UgqJ6HCM7bn1HELzJ8D/uBw1amjQTWzya1GgWkdZbzq0rA8R3OH5hkMudN9wEZX+
 S0KeEGiEehfwcIIf3DvV3VPNQoC+tHUVULIt+y/0/hxZuKb8/eiZJHgQAHFrHNTJ
 pCj57Pt96DRckbEIUfD51y3JPY0oKXPkOH1I3mdkR1OCuHM3cVV+be8x2Kof80+b
 y5yF+PkAOqVnStYlsUwk0pLnfYWNRFTZhvY3gYa8E3oocd3UUIwzxoibGTCfIKvx
 8v/lz73IlNZ9R+8mqWAsDtlgbPUJppUf+XisGog+/yG2kCaJXuRP50O65iWbgg38
 ZsnCdGcarHSi/mOxpvox0bm6/71XsmkdnMZRZds9FhvSGZs19M+pmphn7WB6yvQR
 yb4fok72GI7R3khqOj9KdFz6zLmqWLy8jRxie6Vds8TTcJsigptg+uAVIpmzhQje
 NJnSt4aFxQHjKASiLZu+
 =K+3P
 -----END PGP SIGNATURE-----

Merge tag 'iommu-fixes-v3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU fixes from Joerg Roedel:
 "The fixes are only for the ARM-SMMU driver.  Here is the summary from
  Will Deacon:

   - Andreas Herrmann took the driver for a run with a real SATA
     controller, which caused the new mutex-based locking to explode
     since we require mappings in atomic context

   - Yifan fixed an issue with the page table creation, which then
     caused breakages with the way in which we flush descriptors out to
     the table walker

   - I ran the driver on a system where the SMMU is hooked into a
     coherent interconnect for table walks, and noticed a shareability
     mismatch between the CPU and the SMMU

  These issues are all fixed here and have been tested on both arm and
  arm64 based systems.

  Besides that I put a fix on-top to make the spinlock irq-safe, so that
  the code-paths can be used in the DMA-API"

* tag 'iommu-fixes-v3.14-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
  arm/smmu: Use irqsafe spinlock for domain lock
  iommu/arm-smmu: fix compilation issue when !CONFIG_ARM_AMBA
  iommu/arm-smmu: set CBARn.BPSHCFG to NSH for s1-s2-bypass contexts
  iommu/arm-smmu: fix table flushing during initial allocations
  iommu/arm-smmu: really fix page table locking
  iommu/arm-smmu: fix pud/pmd entry fill sequence
This commit is contained in:
Linus Torvalds 2014-02-21 09:59:46 -08:00
commit 7777d93489

View File

@ -79,7 +79,6 @@
#define ARM_SMMU_PTE_CONT_SIZE (PAGE_SIZE * ARM_SMMU_PTE_CONT_ENTRIES)
#define ARM_SMMU_PTE_CONT_MASK (~(ARM_SMMU_PTE_CONT_SIZE - 1))
#define ARM_SMMU_PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(pte_t))
/* Stage-1 PTE */
#define ARM_SMMU_PTE_AP_UNPRIV (((pteval_t)1) << 6)
@ -191,6 +190,9 @@
#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
#define CBAR_VMID_SHIFT 0
#define CBAR_VMID_MASK 0xff
#define CBAR_S1_BPSHCFG_SHIFT 8
#define CBAR_S1_BPSHCFG_MASK 3
#define CBAR_S1_BPSHCFG_NSH 3
#define CBAR_S1_MEMATTR_SHIFT 12
#define CBAR_S1_MEMATTR_MASK 0xf
#define CBAR_S1_MEMATTR_WB 0xf
@ -393,7 +395,7 @@ struct arm_smmu_domain {
struct arm_smmu_cfg root_cfg;
phys_addr_t output_mask;
struct mutex lock;
spinlock_t lock;
};
static DEFINE_SPINLOCK(arm_smmu_devices_lock);
@ -632,6 +634,28 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
return IRQ_HANDLED;
}
static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr,
size_t size)
{
unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
/* Ensure new page tables are visible to the hardware walker */
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
dsb();
} else {
/*
* If the SMMU can't walk tables in the CPU caches, treat them
* like non-coherent DMA since we need to flush the new entries
* all the way out to memory. There's no possibility of
* recursion here as the SMMU table walker will not be wired
* through another SMMU.
*/
dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
DMA_TO_DEVICE);
}
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
{
u32 reg;
@ -650,11 +674,16 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
if (smmu->version == 1)
reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT;
/* Use the weakest memory type, so it is overridden by the pte */
if (stage1)
reg |= (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
else
/*
* Use the weakest shareability/memory types, so they are
* overridden by the ttbcr/pte.
*/
if (stage1) {
reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
} else {
reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT;
}
writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx));
if (smmu->version > 1) {
@ -715,6 +744,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
}
/* TTBR0 */
arm_smmu_flush_pgtable(smmu, root_cfg->pgd,
PTRS_PER_PGD * sizeof(pgd_t));
reg = __pa(root_cfg->pgd);
writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32;
@ -901,7 +932,7 @@ static int arm_smmu_domain_init(struct iommu_domain *domain)
goto out_free_domain;
smmu_domain->root_cfg.pgd = pgd;
mutex_init(&smmu_domain->lock);
spin_lock_init(&smmu_domain->lock);
domain->priv = smmu_domain;
return 0;
@ -1128,6 +1159,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
struct arm_smmu_domain *smmu_domain = domain->priv;
struct arm_smmu_device *device_smmu = dev->archdata.iommu;
struct arm_smmu_master *master;
unsigned long flags;
if (!device_smmu) {
dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
@ -1138,7 +1170,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
* Sanity check the domain. We don't currently support domains
* that cross between different SMMU chains.
*/
mutex_lock(&smmu_domain->lock);
spin_lock_irqsave(&smmu_domain->lock, flags);
if (!smmu_domain->leaf_smmu) {
/* Now that we have a master, we can finalise the domain */
ret = arm_smmu_init_domain_context(domain, dev);
@ -1153,7 +1185,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
dev_name(device_smmu->dev));
goto err_unlock;
}
mutex_unlock(&smmu_domain->lock);
spin_unlock_irqrestore(&smmu_domain->lock, flags);
/* Looks ok, so add the device to the domain */
master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node);
@ -1163,7 +1195,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return arm_smmu_domain_add_master(smmu_domain, master);
err_unlock:
mutex_unlock(&smmu_domain->lock);
spin_unlock_irqrestore(&smmu_domain->lock, flags);
return ret;
}
@ -1177,23 +1209,6 @@ static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
arm_smmu_domain_remove_master(smmu_domain, master);
}
static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr,
size_t size)
{
unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
/*
* If the SMMU can't walk tables in the CPU caches, treat them
* like non-coherent DMA since we need to flush the new entries
* all the way out to memory. There's no possibility of recursion
* here as the SMMU table walker will not be wired through another
* SMMU.
*/
if (!(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK))
dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
DMA_TO_DEVICE);
}
static bool arm_smmu_pte_is_contiguous_range(unsigned long addr,
unsigned long end)
{
@ -1210,12 +1225,11 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
if (pmd_none(*pmd)) {
/* Allocate a new set of tables */
pgtable_t table = alloc_page(PGALLOC_GFP);
pgtable_t table = alloc_page(GFP_ATOMIC|__GFP_ZERO);
if (!table)
return -ENOMEM;
arm_smmu_flush_pgtable(smmu, page_address(table),
ARM_SMMU_PTE_HWTABLE_SIZE);
arm_smmu_flush_pgtable(smmu, page_address(table), PAGE_SIZE);
if (!pgtable_page_ctor(table)) {
__free_page(table);
return -ENOMEM;
@ -1317,9 +1331,15 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,
#ifndef __PAGETABLE_PMD_FOLDED
if (pud_none(*pud)) {
pmd = pmd_alloc_one(NULL, addr);
pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);
if (!pmd)
return -ENOMEM;
arm_smmu_flush_pgtable(smmu, pmd, PAGE_SIZE);
pud_populate(NULL, pud, pmd);
arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud));
pmd += pmd_index(addr);
} else
#endif
pmd = pmd_offset(pud, addr);
@ -1328,8 +1348,6 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,
next = pmd_addr_end(addr, end);
ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, end, pfn,
flags, stage);
pud_populate(NULL, pud, pmd);
arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud));
phys += next - addr;
} while (pmd++, addr = next, addr < end);
@ -1346,9 +1364,15 @@ static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,
#ifndef __PAGETABLE_PUD_FOLDED
if (pgd_none(*pgd)) {
pud = pud_alloc_one(NULL, addr);
pud = (pud_t *)get_zeroed_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
arm_smmu_flush_pgtable(smmu, pud, PAGE_SIZE);
pgd_populate(NULL, pgd, pud);
arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd));
pud += pud_index(addr);
} else
#endif
pud = pud_offset(pgd, addr);
@ -1357,8 +1381,6 @@ static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd,
next = pud_addr_end(addr, end);
ret = arm_smmu_alloc_init_pmd(smmu, pud, addr, next, phys,
flags, stage);
pgd_populate(NULL, pud, pgd);
arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd));
phys += next - addr;
} while (pud++, addr = next, addr < end);
@ -1375,6 +1397,7 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
pgd_t *pgd = root_cfg->pgd;
struct arm_smmu_device *smmu = root_cfg->smmu;
unsigned long irqflags;
if (root_cfg->cbar == CBAR_TYPE_S2_TRANS) {
stage = 2;
@ -1397,7 +1420,7 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,
if (paddr & ~output_mask)
return -ERANGE;
mutex_lock(&smmu_domain->lock);
spin_lock_irqsave(&smmu_domain->lock, irqflags);
pgd += pgd_index(iova);
end = iova + size;
do {
@ -1413,11 +1436,7 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain,
} while (pgd++, iova != end);
out_unlock:
mutex_unlock(&smmu_domain->lock);
/* Ensure new page tables are visible to the hardware walker */
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
dsb();
spin_unlock_irqrestore(&smmu_domain->lock, irqflags);
return ret;
}
@ -1987,8 +2006,10 @@ static int __init arm_smmu_init(void)
if (!iommu_present(&platform_bus_type))
bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
if (!iommu_present(&amba_bustype))
bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
return 0;
}