diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7dfe681acc3d..761c1b251c1e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2283,9 +2283,9 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + struct dma_pte *pte = NULL; phys_addr_t pteval; u64 attr; @@ -2314,7 +2314,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); if (!pte) return -ENOMEM; /* It is large page*/ @@ -2375,34 +2375,14 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, * recalculate 'pte' and switch back to smaller pages for the * end of the mapping, if the trailing size is not enough to * use another superpage (i.e. nr_pages < lvl_pages). + * + * We leave clflush for the leaf pte changes to iotlb_sync_map() + * callback. */ pte++; if (!nr_pages || first_pte_in_page(pte) || - (largepage_lvl > 1 && nr_pages < lvl_pages)) { - domain_flush_cache(domain, first_pte, - (void *)pte - (void *)first_pte); + (largepage_lvl > 1 && nr_pages < lvl_pages)) pte = NULL; - } - } - - return 0; -} - -static int -domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) -{ - int iommu_id, ret; - struct intel_iommu *iommu; - - /* Do the real mapping first */ - ret = __domain_mapping(domain, iov_pfn, phys_pfn, nr_pages, prot); - if (ret) - return ret; - - for_each_domain_iommu(iommu_id, domain) { - iommu = g_iommus[iommu_id]; - __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); } return 0; @@ -4943,7 +4923,6 @@ static int intel_iommu_map(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); u64 max_addr; int prot = 0; - int ret; if (iommu_prot & IOMMU_READ) prot |= DMA_PTE_READ; @@ -4969,9 +4948,8 @@ static int intel_iommu_map(struct iommu_domain *domain, /* Round up size to next multiple of PAGE_SIZE, if it and the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); - ret = domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); - return ret; + return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); } static size_t intel_iommu_unmap(struct iommu_domain *domain, @@ -5424,6 +5402,57 @@ static bool risky_device(struct pci_dev *pdev) return false; } +static void clflush_sync_map(struct dmar_domain *domain, unsigned long clf_pfn, + unsigned long clf_pages) +{ + struct dma_pte *first_pte = NULL, *pte = NULL; + unsigned long lvl_pages = 0; + int level = 0; + + while (clf_pages > 0) { + if (!pte) { + level = 0; + pte = pfn_to_dma_pte(domain, clf_pfn, &level); + if (WARN_ON(!pte)) + return; + first_pte = pte; + lvl_pages = lvl_to_nr_pages(level); + } + + if (WARN_ON(!lvl_pages || clf_pages < lvl_pages)) + return; + + clf_pages -= lvl_pages; + clf_pfn += lvl_pages; + pte++; + + if (!clf_pages || first_pte_in_page(pte) || + (level > 1 && clf_pages < lvl_pages)) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + } +} + +static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + unsigned long pages = aligned_nrpages(iova, size); + unsigned long pfn = iova >> VTD_PAGE_SHIFT; + struct intel_iommu *iommu; + int iommu_id; + + if (!dmar_domain->iommu_coherency) + clflush_sync_map(dmar_domain, pfn, pages); + + for_each_domain_iommu(iommu_id, dmar_domain) { + iommu = g_iommus[iommu_id]; + __mapping_notify_one(iommu, dmar_domain, pfn, pages); + } +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -5436,6 +5465,7 @@ const struct iommu_ops intel_iommu_ops = { .aux_detach_dev = intel_iommu_aux_detach_device, .aux_get_pasid = intel_iommu_aux_get_pasid, .map = intel_iommu_map, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, .unmap = intel_iommu_unmap, .flush_iotlb_all = intel_flush_iotlb_all, .iotlb_sync = intel_iommu_tlb_sync,