From ff7d37a502022149655c18035b99a53391be0383 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 9 Apr 2015 15:33:30 -0400 Subject: [PATCH 1/4] Break up monolithic iommu table/lock into finer graularity pools and lock Investigation of multithreaded iperf experiments on an ethernet interface show the iommu->lock as the hottest lock identified by lockstat, with something of the order of 21M contentions out of 27M acquisitions, and an average wait time of 26 us for the lock. This is not efficient. A more scalable design is to follow the ppc model, where the iommu_map_table has multiple pools, each stretching over a segment of the map, and with a separate lock for each pool. This model allows for better parallelization of the iommu map search. This patch adds the iommu range alloc/free function infrastructure. Signed-off-by: Sowmini Varadhan Acked-by: Benjamin Herrenschmidt Signed-off-by: David S. Miller --- include/linux/iommu-common.h | 51 +++++++ lib/Makefile | 2 +- lib/iommu-common.c | 266 +++++++++++++++++++++++++++++++++++ 3 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 include/linux/iommu-common.h create mode 100644 lib/iommu-common.c diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h new file mode 100644 index 000000000000..bbced83b32ee --- /dev/null +++ b/include/linux/iommu-common.h @@ -0,0 +1,51 @@ +#ifndef _LINUX_IOMMU_COMMON_H +#define _LINUX_IOMMU_COMMON_H + +#include +#include +#include + +#define IOMMU_POOL_HASHBITS 4 +#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) + +struct iommu_pool { + unsigned long start; + unsigned long end; + unsigned long hint; + spinlock_t lock; +}; + +struct iommu_map_table { + unsigned long table_map_base; + unsigned long table_shift; + unsigned long nr_pools; + void (*lazy_flush)(struct iommu_map_table *); + unsigned long poolsize; + struct iommu_pool pools[IOMMU_NR_POOLS]; + u32 flags; +#define IOMMU_HAS_LARGE_POOL 0x00000001 +#define IOMMU_NO_SPAN_BOUND 0x00000002 +#define IOMMU_NEED_FLUSH 0x00000004 + struct iommu_pool large_pool; + unsigned long *map; +}; + +extern void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check); + +extern unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order); + +extern void iommu_tbl_range_free(struct iommu_map_table *iommu, + u64 dma_addr, unsigned long npages, + unsigned long entry); + +#endif diff --git a/lib/Makefile b/lib/Makefile index da6116b21555..6c37933336a0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o diff --git a/lib/iommu-common.c b/lib/iommu-common.c new file mode 100644 index 000000000000..b99f1d744a8d --- /dev/null +++ b/lib/iommu-common.c @@ -0,0 +1,266 @@ +/* + * IOMMU mmap management and range allocation functions. + * Based almost entirely upon the powerpc iommu allocator. + */ + +#include +#include +#include +#include +#include +#include +#include + +unsigned long iommu_large_alloc = 15; + +static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); + +static inline bool need_flush(struct iommu_map_table *iommu) +{ + return (iommu->lazy_flush != NULL && + (iommu->flags & IOMMU_NEED_FLUSH) != 0); +} + +static inline void set_flush(struct iommu_map_table *iommu) +{ + iommu->flags |= IOMMU_NEED_FLUSH; +} + +static inline void clear_flush(struct iommu_map_table *iommu) +{ + iommu->flags &= ~IOMMU_NEED_FLUSH; +} + +static void setup_iommu_pool_hash(void) +{ + unsigned int i; + static bool do_once; + + if (do_once) + return; + do_once = true; + for_each_possible_cpu(i) + per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); +} + +/* + * Initialize iommu_pool entries for the iommu_map_table. `num_entries' + * is the number of table entries. If `large_pool' is set to true, + * the top 1/4 of the table will be set aside for pool allocations + * of more than iommu_large_alloc pages. + */ +extern void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check) +{ + unsigned int start, i; + struct iommu_pool *p = &(iommu->large_pool); + + setup_iommu_pool_hash(); + if (npools == 0) + iommu->nr_pools = IOMMU_NR_POOLS; + else + iommu->nr_pools = npools; + BUG_ON(npools > IOMMU_NR_POOLS); + + iommu->table_shift = table_shift; + iommu->lazy_flush = lazy_flush; + start = 0; + if (skip_span_boundary_check) + iommu->flags |= IOMMU_NO_SPAN_BOUND; + if (large_pool) + iommu->flags |= IOMMU_HAS_LARGE_POOL; + + if (!large_pool) + iommu->poolsize = num_entries/iommu->nr_pools; + else + iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; + for (i = 0; i < iommu->nr_pools; i++) { + spin_lock_init(&(iommu->pools[i].lock)); + iommu->pools[i].start = start; + iommu->pools[i].hint = start; + start += iommu->poolsize; /* start for next pool */ + iommu->pools[i].end = start - 1; + } + if (!large_pool) + return; + /* initialize large_pool */ + spin_lock_init(&(p->lock)); + p->start = start; + p->hint = p->start; + p->end = num_entries; +} +EXPORT_SYMBOL(iommu_tbl_pool_init); + +unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order) +{ + unsigned int pool_hash = __this_cpu_read(iommu_pool_hash); + unsigned long n, end, start, limit, boundary_size; + struct iommu_pool *pool; + int pass = 0; + unsigned int pool_nr; + unsigned int npools = iommu->nr_pools; + unsigned long flags; + bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); + bool largealloc = (large_pool && npages > iommu_large_alloc); + unsigned long shift; + unsigned long align_mask = 0; + + if (align_order > 0) + align_mask = 0xffffffffffffffffl >> (64 - align_order); + + /* Sanity check */ + if (unlikely(npages == 0)) { + WARN_ON_ONCE(1); + return DMA_ERROR_CODE; + } + + if (largealloc) { + pool = &(iommu->large_pool); + pool_nr = 0; /* to keep compiler happy */ + } else { + /* pick out pool_nr */ + pool_nr = pool_hash & (npools - 1); + pool = &(iommu->pools[pool_nr]); + } + spin_lock_irqsave(&pool->lock, flags); + + again: + if (pass == 0 && handle && *handle && + (*handle >= pool->start) && (*handle < pool->end)) + start = *handle; + else + start = pool->hint; + + limit = pool->end; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the beginning. If a + * flush is needed, it will get done based on the return value + * from iommu_area_alloc() below. + */ + if (start >= limit) + start = pool->start; + shift = iommu->table_map_base >> iommu->table_shift; + if (limit + shift > mask) { + limit = mask - shift + 1; + /* If we're constrained on address range, first try + * at the masked hint to avoid O(n) search complexity, + * but on second pass, start at 0 in pool 0. + */ + if ((start & mask) >= limit || pass > 0) { + spin_unlock(&(pool->lock)); + pool = &(iommu->pools[0]); + spin_lock(&(pool->lock)); + start = pool->start; + } else { + start &= mask; + } + } + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << iommu->table_shift); + else + boundary_size = ALIGN(1UL << 32, 1 << iommu->table_shift); + + boundary_size = boundary_size >> iommu->table_shift; + /* + * if the skip_span_boundary_check had been set during init, we set + * things up so that iommu_is_span_boundary() merely checks if the + * (index + npages) < num_tsb_entries + */ + if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { + shift = 0; + boundary_size = iommu->poolsize * iommu->nr_pools; + } + n = iommu_area_alloc(iommu->map, limit, start, npages, shift, + boundary_size, align_mask); + if (n == -1) { + if (likely(pass == 0)) { + /* First failure, rescan from the beginning. */ + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else if (!largealloc && pass <= iommu->nr_pools) { + spin_unlock(&(pool->lock)); + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + pool = &(iommu->pools[pool_nr]); + spin_lock(&(pool->lock)); + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else { + /* give up */ + n = DMA_ERROR_CODE; + goto bail; + } + } + if (n < pool->hint || need_flush(iommu)) { + clear_flush(iommu); + iommu->lazy_flush(iommu); + } + + end = n + npages; + pool->hint = end; + + /* Update handle for SG allocations */ + if (handle) + *handle = end; +bail: + spin_unlock_irqrestore(&(pool->lock), flags); + + return n; +} +EXPORT_SYMBOL(iommu_tbl_range_alloc); + +static struct iommu_pool *get_pool(struct iommu_map_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); + + /* The large pool is the last pool at the top of the table */ + if (large_pool && entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr >= tbl->nr_pools); + p = &tbl->pools[pool_nr]; + } + return p; +} + +/* Caller supplies the index of the entry into the iommu map table + * itself when the mapping from dma_addr to the entry is not the + * default addr->entry mapping below. + */ +void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, + unsigned long npages, unsigned long entry) +{ + struct iommu_pool *pool; + unsigned long flags; + unsigned long shift = iommu->table_shift; + + if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */ + entry = (dma_addr - iommu->table_map_base) >> shift; + pool = get_pool(iommu, entry); + + spin_lock_irqsave(&(pool->lock), flags); + bitmap_clear(iommu->map, entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); +} +EXPORT_SYMBOL(iommu_tbl_range_free); From bb620c3d3925aec0ed4f21010c86df08ec18a8c7 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 9 Apr 2015 15:33:31 -0400 Subject: [PATCH 2/4] sparc: Make sparc64 use scalable lib/iommu-common.c functions In iperf experiments running linux as the Tx side (TCP client) with 10 threads results in a severe performance drop when TSO is disabled, indicating a weakness in the software that can be avoided by using the scalable IOMMU arena DMA allocation. Baseline numbers before this patch: with default settings (TSO enabled) : 9-9.5 Gbps Disable TSO using ethtool- drops badly: 2-3 Gbps. After this patch, iperf client with 10 threads, can give a throughput of at least 8.5 Gbps, even when TSO is disabled. Signed-off-by: Sowmini Varadhan Acked-by: Benjamin Herrenschmidt Signed-off-by: David S. Miller --- arch/sparc/include/asm/iommu_64.h | 7 +- arch/sparc/kernel/iommu.c | 172 +++++++--------------------- arch/sparc/kernel/iommu_common.h | 8 -- arch/sparc/kernel/pci_sun4v.c | 181 +++++++++++++----------------- 4 files changed, 127 insertions(+), 241 deletions(-) diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index 2b9321ab064d..cd0d69fa7592 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -16,6 +16,7 @@ #define IOPTE_WRITE 0x0000000000000002UL #define IOMMU_NUM_CTXS 4096 +#include struct iommu_arena { unsigned long *map; @@ -24,11 +25,10 @@ struct iommu_arena { }; struct iommu { + struct iommu_map_table tbl; spinlock_t lock; - struct iommu_arena arena; - void (*flush_all)(struct iommu *); + u32 dma_addr_mask; iopte_t *page_table; - u32 page_table_map_base; unsigned long iommu_control; unsigned long iommu_tsbbase; unsigned long iommu_flush; @@ -40,7 +40,6 @@ struct iommu { unsigned long dummy_page_pa; unsigned long ctx_lowest_free; DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS); - u32 dma_addr_mask; }; struct strbuf { diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index bfa4d0c2df42..5320689c06e9 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_PCI #include @@ -45,8 +46,9 @@ "i" (ASI_PHYS_BYPASS_EC_E)) /* Must be invoked under the IOMMU lock. */ -static void iommu_flushall(struct iommu *iommu) +static void iommu_flushall(struct iommu_map_table *iommu_map_table) { + struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl); if (iommu->iommu_flushinv) { iommu_write(iommu->iommu_flushinv, ~(u64)0); } else { @@ -87,94 +89,6 @@ static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte) iopte_val(*iopte) = val; } -/* Based almost entirely upon the ppc64 iommu allocator. If you use the 'handle' - * facility it must all be done in one pass while under the iommu lock. - * - * On sun4u platforms, we only flush the IOMMU once every time we've passed - * over the entire page table doing allocations. Therefore we only ever advance - * the hint and cannot backtrack it. - */ -unsigned long iommu_range_alloc(struct device *dev, - struct iommu *iommu, - unsigned long npages, - unsigned long *handle) -{ - unsigned long n, end, start, limit, boundary_size; - struct iommu_arena *arena = &iommu->arena; - int pass = 0; - - /* This allocator was derived from x86_64's bit string search */ - - /* Sanity check */ - if (unlikely(npages == 0)) { - if (printk_ratelimit()) - WARN_ON(1); - return DMA_ERROR_CODE; - } - - if (handle && *handle) - start = *handle; - else - start = arena->hint; - - limit = arena->limit; - - /* The case below can happen if we have a small segment appended - * to a large, or when the previous alloc was at the very end of - * the available space. If so, go back to the beginning and flush. - */ - if (start >= limit) { - start = 0; - if (iommu->flush_all) - iommu->flush_all(iommu); - } - - again: - - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << IO_PAGE_SHIFT); - else - boundary_size = ALIGN(1UL << 32, 1 << IO_PAGE_SHIFT); - - n = iommu_area_alloc(arena->map, limit, start, npages, - iommu->page_table_map_base >> IO_PAGE_SHIFT, - boundary_size >> IO_PAGE_SHIFT, 0); - if (n == -1) { - if (likely(pass < 1)) { - /* First failure, rescan from the beginning. */ - start = 0; - if (iommu->flush_all) - iommu->flush_all(iommu); - pass++; - goto again; - } else { - /* Second failure, give up */ - return DMA_ERROR_CODE; - } - } - - end = n + npages; - - arena->hint = end; - - /* Update handle for SG allocations */ - if (handle) - *handle = end; - - return n; -} - -void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long npages) -{ - struct iommu_arena *arena = &iommu->arena; - unsigned long entry; - - entry = (dma_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; - - bitmap_clear(arena->map, entry, npages); -} - int iommu_table_init(struct iommu *iommu, int tsbsize, u32 dma_offset, u32 dma_addr_mask, int numa_node) @@ -187,22 +101,20 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); iommu->ctx_lowest_free = 1; - iommu->page_table_map_base = dma_offset; + iommu->tbl.table_map_base = dma_offset; iommu->dma_addr_mask = dma_addr_mask; /* Allocate and initialize the free area map. */ sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node); - if (!iommu->arena.map) { - printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n"); + iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node); + if (!iommu->tbl.map) return -ENOMEM; - } - memset(iommu->arena.map, 0, sz); - iommu->arena.limit = num_tsb_entries; + memset(iommu->tbl.map, 0, sz); - if (tlb_type != hypervisor) - iommu->flush_all = iommu_flushall; + iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT, + (tlb_type != hypervisor ? iommu_flushall : NULL), + false, 1, false); /* Allocate and initialize the dummy page which we * set inactive IO PTEs to point to. @@ -235,18 +147,20 @@ out_free_dummy_page: iommu->dummy_page = 0UL; out_free_map: - kfree(iommu->arena.map); - iommu->arena.map = NULL; + kfree(iommu->tbl.map); + iommu->tbl.map = NULL; return -ENOMEM; } -static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu, +static inline iopte_t *alloc_npages(struct device *dev, + struct iommu *iommu, unsigned long npages) { unsigned long entry; - entry = iommu_range_alloc(dev, iommu, npages, NULL); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + (unsigned long)(-1), 0); if (unlikely(entry == DMA_ERROR_CODE)) return NULL; @@ -284,7 +198,7 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp, struct dma_attrs *attrs) { - unsigned long flags, order, first_page; + unsigned long order, first_page; struct iommu *iommu; struct page *page; int npages, nid; @@ -306,16 +220,14 @@ static void *dma_4u_alloc_coherent(struct device *dev, size_t size, iommu = dev->archdata.iommu; - spin_lock_irqsave(&iommu->lock, flags); iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT); - spin_unlock_irqrestore(&iommu->lock, flags); if (unlikely(iopte == NULL)) { free_pages(first_page, order); return NULL; } - *dma_addrp = (iommu->page_table_map_base + + *dma_addrp = (iommu->tbl.table_map_base + ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); ret = (void *) first_page; npages = size >> IO_PAGE_SHIFT; @@ -336,16 +248,12 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { struct iommu *iommu; - unsigned long flags, order, npages; + unsigned long order, npages; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; - spin_lock_irqsave(&iommu->lock, flags); - - iommu_range_free(iommu, dvma, npages); - - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); order = get_order(size); if (order < 10) @@ -375,8 +283,8 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; - spin_lock_irqsave(&iommu->lock, flags); base = alloc_npages(dev, iommu, npages); + spin_lock_irqsave(&iommu->lock, flags); ctx = 0; if (iommu->iommu_ctxflush) ctx = iommu_alloc_ctx(iommu); @@ -385,7 +293,7 @@ static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, if (unlikely(!base)) goto bad; - bus_addr = (iommu->page_table_map_base + + bus_addr = (iommu->tbl.table_map_base + ((base - iommu->page_table) << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); @@ -496,7 +404,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; base = iommu->page_table + - ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + ((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); bus_addr &= IO_PAGE_MASK; spin_lock_irqsave(&iommu->lock, flags); @@ -515,11 +423,10 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, for (i = 0; i < npages; i++) iopte_make_dummy(iommu, base + i); - iommu_range_free(iommu, bus_addr, npages); - iommu_free_ctx(iommu, ctx); - spin_unlock_irqrestore(&iommu->lock, flags); + + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); } static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, @@ -567,7 +474,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, max_seg_size = dma_get_max_seg_size(dev); seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; - base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; + base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; iopte_t *base; @@ -581,7 +488,8 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, /* Allocate iommu entries for that segment */ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); - entry = iommu_range_alloc(dev, iommu, npages, &handle); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, + &handle, (unsigned long)(-1), 0); /* Handle failure */ if (unlikely(entry == DMA_ERROR_CODE)) { @@ -594,7 +502,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, base = iommu->page_table + entry; /* Convert entry to a dma_addr_t */ - dma_addr = iommu->page_table_map_base + + dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT); dma_addr |= (s->offset & ~IO_PAGE_MASK); @@ -654,15 +562,17 @@ iommu_map_failed: vaddr = s->dma_address & IO_PAGE_MASK; npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); - iommu_range_free(iommu, vaddr, npages); - entry = (vaddr - iommu->page_table_map_base) + entry = (vaddr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; base = iommu->page_table + entry; for (j = 0; j < npages; j++) iopte_make_dummy(iommu, base + j); + iommu_tbl_range_free(&iommu->tbl, vaddr, npages, + DMA_ERROR_CODE); + s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; } @@ -684,10 +594,11 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) if (iommu->iommu_ctxflush) { iopte_t *base; u32 bus_addr; + struct iommu_map_table *tbl = &iommu->tbl; bus_addr = sg->dma_address & IO_PAGE_MASK; base = iommu->page_table + - ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + ((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT); ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; } @@ -723,9 +634,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, if (!len) break; npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); - iommu_range_free(iommu, dma_handle, npages); - entry = ((dma_handle - iommu->page_table_map_base) + entry = ((dma_handle - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); base = iommu->page_table + entry; @@ -737,6 +647,8 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, for (i = 0; i < npages; i++) iopte_make_dummy(iommu, base + i); + iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, + DMA_ERROR_CODE); sg = sg_next(sg); } @@ -770,9 +682,10 @@ static void dma_4u_sync_single_for_cpu(struct device *dev, if (iommu->iommu_ctxflush && strbuf->strbuf_ctxflush) { iopte_t *iopte; + struct iommu_map_table *tbl = &iommu->tbl; iopte = iommu->page_table + - ((bus_addr - iommu->page_table_map_base)>>IO_PAGE_SHIFT); + ((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT); ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; } @@ -805,9 +718,10 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, if (iommu->iommu_ctxflush && strbuf->strbuf_ctxflush) { iopte_t *iopte; + struct iommu_map_table *tbl = &iommu->tbl; - iopte = iommu->page_table + - ((sglist[0].dma_address - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + iopte = iommu->page_table + ((sglist[0].dma_address - + tbl->table_map_base) >> IO_PAGE_SHIFT); ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; } diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h index 1ec0de4156e7..f4be0d724fc6 100644 --- a/arch/sparc/kernel/iommu_common.h +++ b/arch/sparc/kernel/iommu_common.h @@ -48,12 +48,4 @@ static inline int is_span_boundary(unsigned long entry, return iommu_is_span_boundary(entry, nr, shift, boundary_size); } -unsigned long iommu_range_alloc(struct device *dev, - struct iommu *iommu, - unsigned long npages, - unsigned long *handle); -void iommu_range_free(struct iommu *iommu, - dma_addr_t dma_addr, - unsigned long npages); - #endif /* _IOMMU_COMMON_H */ diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 47ddbd496a1e..d2fe57dad433 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -155,15 +156,13 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, iommu = dev->archdata.iommu; - spin_lock_irqsave(&iommu->lock, flags); - entry = iommu_range_alloc(dev, iommu, npages, NULL); - spin_unlock_irqrestore(&iommu->lock, flags); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + (unsigned long)(-1), 0); if (unlikely(entry == DMA_ERROR_CODE)) goto range_alloc_fail; - *dma_addrp = (iommu->page_table_map_base + - (entry << IO_PAGE_SHIFT)); + *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); ret = (void *) first_page; first_page = __pa(first_page); @@ -188,45 +187,46 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, return ret; iommu_map_fail: - /* Interrupts are disabled. */ - spin_lock(&iommu->lock); - iommu_range_free(iommu, *dma_addrp, npages); - spin_unlock_irqrestore(&iommu->lock, flags); + iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE); range_alloc_fail: free_pages(first_page, order); return NULL; } +static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, + unsigned long npages) +{ + u32 devhandle = *(u32 *)demap_arg; + unsigned long num, flags; + + local_irq_save(flags); + do { + num = pci_sun4v_iommu_demap(devhandle, + HV_PCI_TSBID(0, entry), + npages); + + entry += num; + npages -= num; + } while (npages != 0); + local_irq_restore(flags); +} + static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, dma_addr_t dvma, struct dma_attrs *attrs) { struct pci_pbm_info *pbm; struct iommu *iommu; - unsigned long flags, order, npages, entry; + unsigned long order, npages, entry; u32 devhandle; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; devhandle = pbm->devhandle; - entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT); - - spin_lock_irqsave(&iommu->lock, flags); - - iommu_range_free(iommu, dvma, npages); - - do { - unsigned long num; - - num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), - npages); - entry += num; - npages -= num; - } while (npages != 0); - - spin_unlock_irqrestore(&iommu->lock, flags); - + entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); + dma_4v_iommu_demap(&devhandle, entry, npages); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); order = get_order(size); if (order < 10) free_pages((unsigned long)cpu, order); @@ -253,15 +253,13 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; - spin_lock_irqsave(&iommu->lock, flags); - entry = iommu_range_alloc(dev, iommu, npages, NULL); - spin_unlock_irqrestore(&iommu->lock, flags); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + (unsigned long)(-1), 0); if (unlikely(entry == DMA_ERROR_CODE)) goto bad; - bus_addr = (iommu->page_table_map_base + - (entry << IO_PAGE_SHIFT)); + bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); prot = HV_PCI_MAP_ATTR_READ; @@ -290,11 +288,7 @@ bad: return DMA_ERROR_CODE; iommu_map_fail: - /* Interrupts are disabled. */ - spin_lock(&iommu->lock); - iommu_range_free(iommu, bus_addr, npages); - spin_unlock_irqrestore(&iommu->lock, flags); - + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); return DMA_ERROR_CODE; } @@ -304,7 +298,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, { struct pci_pbm_info *pbm; struct iommu *iommu; - unsigned long flags, npages; + unsigned long npages; long entry; u32 devhandle; @@ -321,22 +315,9 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; bus_addr &= IO_PAGE_MASK; - - spin_lock_irqsave(&iommu->lock, flags); - - iommu_range_free(iommu, bus_addr, npages); - - entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT; - do { - unsigned long num; - - num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), - npages); - entry += num; - npages -= num; - } while (npages != 0); - - spin_unlock_irqrestore(&iommu->lock, flags); + entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; + dma_4v_iommu_demap(&devhandle, entry, npages); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, @@ -371,14 +352,14 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, /* Init first segment length for backout at failure */ outs->dma_length = 0; - spin_lock_irqsave(&iommu->lock, flags); + local_irq_save(flags); iommu_batch_start(dev, prot, ~0UL); max_seg_size = dma_get_max_seg_size(dev); seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; - base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT; + base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; @@ -391,7 +372,8 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, /* Allocate iommu entries for that segment */ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); - entry = iommu_range_alloc(dev, iommu, npages, &handle); + entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, + &handle, (unsigned long)(-1), 0); /* Handle failure */ if (unlikely(entry == DMA_ERROR_CODE)) { @@ -404,8 +386,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, iommu_batch_new_entry(entry); /* Convert entry to a dma_addr_t */ - dma_addr = iommu->page_table_map_base + - (entry << IO_PAGE_SHIFT); + dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT); dma_addr |= (s->offset & ~IO_PAGE_MASK); /* Insert into HW table */ @@ -451,7 +432,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, if (unlikely(err < 0L)) goto iommu_map_failed; - spin_unlock_irqrestore(&iommu->lock, flags); + local_irq_restore(flags); if (outcount < incount) { outs = sg_next(outs); @@ -469,7 +450,8 @@ iommu_map_failed: vaddr = s->dma_address & IO_PAGE_MASK; npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); - iommu_range_free(iommu, vaddr, npages); + iommu_tbl_range_free(&iommu->tbl, vaddr, npages, + DMA_ERROR_CODE); /* XXX demap? XXX */ s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -477,7 +459,7 @@ iommu_map_failed: if (s == outs) break; } - spin_unlock_irqrestore(&iommu->lock, flags); + local_irq_restore(flags); return 0; } @@ -489,7 +471,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, struct pci_pbm_info *pbm; struct scatterlist *sg; struct iommu *iommu; - unsigned long flags; + unsigned long flags, entry; u32 devhandle; BUG_ON(direction == DMA_NONE); @@ -498,33 +480,27 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, pbm = dev->archdata.host_controller; devhandle = pbm->devhandle; - spin_lock_irqsave(&iommu->lock, flags); + local_irq_save(flags); sg = sglist; while (nelems--) { dma_addr_t dma_handle = sg->dma_address; unsigned int len = sg->dma_length; - unsigned long npages, entry; + unsigned long npages; + struct iommu_map_table *tbl = &iommu->tbl; + unsigned long shift = IO_PAGE_SHIFT; if (!len) break; npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); - iommu_range_free(iommu, dma_handle, npages); - - entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT); - while (npages) { - unsigned long num; - - num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), - npages); - entry += num; - npages -= num; - } - + entry = ((dma_handle - tbl->table_map_base) >> shift); + dma_4v_iommu_demap(&devhandle, entry, npages); + iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, + DMA_ERROR_CODE); sg = sg_next(sg); } - spin_unlock_irqrestore(&iommu->lock, flags); + local_irq_restore(flags); } static struct dma_map_ops sun4v_dma_ops = { @@ -550,30 +526,33 @@ static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent) } static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, - struct iommu *iommu) + struct iommu_map_table *iommu) { - struct iommu_arena *arena = &iommu->arena; - unsigned long i, cnt = 0; + struct iommu_pool *pool; + unsigned long i, pool_nr, cnt = 0; u32 devhandle; devhandle = pbm->devhandle; - for (i = 0; i < arena->limit; i++) { - unsigned long ret, io_attrs, ra; + for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) { + pool = &(iommu->pools[pool_nr]); + for (i = pool->start; i <= pool->end; i++) { + unsigned long ret, io_attrs, ra; - ret = pci_sun4v_iommu_getmap(devhandle, - HV_PCI_TSBID(0, i), - &io_attrs, &ra); - if (ret == HV_EOK) { - if (page_in_phys_avail(ra)) { - pci_sun4v_iommu_demap(devhandle, - HV_PCI_TSBID(0, i), 1); - } else { - cnt++; - __set_bit(i, arena->map); + ret = pci_sun4v_iommu_getmap(devhandle, + HV_PCI_TSBID(0, i), + &io_attrs, &ra); + if (ret == HV_EOK) { + if (page_in_phys_avail(ra)) { + pci_sun4v_iommu_demap(devhandle, + HV_PCI_TSBID(0, + i), 1); + } else { + cnt++; + __set_bit(i, iommu->map); + } } } } - return cnt; } @@ -603,20 +582,22 @@ static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm) /* Setup initial software IOMMU state. */ spin_lock_init(&iommu->lock); iommu->ctx_lowest_free = 1; - iommu->page_table_map_base = dma_offset; + iommu->tbl.table_map_base = dma_offset; iommu->dma_addr_mask = dma_mask; /* Allocate and initialize the free area map. */ sz = (num_tsb_entries + 7) / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kzalloc(sz, GFP_KERNEL); - if (!iommu->arena.map) { + iommu->tbl.map = kzalloc(sz, GFP_KERNEL); + if (!iommu->tbl.map) { printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n"); return -ENOMEM; } - iommu->arena.limit = num_tsb_entries; - - sz = probe_existing_entries(pbm, iommu); + iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT, + NULL, false /* no large_pool */, + 0 /* default npools */, + false /* want span boundary checking */); + sz = probe_existing_entries(pbm, &iommu->tbl); if (sz) printk("%s: Imported %lu TSB entries from OBP\n", pbm->name, sz); From 0ae53ed15d9b87b883b593a9884957cfa4fc2480 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Thu, 9 Apr 2015 15:33:32 -0400 Subject: [PATCH 3/4] sparc: Make LDC use common iommu poll management functions Note that this conversion is only being done to consolidate the code and ensure that the common code provides the sufficient abstraction. It is not expected to result in any noticeable performance improvement, as there is typically one ldc_iommu per vnet_port, and each one has 8k entries, with a typical request for 1-4 pages. Thus LDC uses npools == 1. Signed-off-by: Sowmini Varadhan Acked-by: Benjamin Herrenschmidt Signed-off-by: David S. Miller --- arch/sparc/kernel/ldc.c | 153 +++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 88 deletions(-) diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 274a9f59d95c..d2ae0f70059e 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,10 @@ #define DRV_MODULE_VERSION "1.1" #define DRV_MODULE_RELDATE "July 22, 2008" +#define COOKIE_PGSZ_CODE 0xf000000000000000ULL +#define COOKIE_PGSZ_CODE_SHIFT 60ULL + + static char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; #define LDC_PACKET_SIZE 64 @@ -98,10 +103,10 @@ static const struct ldc_mode_ops stream_ops; int ldom_domaining_enabled; struct ldc_iommu { - /* Protects arena alloc/free. */ + /* Protects ldc_unmap. */ spinlock_t lock; - struct iommu_arena arena; struct ldc_mtable_entry *page_table; + struct iommu_map_table iommu_map_table; }; struct ldc_channel { @@ -998,31 +1003,59 @@ static void free_queue(unsigned long num_entries, struct ldc_packet *q) free_pages((unsigned long)q, order); } +static unsigned long ldc_cookie_to_index(u64 cookie, void *arg) +{ + u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT; + /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */ + + cookie &= ~COOKIE_PGSZ_CODE; + + return (cookie >> (13ULL + (szcode * 3ULL))); +} + +static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie, + unsigned long entry, unsigned long npages) +{ + struct ldc_mtable_entry *base; + unsigned long i, shift; + + shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3; + base = iommu->page_table + entry; + for (i = 0; i < npages; i++) { + if (base->cookie) + sun4v_ldc_revoke(id, cookie + (i << shift), + base->cookie); + base->mte = 0; + } +} + /* XXX Make this configurable... XXX */ #define LDC_IOTABLE_SIZE (8 * 1024) -static int ldc_iommu_init(struct ldc_channel *lp) +static int ldc_iommu_init(const char *name, struct ldc_channel *lp) { unsigned long sz, num_tsb_entries, tsbsize, order; - struct ldc_iommu *iommu = &lp->iommu; + struct ldc_iommu *ldc_iommu = &lp->iommu; + struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table; struct ldc_mtable_entry *table; unsigned long hv_err; int err; num_tsb_entries = LDC_IOTABLE_SIZE; tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); - - spin_lock_init(&iommu->lock); + spin_lock_init(&ldc_iommu->lock); sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kzalloc(sz, GFP_KERNEL); - if (!iommu->arena.map) { + iommu->map = kzalloc(sz, GFP_KERNEL); + if (!iommu->map) { printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz); return -ENOMEM; } - - iommu->arena.limit = num_tsb_entries; + iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT, + NULL, false /* no large pool */, + 1 /* npools */, + true /* skip span boundary check */); order = get_order(tsbsize); @@ -1037,7 +1070,7 @@ static int ldc_iommu_init(struct ldc_channel *lp) memset(table, 0, PAGE_SIZE << order); - iommu->page_table = table; + ldc_iommu->page_table = table; hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table), num_tsb_entries); @@ -1049,31 +1082,32 @@ static int ldc_iommu_init(struct ldc_channel *lp) out_free_table: free_pages((unsigned long) table, order); - iommu->page_table = NULL; + ldc_iommu->page_table = NULL; out_free_map: - kfree(iommu->arena.map); - iommu->arena.map = NULL; + kfree(iommu->map); + iommu->map = NULL; return err; } static void ldc_iommu_release(struct ldc_channel *lp) { - struct ldc_iommu *iommu = &lp->iommu; + struct ldc_iommu *ldc_iommu = &lp->iommu; + struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table; unsigned long num_tsb_entries, tsbsize, order; (void) sun4v_ldc_set_map_table(lp->id, 0, 0); - num_tsb_entries = iommu->arena.limit; + num_tsb_entries = iommu->poolsize * iommu->nr_pools; tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); order = get_order(tsbsize); - free_pages((unsigned long) iommu->page_table, order); - iommu->page_table = NULL; + free_pages((unsigned long) ldc_iommu->page_table, order); + ldc_iommu->page_table = NULL; - kfree(iommu->arena.map); - iommu->arena.map = NULL; + kfree(iommu->map); + iommu->map = NULL; } struct ldc_channel *ldc_alloc(unsigned long id, @@ -1140,7 +1174,7 @@ struct ldc_channel *ldc_alloc(unsigned long id, lp->id = id; - err = ldc_iommu_init(lp); + err = ldc_iommu_init(name, lp); if (err) goto out_free_ldc; @@ -1885,40 +1919,6 @@ int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size) } EXPORT_SYMBOL(ldc_read); -static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages) -{ - struct iommu_arena *arena = &iommu->arena; - unsigned long n, start, end, limit; - int pass; - - limit = arena->limit; - start = arena->hint; - pass = 0; - -again: - n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0); - end = n + npages; - if (unlikely(end >= limit)) { - if (likely(pass < 1)) { - limit = start; - start = 0; - pass++; - goto again; - } else { - /* Scanned the whole thing, give up. */ - return -1; - } - } - bitmap_set(arena->map, n, npages); - - arena->hint = end; - - return n; -} - -#define COOKIE_PGSZ_CODE 0xf000000000000000ULL -#define COOKIE_PGSZ_CODE_SHIFT 60ULL - static u64 pagesize_code(void) { switch (PAGE_SIZE) { @@ -1945,23 +1945,14 @@ static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset) page_offset); } -static u64 cookie_to_index(u64 cookie, unsigned long *shift) -{ - u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT; - - cookie &= ~COOKIE_PGSZ_CODE; - - *shift = szcode * 3; - - return (cookie >> (13ULL + (szcode * 3ULL))); -} static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu, unsigned long npages) { long entry; - entry = arena_alloc(iommu, npages); + entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table, + npages, NULL, (unsigned long)-1, 0); if (unlikely(entry < 0)) return NULL; @@ -2090,7 +2081,7 @@ int ldc_map_sg(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, int ncookies, unsigned int map_perm) { - unsigned long i, npages, flags; + unsigned long i, npages; struct ldc_mtable_entry *base; struct cookie_state state; struct ldc_iommu *iommu; @@ -2109,9 +2100,7 @@ int ldc_map_sg(struct ldc_channel *lp, iommu = &lp->iommu; - spin_lock_irqsave(&iommu->lock, flags); base = alloc_npages(iommu, npages); - spin_unlock_irqrestore(&iommu->lock, flags); if (!base) return -ENOMEM; @@ -2136,7 +2125,7 @@ int ldc_map_single(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, int ncookies, unsigned int map_perm) { - unsigned long npages, pa, flags; + unsigned long npages, pa; struct ldc_mtable_entry *base; struct cookie_state state; struct ldc_iommu *iommu; @@ -2152,9 +2141,7 @@ int ldc_map_single(struct ldc_channel *lp, iommu = &lp->iommu; - spin_lock_irqsave(&iommu->lock, flags); base = alloc_npages(iommu, npages); - spin_unlock_irqrestore(&iommu->lock, flags); if (!base) return -ENOMEM; @@ -2172,35 +2159,25 @@ int ldc_map_single(struct ldc_channel *lp, } EXPORT_SYMBOL(ldc_map_single); + static void free_npages(unsigned long id, struct ldc_iommu *iommu, u64 cookie, u64 size) { - struct iommu_arena *arena = &iommu->arena; - unsigned long i, shift, index, npages; - struct ldc_mtable_entry *base; + unsigned long npages, entry; npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT; - index = cookie_to_index(cookie, &shift); - base = iommu->page_table + index; - BUG_ON(index > arena->limit || - (index + npages) > arena->limit); - - for (i = 0; i < npages; i++) { - if (base->cookie) - sun4v_ldc_revoke(id, cookie + (i << shift), - base->cookie); - base->mte = 0; - __clear_bit(index + i, arena->map); - } + entry = ldc_cookie_to_index(cookie, iommu); + ldc_demap(iommu, id, cookie, entry, npages); + iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry); } void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, int ncookies) { struct ldc_iommu *iommu = &lp->iommu; - unsigned long flags; int i; + unsigned long flags; spin_lock_irqsave(&iommu->lock, flags); for (i = 0; i < ncookies; i++) { From 2f0c0fdc085c0d415457a1c52344f72e12c4cec6 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 18 Apr 2015 12:33:55 -0700 Subject: [PATCH 4/4] iommu-common: Fix PARISC compile-time warnings Fixes warnings due to - no DMA_ERROR_CODE on PARISC, - sizeof (unsigned long) == 4 bytes on PARISC. Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- lib/iommu-common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/iommu-common.c b/lib/iommu-common.c index b99f1d744a8d..a1a517cba7ec 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -11,6 +11,10 @@ #include #include +#ifndef DMA_ERROR_CODE +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) +#endif + unsigned long iommu_large_alloc = 15; static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); @@ -171,7 +175,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 1 << iommu->table_shift); else - boundary_size = ALIGN(1UL << 32, 1 << iommu->table_shift); + boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); boundary_size = boundary_size >> iommu->table_shift; /*