dma-pool: add additional coherent pools to map to gfp mask

The single atomic pool is allocated from the lowest zone possible, since
memory from that zone is guaranteed to be usable for any DMA allocation.

Devices may allocate through the DMA API but not have a strict reliance
on GFP_DMA memory.  Since the atomic pool will be used for all
non-blockable allocations, returning all memory from ZONE_DMA may
unnecessarily deplete the zone.

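Provide multiple atomic pools, one for each optimal gfp mask a device can
map to: GFP_KERNEL always, plus GFP_DMA and GFP_DMA32 when the
corresponding zone (CONFIG_ZONE_DMA, CONFIG_ZONE_DMA32) is configured.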

When allocating non-blockable memory, determine the optimal gfp mask of
the device and use the appropriate atomic pool.

The pool is derived from the device's coherent DMA mask on both the
allocation and the free path.  The coherent DMA mask will remain the same
between allocation and free and, thus, memory will be freed to the same
atomic pool it was allocated from.

__dma_atomic_pool_init() will be changed to return struct gen_pool *
later once dynamic expansion is added.

Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
David Rientjes 2020-04-14 17:04:55 -07:00 committed by Christoph Hellwig
parent e860c299ac
commit c84dc6e68a
5 changed files with 91 additions and 54 deletions

View File

@ -952,7 +952,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
/* Non-coherent atomic allocation? Easy */ /* Non-coherent atomic allocation? Easy */
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
dma_free_from_pool(cpu_addr, alloc_size)) dma_free_from_pool(dev, cpu_addr, alloc_size))
return; return;
if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
@ -1035,7 +1035,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
!gfpflags_allow_blocking(gfp) && !coherent) !gfpflags_allow_blocking(gfp) && !coherent)
cpu_addr = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp); cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
gfp);
else else
cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs); cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
if (!cpu_addr) if (!cpu_addr)

View File

@ -67,6 +67,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size,
} }
u64 dma_direct_get_required_mask(struct device *dev); u64 dma_direct_get_required_mask(struct device *dev);
gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
u64 *phys_mask);
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs); gfp_t gfp, unsigned long attrs);
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,

View File

@ -630,9 +630,9 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller); pgprot_t prot, const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size); void dma_common_free_remap(void *cpu_addr, size_t size);
bool dma_in_atomic_pool(void *start, size_t size); void *dma_alloc_from_pool(struct device *dev, size_t size,
void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags); struct page **ret_page, gfp_t flags);
bool dma_free_from_pool(void *start, size_t size); bool dma_free_from_pool(struct device *dev, void *start, size_t size);
int int
dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,

View File

@ -45,7 +45,7 @@ u64 dma_direct_get_required_mask(struct device *dev)
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1; return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
} }
static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
u64 *phys_limit) u64 *phys_limit)
{ {
u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
@ -89,7 +89,7 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
/* we always manually zero the memory once we are done: */ /* we always manually zero the memory once we are done: */
gfp &= ~__GFP_ZERO; gfp &= ~__GFP_ZERO;
gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit); &phys_limit);
page = dma_alloc_contiguous(dev, alloc_size, gfp); page = dma_alloc_contiguous(dev, alloc_size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
@ -128,7 +128,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
dma_alloc_need_uncached(dev, attrs) && dma_alloc_need_uncached(dev, attrs) &&
!gfpflags_allow_blocking(gfp)) { !gfpflags_allow_blocking(gfp)) {
ret = dma_alloc_from_pool(PAGE_ALIGN(size), &page, gfp); ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
if (!ret) if (!ret)
return NULL; return NULL;
goto done; goto done;
@ -212,7 +212,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
} }
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
dma_free_from_pool(cpu_addr, PAGE_ALIGN(size))) dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
return; return;
if (force_dma_unencrypted(dev)) if (force_dma_unencrypted(dev))

View File

@ -10,7 +10,9 @@
#include <linux/genalloc.h> #include <linux/genalloc.h>
#include <linux/slab.h> #include <linux/slab.h>
static struct gen_pool *atomic_pool __ro_after_init; static struct gen_pool *atomic_pool_dma __ro_after_init;
static struct gen_pool *atomic_pool_dma32 __ro_after_init;
static struct gen_pool *atomic_pool_kernel __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
@ -22,89 +24,119 @@ static int __init early_coherent_pool(char *p)
} }
early_param("coherent_pool", early_coherent_pool); early_param("coherent_pool", early_coherent_pool);
static gfp_t dma_atomic_pool_gfp(void) static int __init __dma_atomic_pool_init(struct gen_pool **pool,
size_t pool_size, gfp_t gfp)
{ {
if (IS_ENABLED(CONFIG_ZONE_DMA)) const unsigned int order = get_order(pool_size);
return GFP_DMA; const unsigned long nr_pages = pool_size >> PAGE_SHIFT;
if (IS_ENABLED(CONFIG_ZONE_DMA32))
return GFP_DMA32;
return GFP_KERNEL;
}
static int __init dma_atomic_pool_init(void)
{
unsigned int pool_size_order = get_order(atomic_pool_size);
unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
struct page *page; struct page *page;
void *addr; void *addr;
int ret; int ret;
if (dev_get_cma_area(NULL)) if (dev_get_cma_area(NULL))
page = dma_alloc_from_contiguous(NULL, nr_pages, page = dma_alloc_from_contiguous(NULL, nr_pages, order, false);
pool_size_order, false);
else else
page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order); page = alloc_pages(gfp, order);
if (!page) if (!page)
goto out; goto out;
arch_dma_prep_coherent(page, atomic_pool_size); arch_dma_prep_coherent(page, pool_size);
atomic_pool = gen_pool_create(PAGE_SHIFT, -1); *pool = gen_pool_create(PAGE_SHIFT, -1);
if (!atomic_pool) if (!*pool)
goto free_page; goto free_page;
addr = dma_common_contiguous_remap(page, atomic_pool_size, addr = dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL), pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0)); __builtin_return_address(0));
if (!addr) if (!addr)
goto destroy_genpool; goto destroy_genpool;
ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr, ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
page_to_phys(page), atomic_pool_size, -1); pool_size, -1);
if (ret) if (ret)
goto remove_mapping; goto remove_mapping;
gen_pool_set_algo(atomic_pool, gen_pool_first_fit_order_align, NULL); gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
atomic_pool_size / 1024); pool_size >> 10, &gfp);
return 0; return 0;
remove_mapping: remove_mapping:
dma_common_free_remap(addr, atomic_pool_size); dma_common_free_remap(addr, pool_size);
destroy_genpool: destroy_genpool:
gen_pool_destroy(atomic_pool); gen_pool_destroy(*pool);
atomic_pool = NULL; *pool = NULL;
free_page: free_page:
if (!dma_release_from_contiguous(NULL, page, nr_pages)) if (!dma_release_from_contiguous(NULL, page, nr_pages))
__free_pages(page, pool_size_order); __free_pages(page, order);
out: out:
pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
atomic_pool_size / 1024); pool_size >> 10, &gfp);
return -ENOMEM; return -ENOMEM;
} }
static int __init dma_atomic_pool_init(void)
{
int ret = 0;
int err;
ret = __dma_atomic_pool_init(&atomic_pool_kernel, atomic_pool_size,
GFP_KERNEL);
if (IS_ENABLED(CONFIG_ZONE_DMA)) {
err = __dma_atomic_pool_init(&atomic_pool_dma,
atomic_pool_size, GFP_DMA);
if (!ret && err)
ret = err;
}
if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
err = __dma_atomic_pool_init(&atomic_pool_dma32,
atomic_pool_size, GFP_DMA32);
if (!ret && err)
ret = err;
}
return ret;
}
postcore_initcall(dma_atomic_pool_init); postcore_initcall(dma_atomic_pool_init);
bool dma_in_atomic_pool(void *start, size_t size) static inline struct gen_pool *dev_to_pool(struct device *dev)
{ {
if (unlikely(!atomic_pool)) u64 phys_mask;
return false; gfp_t gfp;
return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_mask);
if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
return atomic_pool_dma;
if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
return atomic_pool_dma32;
return atomic_pool_kernel;
} }
void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) static bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
{ {
struct gen_pool *pool = dev_to_pool(dev);
if (unlikely(!pool))
return false;
return gen_pool_has_addr(pool, (unsigned long)start, size);
}
void *dma_alloc_from_pool(struct device *dev, size_t size,
struct page **ret_page, gfp_t flags)
{
struct gen_pool *pool = dev_to_pool(dev);
unsigned long val; unsigned long val;
void *ptr = NULL; void *ptr = NULL;
if (!atomic_pool) { if (!pool) {
WARN(1, "coherent pool not initialised!\n"); WARN(1, "%pGg atomic pool not initialised!\n", &flags);
return NULL; return NULL;
} }
val = gen_pool_alloc(atomic_pool, size); val = gen_pool_alloc(pool, size);
if (val) { if (val) {
phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
*ret_page = pfn_to_page(__phys_to_pfn(phys)); *ret_page = pfn_to_page(__phys_to_pfn(phys));
ptr = (void *)val; ptr = (void *)val;
@ -114,10 +146,12 @@ void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
return ptr; return ptr;
} }
bool dma_free_from_pool(void *start, size_t size) bool dma_free_from_pool(struct device *dev, void *start, size_t size)
{ {
if (!dma_in_atomic_pool(start, size)) struct gen_pool *pool = dev_to_pool(dev);
if (!dma_in_atomic_pool(dev, start, size))
return false; return false;
gen_pool_free(atomic_pool, (unsigned long)start, size); gen_pool_free(pool, (unsigned long)start, size);
return true; return true;
} }