linux/mm/sparse-vmemmap.c
Anshuman Khandual 1d9cfee753 mm/sparsemem: enable vmem_altmap support in vmemmap_populate_basepages()
Patch series "arm64: Enable vmemmap mapping from device memory", v4.

This series enables vmemmap backing memory allocation from device memory
ranges on arm64.  But before that, it enables vmemmap_populate_basepages()
and vmemmap_alloc_block_buf() to accommodate struct vmem_altmap based
alocation requests.

This patch (of 3):

vmemmap_populate_basepages() is used across platforms to allocate backing
memory for vmemmap mapping.  This is used as a standard default choice or
as a fallback when intended huge pages allocation fails.  This just
creates entire vmemmap mapping with base pages (PAGE_SIZE).

On arm64 platforms, vmemmap_populate_basepages() is called instead of the
platform specific vmemmap_populate() when ARM64_SWAPPER_USES_SECTION_MAPS
is not enabled as in case for ARM64_16K_PAGES and ARM64_64K_PAGES configs.

At present vmemmap_populate_basepages() does not support allocating from
driver defined struct vmem_altmap while trying to create vmemmap mapping
for a device memory range.  It prevents ARM64_16K_PAGES and
ARM64_64K_PAGES configs on arm64 from supporting device memory with
vmemap_altmap request.

This enables vmem_altmap support in vmemmap_populate_basepages() unlocking
device memory allocation for vmemap mapping on arm64 platforms with 16K or
64K base page configs.

Each architecture should evaluate and decide on subscribing device memory
based base page allocation through vmemmap_populate_basepages().  Hence
lets keep it disabled on all archs in order to preserve the existing
semantics.  A subsequent patch enables it on arm64.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Jia He <justin.he@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hsin-Yi Wang <hsinyi@chromium.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Yu Zhao <yuzhao@google.com>
Link: http://lkml.kernel.org/r/1594004178-8861-1-git-send-email-anshuman.khandual@arm.com
Link: http://lkml.kernel.org/r/1594004178-8861-2-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2020-08-07 11:33:27 -07:00

276 lines
7.0 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Virtual Memory Map support
*
* (C) 2007 sgi. Christoph Lameter.
*
* Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
* virt_to_page, page_address() to be implemented as a base offset
* calculation without memory access.
*
* However, virtual mappings need a page table and TLBs. Many Linux
* architectures already map their physical space using 1-1 mappings
* via TLBs. For those arches the virtual memory map is essentially
* for free if we use the same page size as the 1-1 mappings. In that
* case the overhead consists of a few additional pages that are
* allocated to create a view of memory for vmemmap.
*
* The architecture is expected to provide a vmemmap_populate() function
* to instantiate the mapping.
*/
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
/*
* Allocate a block of memory to be used to back the virtual memory map
* or to back the page tables that are used to create the mapping.
* Uses the main allocators if they are available, else bootmem.
*/
static void * __ref __earlyonly_bootmem_alloc(int node,
unsigned long size,
unsigned long align,
unsigned long goal)
{
return memblock_alloc_try_nid_raw(size, align, goal,
MEMBLOCK_ALLOC_ACCESSIBLE, node);
}
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
/* If the main allocator is up use that, fallback to bootmem. */
if (slab_is_available()) {
gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
int order = get_order(size);
static bool warned;
struct page *page;
page = alloc_pages_node(node, gfp_mask, order);
if (page)
return page_address(page);
if (!warned) {
warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
"vmemmap alloc failure: order:%u", order);
warned = true;
}
return NULL;
} else
return __earlyonly_bootmem_alloc(node, size, size,
__pa(MAX_DMA_ADDRESS));
}
/* need to make sure size is all the same during early stage */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{
void *ptr = sparse_buffer_alloc(size);
if (!ptr)
ptr = vmemmap_alloc_block(size, node);
return ptr;
}
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
return altmap->base_pfn + altmap->reserve + altmap->alloc
+ altmap->align;
}
static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
unsigned long allocated = altmap->alloc + altmap->align;
if (altmap->free > allocated)
return altmap->free - allocated;
return 0;
}
/**
* altmap_alloc_block_buf - allocate pages from the device page map
* @altmap: device page map
* @size: size (in bytes) of the allocation
*
* Allocations are aligned to the size of the request.
*/
void * __meminit altmap_alloc_block_buf(unsigned long size,
struct vmem_altmap *altmap)
{
unsigned long pfn, nr_pfns, nr_align;
if (size & ~PAGE_MASK) {
pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
__func__, size);
return NULL;
}
pfn = vmem_altmap_next_pfn(altmap);
nr_pfns = size >> PAGE_SHIFT;
nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
nr_align = ALIGN(pfn, nr_align) - pfn;
if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
return NULL;
altmap->alloc += nr_pfns;
altmap->align += nr_align;
pfn += nr_align;
pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
return __va(__pfn_to_phys(pfn));
}
void __meminit vmemmap_verify(pte_t *pte, int node,
unsigned long start, unsigned long end)
{
unsigned long pfn = pte_pfn(*pte);
int actual_node = early_pfn_to_nid(pfn);
if (node_distance(actual_node, node) > LOCAL_DISTANCE)
pr_warn("[%lx-%lx] potential offnode page_structs\n",
start, end - 1);
}
pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
struct vmem_altmap *altmap)
{
pte_t *pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte)) {
pte_t entry;
void *p;
if (altmap)
p = altmap_alloc_block_buf(PAGE_SIZE, altmap);
else
p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
if (!p)
return NULL;
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
set_pte_at(&init_mm, addr, pte, entry);
}
return pte;
}
static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
void *p = vmemmap_alloc_block(size, node);
if (!p)
return NULL;
memset(p, 0, size);
return p;
}
pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
pmd_t *pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd)) {
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
pmd_populate_kernel(&init_mm, pmd, p);
}
return pmd;
}
pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
pud_t *pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
pud_populate(&init_mm, pud, p);
}
return pud;
}
p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
p4d_t *p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d)) {
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
p4d_populate(&init_mm, p4d, p);
}
return p4d;
}
pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
pgd_t *pgd = pgd_offset_k(addr);
if (pgd_none(*pgd)) {
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
if (!p)
return NULL;
pgd_populate(&init_mm, pgd, p);
}
return pgd;
}
int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node, struct vmem_altmap *altmap)
{
unsigned long addr = start;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
for (; addr < end; addr += PAGE_SIZE) {
pgd = vmemmap_pgd_populate(addr, node);
if (!pgd)
return -ENOMEM;
p4d = vmemmap_p4d_populate(pgd, addr, node);
if (!p4d)
return -ENOMEM;
pud = vmemmap_pud_populate(p4d, addr, node);
if (!pud)
return -ENOMEM;
pmd = vmemmap_pmd_populate(pud, addr, node);
if (!pmd)
return -ENOMEM;
pte = vmemmap_pte_populate(pmd, addr, node, altmap);
if (!pte)
return -ENOMEM;
vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
}
return 0;
}
struct page * __meminit __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
unsigned long start;
unsigned long end;
/*
* The minimum granularity of memmap extensions is
* PAGES_PER_SUBSECTION as allocations are tracked in the
* 'subsection_map' bitmap of the section.
*/
end = ALIGN(pfn + nr_pages, PAGES_PER_SUBSECTION);
pfn &= PAGE_SUBSECTION_MASK;
nr_pages = end - pfn;
start = (unsigned long) pfn_to_page(pfn);
end = start + nr_pages * sizeof(struct page);
if (vmemmap_populate(start, end, nid, altmap))
return NULL;
return pfn_to_page(pfn);
}