linux/arch/arm/mm/init.c
Will Deacon 7b7bf499f7 ARM: 6913/1: sparsemem: allow pfn_valid to be overridden when using SPARSEMEM
In commit eb33575c ("[ARM] Double check memmap is actually valid with a
memmap has unexpected holes V2"), a new function, memmap_valid_within,
was introduced to mmzone.h so that holes in the memmap which pass
pfn_valid in SPARSEMEM configurations can be detected and avoided.

The fix to this problem checks that the pfn <-> page linkages are
correct by calculating the page for the pfn and then checking that
page_to_pfn on that page returns the original pfn. Unfortunately, in
SPARSEMEM configurations, this results in reading from the page flags to
determine the correct section. Since the memmap here has been freed,
junk is read from memory and the check is no longer robust.

In the best case, reading from /proc/pagetypeinfo will give you the
wrong answer. In the worst case, you get SEGVs, Kernel OOPses and hung
CPUs. Furthermore, ioremap implementations that use pfn_valid to
disallow the remapping of normal memory will break.

This patch allows architectures to provide their own pfn_valid function
instead of using the default implementation used by sparsemem. The
architecture-specific version is aware of the memmap state and will
return false when passed a pfn for a freed page within a valid section.

Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-05-26 10:23:24 +01:00

719 lines
18 KiB
C

/*
* linux/arch/arm/mm/init.c
*
* Copyright (C) 1995-2005 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <asm/mach-types.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#include "mm.h"
static unsigned long phys_initrd_start __initdata = 0;
static unsigned long phys_initrd_size __initdata = 0;
static int __init early_initrd(char *p)
{
unsigned long start, size;
char *endp;
start = memparse(p, &endp);
if (*endp == ',') {
size = memparse(endp + 1, NULL);
phys_initrd_start = start;
phys_initrd_size = size;
}
return 0;
}
early_param("initrd", early_initrd);
static int __init parse_tag_initrd(const struct tag *tag)
{
printk(KERN_WARNING "ATAG_INITRD is deprecated; "
"please update your bootloader.\n");
phys_initrd_start = __virt_to_phys(tag->u.initrd.start);
phys_initrd_size = tag->u.initrd.size;
return 0;
}
__tagtable(ATAG_INITRD, parse_tag_initrd);
static int __init parse_tag_initrd2(const struct tag *tag)
{
phys_initrd_start = tag->u.initrd.start;
phys_initrd_size = tag->u.initrd.size;
return 0;
}
__tagtable(ATAG_INITRD2, parse_tag_initrd2);
/*
* This keeps memory configuration data used by a couple memory
* initialization functions, as well as show_mem() for the skipping
* of holes in the memory map. It is populated by arm_add_memory().
*/
struct meminfo meminfo;
void show_mem(unsigned int filter)
{
int free = 0, total = 0, reserved = 0;
int shared = 0, cached = 0, slab = 0, i;
struct meminfo * mi = &meminfo;
printk("Mem-info:\n");
show_free_areas(filter);
for_each_bank (i, mi) {
struct membank *bank = &mi->bank[i];
unsigned int pfn1, pfn2;
struct page *page, *end;
pfn1 = bank_pfn_start(bank);
pfn2 = bank_pfn_end(bank);
page = pfn_to_page(pfn1);
end = pfn_to_page(pfn2 - 1) + 1;
do {
total++;
if (PageReserved(page))
reserved++;
else if (PageSwapCache(page))
cached++;
else if (PageSlab(page))
slab++;
else if (!page_count(page))
free++;
else
shared += page_count(page) - 1;
page++;
} while (page < end);
}
printk("%d pages of RAM\n", total);
printk("%d free pages\n", free);
printk("%d reserved pages\n", reserved);
printk("%d slab pages\n", slab);
printk("%d pages shared\n", shared);
printk("%d pages swap cached\n", cached);
}
static void __init find_limits(unsigned long *min, unsigned long *max_low,
unsigned long *max_high)
{
struct meminfo *mi = &meminfo;
int i;
*min = -1UL;
*max_low = *max_high = 0;
for_each_bank (i, mi) {
struct membank *bank = &mi->bank[i];
unsigned long start, end;
start = bank_pfn_start(bank);
end = bank_pfn_end(bank);
if (*min > start)
*min = start;
if (*max_high < end)
*max_high = end;
if (bank->highmem)
continue;
if (*max_low < end)
*max_low = end;
}
}
static void __init arm_bootmem_init(unsigned long start_pfn,
unsigned long end_pfn)
{
struct memblock_region *reg;
unsigned int boot_pages;
phys_addr_t bitmap;
pg_data_t *pgdat;
/*
* Allocate the bootmem bitmap page. This must be in a region
* of memory which has already been mapped.
*/
boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
bitmap = memblock_alloc_base(boot_pages << PAGE_SHIFT, L1_CACHE_BYTES,
__pfn_to_phys(end_pfn));
/*
* Initialise the bootmem allocator, handing the
* memory banks over to bootmem.
*/
node_set_online(0);
pgdat = NODE_DATA(0);
init_bootmem_node(pgdat, __phys_to_pfn(bitmap), start_pfn, end_pfn);
/* Free the lowmem regions from memblock into bootmem. */
for_each_memblock(memory, reg) {
unsigned long start = memblock_region_memory_base_pfn(reg);
unsigned long end = memblock_region_memory_end_pfn(reg);
if (end >= end_pfn)
end = end_pfn;
if (start >= end)
break;
free_bootmem(__pfn_to_phys(start), (end - start) << PAGE_SHIFT);
}
/* Reserve the lowmem memblock reserved regions in bootmem. */
for_each_memblock(reserved, reg) {
unsigned long start = memblock_region_reserved_base_pfn(reg);
unsigned long end = memblock_region_reserved_end_pfn(reg);
if (end >= end_pfn)
end = end_pfn;
if (start >= end)
break;
reserve_bootmem(__pfn_to_phys(start),
(end - start) << PAGE_SHIFT, BOOTMEM_DEFAULT);
}
}
#ifdef CONFIG_ZONE_DMA
static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole,
unsigned long dma_size)
{
if (size[0] <= dma_size)
return;
size[ZONE_NORMAL] = size[0] - dma_size;
size[ZONE_DMA] = dma_size;
hole[ZONE_NORMAL] = hole[0];
hole[ZONE_DMA] = 0;
}
#endif
static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
unsigned long max_high)
{
unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
struct memblock_region *reg;
/*
* initialise the zones.
*/
memset(zone_size, 0, sizeof(zone_size));
/*
* The memory size has already been determined. If we need
* to do anything fancy with the allocation of this memory
* to the zones, now is the time to do it.
*/
zone_size[0] = max_low - min;
#ifdef CONFIG_HIGHMEM
zone_size[ZONE_HIGHMEM] = max_high - max_low;
#endif
/*
* Calculate the size of the holes.
* holes = node_size - sum(bank_sizes)
*/
memcpy(zhole_size, zone_size, sizeof(zhole_size));
for_each_memblock(memory, reg) {
unsigned long start = memblock_region_memory_base_pfn(reg);
unsigned long end = memblock_region_memory_end_pfn(reg);
if (start < max_low) {
unsigned long low_end = min(end, max_low);
zhole_size[0] -= low_end - start;
}
#ifdef CONFIG_HIGHMEM
if (end > max_low) {
unsigned long high_start = max(start, max_low);
zhole_size[ZONE_HIGHMEM] -= end - high_start;
}
#endif
}
#ifdef ARM_DMA_ZONE_SIZE
#ifndef CONFIG_ZONE_DMA
#error ARM_DMA_ZONE_SIZE set but no DMA zone to limit allocations
#endif
/*
* Adjust the sizes according to any special requirements for
* this machine type.
*/
arm_adjust_dma_zone(zone_size, zhole_size,
ARM_DMA_ZONE_SIZE >> PAGE_SHIFT);
#endif
free_area_init_node(0, zone_size, min, zhole_size);
}
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
return memblock_is_memory(pfn << PAGE_SHIFT);
}
EXPORT_SYMBOL(pfn_valid);
#endif
#ifndef CONFIG_SPARSEMEM
static void arm_memory_present(void)
{
}
#else
static void arm_memory_present(void)
{
struct memblock_region *reg;
for_each_memblock(memory, reg)
memory_present(0, memblock_region_memory_base_pfn(reg),
memblock_region_memory_end_pfn(reg));
}
#endif
static int __init meminfo_cmp(const void *_a, const void *_b)
{
const struct membank *a = _a, *b = _b;
long cmp = bank_pfn_start(a) - bank_pfn_start(b);
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
}
void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
{
int i;
sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
memblock_init();
for (i = 0; i < mi->nr_banks; i++)
memblock_add(mi->bank[i].start, mi->bank[i].size);
/* Register the kernel text, kernel data and initrd with memblock. */
#ifdef CONFIG_XIP_KERNEL
memblock_reserve(__pa(_sdata), _end - _sdata);
#else
memblock_reserve(__pa(_stext), _end - _stext);
#endif
#ifdef CONFIG_BLK_DEV_INITRD
if (phys_initrd_size &&
memblock_is_region_reserved(phys_initrd_start, phys_initrd_size)) {
pr_err("INITRD: 0x%08lx+0x%08lx overlaps in-use memory region - disabling initrd\n",
phys_initrd_start, phys_initrd_size);
phys_initrd_start = phys_initrd_size = 0;
}
if (phys_initrd_size) {
memblock_reserve(phys_initrd_start, phys_initrd_size);
/* Now convert initrd to virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);
initrd_end = initrd_start + phys_initrd_size;
}
#endif
arm_mm_memblock_reserve();
/* reserve any platform specific memblock areas */
if (mdesc->reserve)
mdesc->reserve();
memblock_analyze();
memblock_dump_all();
}
void __init bootmem_init(void)
{
unsigned long min, max_low, max_high;
max_low = max_high = 0;
find_limits(&min, &max_low, &max_high);
arm_bootmem_init(min, max_low);
/*
* Sparsemem tries to allocate bootmem in memory_present(),
* so must be done after the fixed reservations
*/
arm_memory_present();
/*
* sparse_init() needs the bootmem allocator up and running.
*/
sparse_init();
/*
* Now free the memory - free_area_init_node needs
* the sparse mem_map arrays initialized by sparse_init()
* for memmap_init_zone(), otherwise all PFNs are invalid.
*/
arm_bootmem_free(min, max_low, max_high);
high_memory = __va(((phys_addr_t)max_low << PAGE_SHIFT) - 1) + 1;
/*
* This doesn't seem to be used by the Linux memory manager any
* more, but is used by ll_rw_block. If we can get rid of it, we
* also get rid of some of the stuff above as well.
*
* Note: max_low_pfn and max_pfn reflect the number of _pages_ in
* the system, not the maximum PFN.
*/
max_low_pfn = max_low - PHYS_PFN_OFFSET;
max_pfn = max_high - PHYS_PFN_OFFSET;
}
static inline int free_area(unsigned long pfn, unsigned long end, char *s)
{
unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10);
for (; pfn < end; pfn++) {
struct page *page = pfn_to_page(pfn);
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
pages++;
}
if (size && s)
printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
return pages;
}
static inline void
free_memmap(unsigned long start_pfn, unsigned long end_pfn)
{
struct page *start_pg, *end_pg;
unsigned long pg, pgend;
/*
* Convert start_pfn/end_pfn to a struct page pointer.
*/
start_pg = pfn_to_page(start_pfn - 1) + 1;
end_pg = pfn_to_page(end_pfn - 1) + 1;
/*
* Convert to physical addresses, and
* round start upwards and end downwards.
*/
pg = (unsigned long)PAGE_ALIGN(__pa(start_pg));
pgend = (unsigned long)__pa(end_pg) & PAGE_MASK;
/*
* If there are free pages between these,
* free the section of the memmap array.
*/
if (pg < pgend)
free_bootmem(pg, pgend - pg);
}
/*
* The mem_map array can get very big. Free the unused area of the memory map.
*/
static void __init free_unused_memmap(struct meminfo *mi)
{
unsigned long bank_start, prev_bank_end = 0;
unsigned int i;
/*
* This relies on each bank being in address order.
* The banks are sorted previously in bootmem_init().
*/
for_each_bank(i, mi) {
struct membank *bank = &mi->bank[i];
bank_start = bank_pfn_start(bank);
#ifdef CONFIG_SPARSEMEM
/*
* Take care not to free memmap entries that don't exist
* due to SPARSEMEM sections which aren't present.
*/
bank_start = min(bank_start,
ALIGN(prev_bank_end, PAGES_PER_SECTION));
#endif
/*
* If we had a previous bank, and there is a space
* between the current bank and the previous, free it.
*/
if (prev_bank_end && prev_bank_end < bank_start)
free_memmap(prev_bank_end, bank_start);
/*
* Align up here since the VM subsystem insists that the
* memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES.
*/
prev_bank_end = ALIGN(bank_pfn_end(bank), MAX_ORDER_NR_PAGES);
}
#ifdef CONFIG_SPARSEMEM
if (!IS_ALIGNED(prev_bank_end, PAGES_PER_SECTION))
free_memmap(prev_bank_end,
ALIGN(prev_bank_end, PAGES_PER_SECTION));
#endif
}
static void __init free_highpages(void)
{
#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn + PHYS_PFN_OFFSET;
struct memblock_region *mem, *res;
/* set highmem page free */
for_each_memblock(memory, mem) {
unsigned long start = memblock_region_memory_base_pfn(mem);
unsigned long end = memblock_region_memory_end_pfn(mem);
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
/* Find and exclude any reserved regions */
for_each_memblock(reserved, res) {
unsigned long res_start, res_end;
res_start = memblock_region_reserved_base_pfn(res);
res_end = memblock_region_reserved_end_pfn(res);
if (res_end < start)
continue;
if (res_start < start)
res_start = start;
if (res_start > end)
res_start = end;
if (res_end > end)
res_end = end;
if (res_start != start)
totalhigh_pages += free_area(start, res_start,
NULL);
start = res_end;
if (start == end)
break;
}
/* And now free anything which remains */
if (start < end)
totalhigh_pages += free_area(start, end, NULL);
}
totalram_pages += totalhigh_pages;
#endif
}
/*
* mem_init() marks the free areas in the mem_map and tells us how much
* memory is free. This is done after various parts of the system have
* claimed their memory after the kernel image.
*/
void __init mem_init(void)
{
unsigned long reserved_pages, free_pages;
struct memblock_region *reg;
int i;
#ifdef CONFIG_HAVE_TCM
/* These pointers are filled in on TCM detection */
extern u32 dtcm_end;
extern u32 itcm_end;
#endif
max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
/* this will put all unused low memory onto the freelists */
free_unused_memmap(&meminfo);
totalram_pages += free_all_bootmem();
#ifdef CONFIG_SA1111
/* now that our DMA memory is actually so designated, we can free it */
totalram_pages += free_area(PHYS_PFN_OFFSET,
__phys_to_pfn(__pa(swapper_pg_dir)), NULL);
#endif
free_highpages();
reserved_pages = free_pages = 0;
for_each_bank(i, &meminfo) {
struct membank *bank = &meminfo.bank[i];
unsigned int pfn1, pfn2;
struct page *page, *end;
pfn1 = bank_pfn_start(bank);
pfn2 = bank_pfn_end(bank);
page = pfn_to_page(pfn1);
end = pfn_to_page(pfn2 - 1) + 1;
do {
if (PageReserved(page))
reserved_pages++;
else if (!page_count(page))
free_pages++;
page++;
} while (page < end);
}
/*
* Since our memory may not be contiguous, calculate the
* real number of pages we have in this system
*/
printk(KERN_INFO "Memory:");
num_physpages = 0;
for_each_memblock(memory, reg) {
unsigned long pages = memblock_region_memory_end_pfn(reg) -
memblock_region_memory_base_pfn(reg);
num_physpages += pages;
printk(" %ldMB", pages >> (20 - PAGE_SHIFT));
}
printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
printk(KERN_NOTICE "Memory: %luk/%luk available, %luk reserved, %luK highmem\n",
nr_free_pages() << (PAGE_SHIFT-10),
free_pages << (PAGE_SHIFT-10),
reserved_pages << (PAGE_SHIFT-10),
totalhigh_pages << (PAGE_SHIFT-10));
#define MLK(b, t) b, t, ((t) - (b)) >> 10
#define MLM(b, t) b, t, ((t) - (b)) >> 20
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
printk(KERN_NOTICE "Virtual kernel memory layout:\n"
" vector : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HAVE_TCM
" DTCM : 0x%08lx - 0x%08lx (%4ld kB)\n"
" ITCM : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_MMU
" DMA : 0x%08lx - 0x%08lx (%4ld MB)\n"
#endif
" vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
" lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
#ifdef CONFIG_HIGHMEM
" pkmap : 0x%08lx - 0x%08lx (%4ld MB)\n"
#endif
" modules : 0x%08lx - 0x%08lx (%4ld MB)\n"
" .init : 0x%p" " - 0x%p" " (%4d kB)\n"
" .text : 0x%p" " - 0x%p" " (%4d kB)\n"
" .data : 0x%p" " - 0x%p" " (%4d kB)\n",
MLK(UL(CONFIG_VECTORS_BASE), UL(CONFIG_VECTORS_BASE) +
(PAGE_SIZE)),
#ifdef CONFIG_HAVE_TCM
MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
MLK(ITCM_OFFSET, (unsigned long) itcm_end),
#endif
MLK(FIXADDR_START, FIXADDR_TOP),
#ifdef CONFIG_MMU
MLM(CONSISTENT_BASE, CONSISTENT_END),
#endif
MLM(VMALLOC_START, VMALLOC_END),
MLM(PAGE_OFFSET, (unsigned long)high_memory),
#ifdef CONFIG_HIGHMEM
MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) *
(PAGE_SIZE)),
#endif
MLM(MODULES_VADDR, MODULES_END),
MLK_ROUNDUP(__init_begin, __init_end),
MLK_ROUNDUP(_text, _etext),
MLK_ROUNDUP(_sdata, _edata));
#undef MLK
#undef MLM
#undef MLK_ROUNDUP
/*
* Check boundaries twice: Some fundamental inconsistencies can
* be detected at build time already.
*/
#ifdef CONFIG_MMU
BUILD_BUG_ON(VMALLOC_END > CONSISTENT_BASE);
BUG_ON(VMALLOC_END > CONSISTENT_BASE);
BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
BUG_ON(TASK_SIZE > MODULES_VADDR);
#endif
#ifdef CONFIG_HIGHMEM
BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET);
BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET);
#endif
if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
extern int sysctl_overcommit_memory;
/*
* On a machine this small we won't get
* anywhere without overcommit, so turn
* it on by default.
*/
sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
}
}
void free_initmem(void)
{
#ifdef CONFIG_HAVE_TCM
extern char __tcm_start, __tcm_end;
totalram_pages += free_area(__phys_to_pfn(__pa(&__tcm_start)),
__phys_to_pfn(__pa(&__tcm_end)),
"TCM link");
#endif
if (!machine_is_integrator() && !machine_is_cintegrator())
totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
__phys_to_pfn(__pa(__init_end)),
"init");
}
#ifdef CONFIG_BLK_DEV_INITRD
static int keep_initrd;
void free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd)
totalram_pages += free_area(__phys_to_pfn(__pa(start)),
__phys_to_pfn(__pa(end)),
"initrd");
}
static int __init keepinitrd_setup(char *__unused)
{
keep_initrd = 1;
return 1;
}
__setup("keepinitrd", keepinitrd_setup);
#endif