/*
 * Initialize MMU support.
 *
 * Copyright (C) 1998-2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 */
#include <linux/kernel.h>
#include <linux/init.h>

#include <linux/bootmem.h>
#include <linux/efi.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/personality.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/proc_fs.h>
#include <linux/bitops.h>

#include <asm/a.out.h>
#include <asm/dma.h>
#include <asm/ia32.h>
#include <asm/io.h>
#include <asm/machvec.h>
#include <asm/numa.h>
#include <asm/patch.h>
#include <asm/pgalloc.h>
#include <asm/sal.h>
#include <asm/sections.h>
#include <asm/system.h>
#include <asm/tlb.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/mca.h>

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
DEFINE_PER_CPU(long, __pgtable_quicklist_size);

extern void ia64_tlb_init(void);

unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;

#ifdef CONFIG_VIRTUAL_MEM_MAP
unsigned long vmalloc_end = VMALLOC_END_INIT;
EXPORT_SYMBOL(vmalloc_end);
struct page *vmem_map;
EXPORT_SYMBOL(vmem_map);
#endif

struct page *zero_page_memmap_ptr;	/* map entry for zero page */
EXPORT_SYMBOL(zero_page_memmap_ptr);

#define MIN_PGT_PAGES			25UL
#define MAX_PGT_FREES_PER_PASS		16L
#define PGT_FRACTION_OF_NODE_MEM	16
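
/*
 * The per-CPU page-table quicklist is allowed to grow to roughly
 * 1/PGT_FRACTION_OF_NODE_MEM of the node's free pages but is never trimmed
 * below MIN_PGT_PAGES.  check_pgt_cache() frees at most
 * MAX_PGT_FREES_PER_PASS pages per pass so that preemption is only
 * disabled for short stretches at a time.
 */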
static inline long
max_pgt_pages(void)
{
        u64 node_free_pages, max_pgt_pages;

#ifndef CONFIG_NUMA
        node_free_pages = nr_free_pages();
#else
        node_free_pages = nr_free_pages_pgdat(NODE_DATA(numa_node_id()));
#endif
        max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM;
        max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES);
        return max_pgt_pages;
}

static inline long
min_pages_to_free(void)
{
        long pages_to_free;

        pages_to_free = pgtable_quicklist_size - max_pgt_pages();
        pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS);
        return pages_to_free;
}

void
check_pgt_cache(void)
{
        long pages_to_free;

        if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
                return;

        preempt_disable();
        while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
                while (pages_to_free--) {
                        free_page((unsigned long)pgtable_quicklist_alloc());
                }
                preempt_enable();
                preempt_disable();
        }
        preempt_enable();
}
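
/*
 * Flush the i-cache for an executable page so the i-cache and d-cache are
 * coherent, and mark the page with PG_arch_1 once done so the flush is not
 * repeated.  For a compound page the whole compound is flushed.
 */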
void
lazy_mmu_prot_update(pte_t pte)
{
        unsigned long addr;
        struct page *page;
        unsigned long order;

        if (!pte_exec(pte))
                return;				/* not an executable page... */

        page = pte_page(pte);
        addr = (unsigned long) page_address(page);

        if (test_bit(PG_arch_1, &page->flags))
                return;				/* i-cache is already coherent with d-cache */

        if (PageCompound(page)) {
                order = (unsigned long) (page[1].lru.prev);
                flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
        }
        else
                flush_icache_range(addr, addr + PAGE_SIZE);

        set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
}
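
/*
 * Place the bottom of the register backing store at STACK_TOP minus the
 * RLIMIT_STACK hard limit (rounded down to a 16-byte boundary), capped at
 * MAX_USER_STACK_SIZE.
 */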
inline void
ia64_set_rbs_bot(void)
{
        unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;

        if (stack_size > MAX_USER_STACK_SIZE)
                stack_size = MAX_USER_STACK_SIZE;
        current->thread.rbs_bot = STACK_TOP - stack_size;
}

/*
 * This performs some platform-dependent address space initialization.
 * On IA-64, we want to setup the VM area for the register backing
 * store (which grows upwards) and install the gateway page which is
 * used for signal trampolines, etc.
 */
void
ia64_init_addr_space(void)
{
        struct vm_area_struct *vma;

        ia64_set_rbs_bot();

        /*
         * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
         * the problem.  When the process attempts to write to the register backing store
         * for the first time, it will get a SEGFAULT in this case.
         */
        vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
        if (vma) {
                memset(vma, 0, sizeof(*vma));
                vma->vm_mm = current->mm;
                vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
                vma->vm_end = vma->vm_start + PAGE_SIZE;
                vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
                vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP | VM_ACCOUNT;
                down_write(&current->mm->mmap_sem);
                if (insert_vm_struct(current->mm, vma)) {
                        up_write(&current->mm->mmap_sem);
                        kmem_cache_free(vm_area_cachep, vma);
                        return;
                }
                up_write(&current->mm->mmap_sem);
        }

        /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
        if (!(current->personality & MMAP_PAGE_ZERO)) {
                vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
                if (vma) {
                        memset(vma, 0, sizeof(*vma));
                        vma->vm_mm = current->mm;
                        vma->vm_end = PAGE_SIZE;
                        vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
                        vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
                        down_write(&current->mm->mmap_sem);
                        if (insert_vm_struct(current->mm, vma)) {
                                up_write(&current->mm->mmap_sem);
                                kmem_cache_free(vm_area_cachep, vma);
                                return;
                        }
                        up_write(&current->mm->mmap_sem);
                }
        }
}

void
free_initmem(void)
{
        unsigned long addr, eaddr;

        addr = (unsigned long) ia64_imva(__init_begin);
        eaddr = (unsigned long) ia64_imva(__init_end);
        while (addr < eaddr) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                free_page(addr);
                ++totalram_pages;
                addr += PAGE_SIZE;
        }
        printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
               (__init_end - __init_begin) >> 10);
}

void __init
free_initrd_mem(unsigned long start, unsigned long end)
{
        struct page *page;
        /*
         * EFI uses 4KB pages while the kernel can use 4KB or bigger.
         * Thus EFI and the kernel may have different page sizes. It is
         * therefore possible to have the initrd share the same page as
         * the end of the kernel (given current setup).
         *
         * To avoid freeing/using the wrong page (kernel sized) we:
         *	- align up the beginning of initrd
         *	- align down the end of initrd
         *
         *  |             |
         *  |=============| a000
         *  |             |
         *  |             |
         *  |             | 9000
         *  |/////////////|
         *  |/////////////|
         *  |=============| 8000
         *  |///INITRD////|
         *  |/////////////|
         *  |/////////////| 7000
         *  |             |
         *  |KKKKKKKKKKKKK|
         *  |=============| 6000
         *  |KKKKKKKKKKKKK|
         *  |KKKKKKKKKKKKK|
         *  K=kernel using 8KB pages
         *
         * In this example, we must free page 8000 ONLY. So we must align up
         * initrd_start and keep initrd_end as is.
         */
        start = PAGE_ALIGN(start);
        end = end & PAGE_MASK;

        if (start < end)
                printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);

        for (; start < end; start += PAGE_SIZE) {
                if (!virt_addr_valid(start))
                        continue;
                page = virt_to_page(start);
                ClearPageReserved(page);
                init_page_count(page);
                free_page(start);
                ++totalram_pages;
        }
}

/*
 * This installs a clean page in the kernel's page table.
 */
static struct page * __init
put_kernel_page(struct page *page, unsigned long address, pgprot_t pgprot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (!PageReserved(page))
                printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n",
                       page_address(page));

        pgd = pgd_offset_k(address);	/* note: this is NOT pgd_offset()! */

        {
                pud = pud_alloc(&init_mm, pgd, address);
                if (!pud)
                        goto out;
                pmd = pmd_alloc(&init_mm, pud, address);
                if (!pmd)
                        goto out;
                pte = pte_alloc_kernel(pmd, address);
                if (!pte)
                        goto out;
                if (!pte_none(*pte))
                        goto out;
                set_pte(pte, mk_pte(page, pgprot));
        }
  out:
        /* no need for flush_tlb */
        return page;
}

static void __init
setup_gate(void)
{
        struct page *page;

        /*
         * Map the gate page twice: once read-only to export the ELF
         * headers etc. and once execute-only page to enable
         * privilege-promotion via "epc":
         */
        page = virt_to_page(ia64_imva(__start_gate_section));
        put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
#ifdef HAVE_BUGGY_SEGREL
        page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE));
        put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
#else
        put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
        /* Fill in the holes (if any) with read-only zero pages: */
        {
                unsigned long addr;

                for (addr = GATE_ADDR + PAGE_SIZE;
                     addr < GATE_ADDR + PERCPU_PAGE_SIZE;
                     addr += PAGE_SIZE)
                {
                        put_kernel_page(ZERO_PAGE(0), addr,
                                        PAGE_READONLY);
                        put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
                                        PAGE_READONLY);
                }
        }
#endif
        ia64_patch_gate();
}
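
/*
 * Set up the CPU-local MMU state: pin the per-cpu area into the TLB with an
 * itr, program the PTA register for the virtually mapped linear page table,
 * and initialize the TLB/VHPT machinery via ia64_tlb_init().
 */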
void __devinit
ia64_mmu_init(void *my_cpu_data)
{
        unsigned long psr, pta, impl_va_bits;
        extern void __devinit tlb_init(void);

#ifdef CONFIG_DISABLE_VHPT
#       define VHPT_ENABLE_BIT	0
#else
#       define VHPT_ENABLE_BIT	1
#endif

        /* Pin mapping for percpu area into TLB */
        psr = ia64_clear_ic();
        ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
                 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
                 PERCPU_PAGE_SHIFT);

        ia64_set_psr(psr);
        ia64_srlz_i();

        /*
         * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
         * address space.  The IA-64 architecture guarantees that at least 50 bits of
         * virtual address space are implemented but if we pick a large enough page size
         * (e.g., 64KB), the mapped address space is big enough that it will overlap with
         * VMLPT.  I assume that once we run on machines big enough to warrant 64KB pages,
         * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
         * problem in practice.  Alternatively, we could truncate the top of the mapped
         * address space to not permit mappings that would overlap with the VMLPT.
         * --davidm 00/12/06
         */
#       define pte_bits			3
#       define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
        /*
         * The virtual page table has to cover the entire implemented address space within
         * a region even though not all of this space may be mappable.  The reason for
         * this is that the Access bit and Dirty bit fault handlers perform
         * non-speculative accesses to the virtual page table, so the address range of the
         * virtual page table itself needs to be covered by virtual page table.
         */
#       define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
#       define POW2(n)			(1ULL << (n))

        impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));

        if (impl_va_bits < 51 || impl_va_bits > 61)
                panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
        /*
         * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need,
         * which must fit into "vmlpt_bits - pte_bits" slots. Second half of
         * the test makes sure that our mapped space doesn't overlap the
         * unimplemented hole in the middle of the region.
         */
        if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
            (mapped_space_bits > impl_va_bits - 1))
                panic("Cannot build a big enough virtual-linear page table"
                      " to cover mapped address space.\n"
                      " Try using a smaller page size.\n");

        /* place the VMLPT at the end of each page-table mapped region: */
        pta = POW2(61) - POW2(vmlpt_bits);

        /*
         * Set the (virtually mapped linear) page table address.  Bit
         * 8 selects between the short and long format, bits 2-7 the
         * size of the table, and bit 0 whether the VHPT walker is
         * enabled.
         */
        ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);

        ia64_tlb_init();

#ifdef CONFIG_HUGETLB_PAGE
        ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
        ia64_srlz_d();
#endif
}

#ifdef CONFIG_VIRTUAL_MEM_MAP
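
/*
 * Walk the kernel page tables backing vmem_map and return the offset
 * (relative to the node's first pfn) of the next pfn whose struct page is
 * actually mapped, skipping unmapped holes a PGD/PUD/PMD/PTE at a time.
 */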
int vmemmap_find_next_valid_pfn(int node, int i)
{
        unsigned long end_address, hole_next_pfn;
        unsigned long stop_address;
        pg_data_t *pgdat = NODE_DATA(node);

        end_address = (unsigned long) &vmem_map[pgdat->node_start_pfn + i];
        end_address = PAGE_ALIGN(end_address);

        stop_address = (unsigned long) &vmem_map[
                pgdat->node_start_pfn + pgdat->node_spanned_pages];

        do {
                pgd_t *pgd;
                pud_t *pud;
                pmd_t *pmd;
                pte_t *pte;

                pgd = pgd_offset_k(end_address);
                if (pgd_none(*pgd)) {
                        end_address += PGDIR_SIZE;
                        continue;
                }

                pud = pud_offset(pgd, end_address);
                if (pud_none(*pud)) {
                        end_address += PUD_SIZE;
                        continue;
                }

                pmd = pmd_offset(pud, end_address);
                if (pmd_none(*pmd)) {
                        end_address += PMD_SIZE;
                        continue;
                }

                pte = pte_offset_kernel(pmd, end_address);
retry_pte:
                if (pte_none(*pte)) {
                        end_address += PAGE_SIZE;
                        pte++;
                        if ((end_address < stop_address) &&
                            (end_address != ALIGN(end_address, 1UL << PMD_SHIFT)))
                                goto retry_pte;
                        continue;
                }
                /* Found next valid vmem_map page */
                break;
        } while (end_address < stop_address);

        end_address = min(end_address, stop_address);
        end_address = end_address - (unsigned long) vmem_map + sizeof(struct page) - 1;
        hole_next_pfn = end_address / sizeof(struct page);
        return hole_next_pfn - pgdat->node_start_pfn;
}
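
/*
 * Allocate and wire up the page-table pages that back the virtual mem_map
 * for the physical range [start, end), pulling the backing pages from the
 * bootmem allocator of the node that owns the range.
 */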
int __init
create_mem_map_page_table(u64 start, u64 end, void *arg)
{
        unsigned long address, start_page, end_page;
        struct page *map_start, *map_end;
        int node;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
        map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);

        start_page = (unsigned long) map_start & PAGE_MASK;
        end_page = PAGE_ALIGN((unsigned long) map_end);
        node = paddr_to_nid(__pa(start));

        for (address = start_page; address < end_page; address += PAGE_SIZE) {
                pgd = pgd_offset_k(address);
                if (pgd_none(*pgd))
                        pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
                pud = pud_offset(pgd, address);

                if (pud_none(*pud))
                        pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
                pmd = pmd_offset(pud, address);

                if (pmd_none(*pmd))
                        pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
                pte = pte_offset_kernel(pmd, address);

                if (pte_none(*pte))
                        set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
                                             PAGE_KERNEL));
        }
        return 0;
}

struct memmap_init_callback_data {
        struct page *start;
        struct page *end;
        int nid;
        unsigned long zone;
};

static int
virtual_memmap_init(u64 start, u64 end, void *arg)
{
        struct memmap_init_callback_data *args;
        struct page *map_start, *map_end;

        args = (struct memmap_init_callback_data *) arg;
        map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
        map_end   = vmem_map + (__pa(end) >> PAGE_SHIFT);

        if (map_start < args->start)
                map_start = args->start;
        if (map_end > args->end)
                map_end = args->end;

        /*
         * We have to initialize "out of bounds" struct page elements that fit completely
         * on the same pages that were allocated for the "in bounds" elements because they
         * may be referenced later (and found to be "reserved").
         */
        map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
        map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
                    / sizeof(struct page));

        if (map_start < map_end)
                memmap_init_zone((unsigned long)(map_end - map_start),
                                 args->nid, args->zone, page_to_pfn(map_start),
                                 MEMMAP_EARLY);
        return 0;
}
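
/*
 * When a virtual mem_map is in use, only the ranges reported by
 * efi_memmap_walk() get their struct pages initialized; otherwise fall
 * back to the generic memmap_init_zone() over the whole span.
 */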
void
memmap_init(unsigned long size, int nid, unsigned long zone,
            unsigned long start_pfn)
{
        if (!vmem_map)
                memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY);
        else {
                struct page *start;
                struct memmap_init_callback_data args;

                start = pfn_to_page(start_pfn);
                args.start = start;
                args.end = start + size;
                args.nid = nid;
                args.zone = zone;

                efi_memmap_walk(virtual_memmap_init, &args);
        }
}
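
/*
 * With a sparse vmem_map the struct page for a pfn may not be backed by a
 * physical page at all, so probe it with __get_user() (checking both ends
 * when the entry straddles a page boundary) instead of dereferencing it.
 */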
int
ia64_pfn_valid(unsigned long pfn)
{
        char byte;
        struct page *pg = pfn_to_page(pfn);

        return     (__get_user(byte, (char __user *) pg) == 0)
                && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
                        || (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
}
EXPORT_SYMBOL(ia64_pfn_valid);
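
/*
 * Memory-map walk callback: track the largest gap seen between two
 * consecutive memory ranges.  The NOTE below relies on the map being sorted.
 */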
int __init
find_largest_hole(u64 start, u64 end, void *arg)
{
        u64 *max_gap = arg;
        static u64 last_end = PAGE_OFFSET;

        /* NOTE: this algorithm assumes efi memmap table is ordered */

        if (*max_gap < (start - last_end))
                *max_gap = start - last_end;
        last_end = end;
        return 0;
}
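
/*
 * Register a usable physical memory range with the generic zone-sizing
 * code via add_active_range().
 */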
int __init
register_active_ranges(u64 start, u64 end, void *arg)
{
        add_active_range(0, __pa(start) >> PAGE_SHIFT, __pa(end) >> PAGE_SHIFT);
        return 0;
}

#endif /* CONFIG_VIRTUAL_MEM_MAP */
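
/*
 * efi_memmap_walk() callback: count how many pages in [start, end) are
 * still marked PageReserved, for the boot-time memory report in mem_init().
 */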
static int __init
count_reserved_pages(u64 start, u64 end, void *arg)
{
        unsigned long num_reserved = 0;
        unsigned long *count = arg;

        for (; start < end; start += PAGE_SIZE)
                if (PageReserved(virt_to_page(start)))
                        ++num_reserved;
        *count += num_reserved;
        return 0;
}

/*
 * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
 * system call handler.  When this option is in effect, all fsyscalls will end up bubbling
 * down into the kernel and calling the normal (heavy-weight) syscall handler.  This is
 * useful for performance testing, but conceivably could also come in handy for debugging
 * purposes.
 */

static int nolwsys __initdata;

static int __init
nolwsys_setup(char *s)
{
        nolwsys = 1;
        return 1;
}

__setup("nolwsys", nolwsys_setup);

void __init
mem_init(void)
{
        long reserved_pages, codesize, datasize, initsize;
        pg_data_t *pgdat;
        int i;
        static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;

        BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
        BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
        BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);

#ifdef CONFIG_PCI
        /*
         * This needs to be called _after_ the command line has been parsed but _before_
         * any drivers that may need the PCI DMA interface are initialized or bootmem has
         * been freed.
         */
        platform_dma_init();
#endif

#ifdef CONFIG_FLATMEM
        if (!mem_map)
                BUG();
        max_mapnr = max_low_pfn;
#endif

        high_memory = __va(max_low_pfn * PAGE_SIZE);

        kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE);
        kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END - VMALLOC_START);
        kclist_add(&kcore_kernel, _stext, _end - _stext);

        for_each_online_pgdat(pgdat)
                if (pgdat->bdata->node_bootmem_map)
                        totalram_pages += free_all_bootmem_node(pgdat);

        reserved_pages = 0;
        efi_memmap_walk(count_reserved_pages, &reserved_pages);

        codesize = (unsigned long) _etext - (unsigned long) _stext;
        datasize = (unsigned long) _edata - (unsigned long) _etext;
        initsize = (unsigned long) __init_end - (unsigned long) __init_begin;

        printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
               "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
               num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
               reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);

        /*
         * For fsyscall entrypoints with no light-weight handler, use the ordinary
         * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
         * code can tell them apart.
         */
        for (i = 0; i < NR_syscalls; ++i) {
                extern unsigned long fsyscall_table[NR_syscalls];
                extern unsigned long sys_call_table[NR_syscalls];

                if (!fsyscall_table[i] || nolwsys)
                        fsyscall_table[i] = sys_call_table[i] | 1;
        }
        setup_gate();

#ifdef CONFIG_IA32_SUPPORT
        ia32_mem_init();
#endif
}

#ifdef CONFIG_MEMORY_HOTPLUG
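
/*
 * Memory hotplug support: online_page() hands a newly added page back to the
 * page allocator, and arch_add_memory() adds the new range to ZONE_NORMAL of
 * the target node via __add_pages().  Removal is not supported.
 */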
void online_page(struct page *page)
{
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
        totalram_pages++;
        num_physpages++;
}

int arch_add_memory(int nid, u64 start, u64 size)
{
        pg_data_t *pgdat;
        struct zone *zone;
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;

        pgdat = NODE_DATA(nid);

        zone = pgdat->node_zones + ZONE_NORMAL;
        ret = __add_pages(zone, start_pfn, nr_pages);

        if (ret)
                printk("%s: Problem encountered in __add_pages() as ret=%d\n",
                       __FUNCTION__, ret);

        return ret;
}

int remove_memory(u64 start, u64 size)
{
        return -EINVAL;
}
EXPORT_SYMBOL_GPL(remove_memory);
#endif