/*
 *  linux/mm/vmalloc.c
 *
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 *  Major rework to support vmap/vunmap, Christoph Hellwig, SGI, August 2002
 *  Numa awareness, Christoph Lameter, SGI, June 2005
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugobjects.h>
#include <linux/vmalloc.h>
#include <linux/kallsyms.h>

#include <asm/uaccess.h>
#include <asm/tlbflush.h>


DEFINE_RWLOCK(vmlist_lock);
struct vm_struct *vmlist;

static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller);

static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static inline void vunmap_pmd_range(pud_t *pud, unsigned long addr,
						unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		vunmap_pte_range(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}

static inline void vunmap_pud_range(pgd_t *pgd, unsigned long addr,
						unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		vunmap_pmd_range(pud, addr, next);
	} while (pud++, addr = next, addr != end);
}

void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long start = addr;
	unsigned long end = addr + size;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	flush_cache_vunmap(addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_pud_range(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	flush_tlb_kernel_range(start, end);
}

static void unmap_vm_area(struct vm_struct *area)
{
	unmap_kernel_range((unsigned long)area->addr, area->size);
}

static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page ***pages)
{
	pte_t *pte;

	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		struct page *page = **pages;

		WARN_ON(!pte_none(*pte));
		if (!page)
			return -ENOMEM;
		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
		(*pages)++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static inline int vmap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page ***pages)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);
		if (vmap_pte_range(pmd, addr, next, prot, pages))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int vmap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, pgprot_t prot, struct page ***pages)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);
		if (vmap_pmd_range(pud, addr, next, prot, pages))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long addr = (unsigned long)area->addr;
	unsigned long end = addr + area->size - PAGE_SIZE;
	int err;

	BUG_ON(addr >= end);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = vmap_pud_range(pgd, addr, next, prot, pages);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);
	flush_cache_vmap((unsigned long)area->addr, end);
	return err;
}
EXPORT_SYMBOL_GPL(map_vm_area);

/*
 * Map a vmalloc()-space virtual address to the physical page.
 */
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
	unsigned long addr = (unsigned long)vmalloc_addr;
	struct page *page = NULL;
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;

	if (!pgd_none(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (!pud_none(*pud)) {
			pmd = pmd_offset(pud, addr);
			if (!pmd_none(*pmd)) {
				ptep = pte_offset_map(pmd, addr);
				pte = *ptep;
				if (pte_present(pte))
					page = pte_page(pte);
				pte_unmap(ptep);
			}
		}
	}
	return page;
}
EXPORT_SYMBOL(vmalloc_to_page);

/*
 * Map a vmalloc()-space virtual address to the physical page frame number.
 */
unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
{
	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
}
EXPORT_SYMBOL(vmalloc_to_pfn);
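
/*
 * Illustrative sketch (not part of this file): a caller that needs the
 * struct page behind each page of a vmalloc'ed buffer, for example to
 * build a scatterlist, can walk the buffer page by page with
 * vmalloc_to_page().  The buf and len names below are assumptions made
 * for the example.
 *
 *	void *buf = vmalloc(len);
 *	unsigned long off;
 *
 *	for (off = 0; buf && off < len; off += PAGE_SIZE) {
 *		struct page *pg = vmalloc_to_page(buf + off);
 *		...add pg (or page_to_pfn(pg)) to a scatterlist...
 *	}
 */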

static struct vm_struct *
__get_vm_area_node(unsigned long size, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	struct vm_struct **p, *tmp, *area;
	unsigned long align = 1;
	unsigned long addr;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}
	addr = ALIGN(start, align);
	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if ((unsigned long)tmp->addr < addr) {
			if ((unsigned long)tmp->addr + tmp->size >= addr)
				addr = ALIGN(tmp->size +
					     (unsigned long)tmp->addr, align);
			continue;
		}
		if ((size + addr) < addr)
			goto out;
		if (size + addr <= (unsigned long)tmp->addr)
			goto found;
		addr = ALIGN(tmp->size + (unsigned long)tmp->addr, align);
		if (addr > end - size)
			goto out;
	}
	if ((size + addr) < addr)
		goto out;
	if (addr > end - size)
		goto out;

found:
	area->next = *p;
	*p = area;

	area->flags = flags;
	area->addr = (void *)addr;
	area->size = size;
	area->pages = NULL;
	area->nr_pages = 0;
	area->phys_addr = 0;
	area->caller = caller;
	write_unlock(&vmlist_lock);

	return area;

out:
	write_unlock(&vmlist_lock);
	kfree(area);
	if (printk_ratelimit())
		printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc=<size> to increase size.\n");
	return NULL;
}

struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
				unsigned long start, unsigned long end)
{
	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
					__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(__get_vm_area);

/**
 * get_vm_area - reserve a contiguous kernel virtual area
 * @size: size of the area
 * @flags: %VM_IOREMAP for I/O mappings or VM_ALLOC
 *
 * Search an area of @size in the kernel virtual mapping area,
 * and reserve it for our purposes.  Returns the area descriptor
 * on success or %NULL on failure.
 */
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
				-1, GFP_KERNEL, __builtin_return_address(0));
}

struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
				void *caller)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
				-1, GFP_KERNEL, caller);
}

struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
				   int node, gfp_t gfp_mask)
{
	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
				  gfp_mask, __builtin_return_address(0));
}
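
/*
 * Illustrative sketch (not part of this file): the typical pattern for
 * these helpers is to reserve a virtual range, map pages into it, and
 * tear it down later; vmap() further down in this file is essentially
 * this sequence.  The nr_pages and pages names are assumptions made for
 * the example; note that map_vm_area() advances the page pointer it is
 * given, so a temporary copy is passed.
 *
 *	struct vm_struct *area;
 *	struct page **tmp = pages;
 *
 *	area = get_vm_area(nr_pages << PAGE_SHIFT, VM_MAP);
 *	if (!area)
 *		return NULL;
 *	if (map_vm_area(area, PAGE_KERNEL, &tmp)) {
 *		vunmap(area->addr);	/* unmaps and frees the area */
 *		return NULL;
 *	}
 *	return area->addr;
 */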

/* Caller must hold vmlist_lock */
static struct vm_struct *__find_vm_area(const void *addr)
{
	struct vm_struct *tmp;

	for (tmp = vmlist; tmp != NULL; tmp = tmp->next) {
		if (tmp->addr == addr)
			break;
	}

	return tmp;
}

/* Caller must hold vmlist_lock */
static struct vm_struct *__remove_vm_area(const void *addr)
{
	struct vm_struct **p, *tmp;

	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr == addr)
			goto found;
	}
	return NULL;

found:
	unmap_vm_area(tmp);
	*p = tmp->next;

	/*
	 * Remove the guard page.
	 */
	tmp->size -= PAGE_SIZE;
	return tmp;
}

/**
 * remove_vm_area - find and remove a contiguous kernel virtual area
 * @addr: base address
 *
 * Search for the kernel VM area starting at @addr, and remove it.
 * This function returns the found VM area, but using it is NOT safe
 * on SMP machines, except for its size or flags.
 */
struct vm_struct *remove_vm_area(const void *addr)
{
	struct vm_struct *v;

	write_lock(&vmlist_lock);
	v = __remove_vm_area(addr);
	write_unlock(&vmlist_lock);
	return v;
}

static void __vunmap(const void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	if ((PAGE_SIZE - 1) & (unsigned long)addr) {
		printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		WARN_ON(1);
		return;
	}

	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		WARN_ON(1);
		return;
	}

	debug_check_no_locks_freed(addr, area->size);
	debug_check_no_obj_freed(addr, area->size);

	if (deallocate_pages) {
		int i;

		for (i = 0; i < area->nr_pages; i++) {
			struct page *page = area->pages[i];

			BUG_ON(!page);
			__free_page(page);
		}

		if (area->flags & VM_VPAGES)
			vfree(area->pages);
		else
			kfree(area->pages);
	}

	kfree(area);
	return;
}

/**
 * vfree - release memory allocated by vmalloc()
 * @addr: memory base address
 *
 * Free the virtually contiguous memory area starting at @addr, as
 * obtained from vmalloc(), vmalloc_32() or __vmalloc().  If @addr is
 * NULL, no operation is performed.
 *
 * Must not be called in interrupt context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);
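
/*
 * Illustrative sketch (not part of this file): because vfree() must not be
 * called in interrupt context, callers that drop their last reference from
 * an atomic path commonly defer the free to process context, for example
 * via a work item.  The structure and function names here are assumptions
 * made for the example.
 *
 *	struct deferred_buf {
 *		struct work_struct work;
 *		void *addr;
 *	};
 *
 *	static void deferred_vfree(struct work_struct *work)
 *	{
 *		struct deferred_buf *db =
 *			container_of(work, struct deferred_buf, work);
 *
 *		vfree(db->addr);
 *		kfree(db);
 *	}
 *
 * The atomic path then does INIT_WORK(&db->work, deferred_vfree) followed
 * by schedule_work(&db->work).
 */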

/**
 * vunmap - release virtual mapping obtained by vmap()
 * @addr: memory base address
 *
 * Free the virtually contiguous memory area starting at @addr,
 * which was created from the page array passed to vmap().
 *
 * Must not be called in interrupt context.
 */
void vunmap(const void *addr)
{
	BUG_ON(in_interrupt());
	__vunmap(addr, 0);
}
EXPORT_SYMBOL(vunmap);

/**
 * vmap - map an array of pages into virtually contiguous space
 * @pages: array of page pointers
 * @count: number of pages to map
 * @flags: vm_area->flags
 * @prot: page protection for the mapping
 *
 * Maps @count pages from @pages into contiguous kernel virtual
 * space.
 */
void *vmap(struct page **pages, unsigned int count,
		unsigned long flags, pgprot_t prot)
{
	struct vm_struct *area;

	if (count > num_physpages)
		return NULL;

	area = get_vm_area_caller((count << PAGE_SHIFT), flags,
					__builtin_return_address(0));
	if (!area)
		return NULL;

	if (map_vm_area(area, prot, &pages)) {
		vunmap(area->addr);
		return NULL;
	}

	return area->addr;
}
EXPORT_SYMBOL(vmap);
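
/*
 * Illustrative sketch (not part of this file): a caller that already owns
 * an array of pages can make them appear virtually contiguous with vmap()
 * and remove the mapping again with vunmap().  The pages array and
 * nr_pages count are assumptions made for the example; vunmap() only
 * removes the mapping, it does not free the pages.
 *
 *	void *va = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
 *
 *	if (!va)
 *		return -ENOMEM;
 *	...access nr_pages * PAGE_SIZE bytes at va...
 *	vunmap(va);
 */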

static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node, void *caller)
{
	struct page **pages;
	unsigned int nr_pages, array_size, i;

	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
				PAGE_KERNEL, node, caller);
		area->flags |= VM_VPAGES;
	} else {
		pages = kmalloc_node(array_size,
				(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
				node);
	}
	area->pages = pages;
	area->caller = caller;
	if (!area->pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		if (node < 0)
			page = alloc_page(gfp_mask);
		else
			page = alloc_pages_node(node, gfp_mask, 0);

		if (unlikely(!page)) {
			/* Successfully allocated i pages, free them in __vunmap() */
			area->nr_pages = i;
			goto fail;
		}
		area->pages[i] = page;
	}

	if (map_vm_area(area, prot, &pages))
		goto fail;
	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}

void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_area_node(area, gfp_mask, prot, -1,
					__builtin_return_address(0));
}

/**
 * __vmalloc_node - allocate virtually contiguous memory
 * @size: allocation size
 * @gfp_mask: flags for the page level allocator
 * @prot: protection mask for the allocated pages
 * @node: node to use for allocation or -1
 * @caller: caller's return address
 *
 * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags.  Map them into contiguous
 * kernel virtual space, using a pagetable protection of @prot.
 */
static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
						int node, void *caller)
{
	struct vm_struct *area;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > num_physpages)
		return NULL;

	area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
						node, gfp_mask, caller);
	if (!area)
		return NULL;

	return __vmalloc_area_node(area, gfp_mask, prot, node, caller);
}

void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
	return __vmalloc_node(size, gfp_mask, prot, -1,
				__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);

/**
 * vmalloc - allocate virtually contiguous memory
 * @size: allocation size
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
					-1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
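
/*
 * Illustrative sketch (not part of this file): vmalloc() is the usual way
 * to get a large, virtually contiguous buffer whose backing pages need not
 * be physically contiguous.  The nr_entries and entry_size names below are
 * assumptions made for the example.
 *
 *	void *buf = vmalloc(nr_entries * entry_size);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	...use buf; it is not physically contiguous, so it is not directly
 *	   suitable for DMA...
 *	vfree(buf);
 */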

/**
 * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
 * @size: allocation size
 *
 * The resulting memory area is zeroed so it can be mapped to userspace
 * without leaking data.
 */
void *vmalloc_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
	if (ret) {
		write_lock(&vmlist_lock);
		area = __find_vm_area(ret);
		area->flags |= VM_USERMAP;
		write_unlock(&vmlist_lock);
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_user);

/**
 * vmalloc_node - allocate memory on a specific node
 * @size: allocation size
 * @node: numa node
 *
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_node(unsigned long size, int node)
{
	return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
					node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
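
/*
 * Illustrative sketch (not part of this file): per-node data that is mostly
 * touched by CPUs on that node can be placed with vmalloc_node().  The
 * per_node_buf array and buf_size are assumptions made for the example.
 *
 *	int node;
 *
 *	for_each_online_node(node) {
 *		per_node_buf[node] = vmalloc_node(buf_size, node);
 *		if (!per_node_buf[node])
 *			goto fail;
 *	}
 */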

#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif

/**
 * vmalloc_exec - allocate virtually contiguous, executable memory
 * @size: allocation size
 *
 * Kernel-internal function to allocate enough pages to cover @size
 * from the page level allocator and map them into contiguous and
 * executable kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc_exec(unsigned long size)
{
	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
}

#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
#else
#define GFP_VMALLOC32 GFP_KERNEL
#endif

/**
 * vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
 * @size: allocation size
 *
 * Allocate enough 32bit PA addressable pages to cover @size from the
 * page level allocator and map them into contiguous kernel virtual space.
 */
void *vmalloc_32(unsigned long size)
{
	return __vmalloc(size, GFP_VMALLOC32, PAGE_KERNEL);
}
EXPORT_SYMBOL(vmalloc_32);

/**
 * vmalloc_32_user - allocate zeroed virtually contiguous 32bit memory
 * @size: allocation size
 *
 * The resulting memory area is 32bit addressable and zeroed so it can be
 * mapped to userspace without leaking data.
 */
void *vmalloc_32_user(unsigned long size)
{
	struct vm_struct *area;
	void *ret;

	ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL);
	if (ret) {
		write_lock(&vmlist_lock);
		area = __find_vm_area(ret);
		area->flags |= VM_USERMAP;
		write_unlock(&vmlist_lock);
	}
	return ret;
}
EXPORT_SYMBOL(vmalloc_32_user);

long vread(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			*buf = '\0';
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*buf = *addr;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
	struct vm_struct *tmp;
	char *vaddr, *buf_start = buf;
	unsigned long n;

	/* Don't allow overflow */
	if ((unsigned long) addr + count < count)
		count = -(unsigned long) addr;

	read_lock(&vmlist_lock);
	for (tmp = vmlist; tmp; tmp = tmp->next) {
		vaddr = (char *) tmp->addr;
		if (addr >= vaddr + tmp->size - PAGE_SIZE)
			continue;
		while (addr < vaddr) {
			if (count == 0)
				goto finished;
			buf++;
			addr++;
			count--;
		}
		n = vaddr + tmp->size - PAGE_SIZE - addr;
		do {
			if (count == 0)
				goto finished;
			*addr = *buf;
			buf++;
			addr++;
			count--;
		} while (--n > 0);
	}
finished:
	read_unlock(&vmlist_lock);
	return buf - buf_start;
}
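
/*
 * Illustrative sketch (not part of this file): vread()/vwrite() copy data
 * between a kernel buffer and the vmalloc area under vmlist_lock; this is
 * how interfaces such as /dev/kmem access vmalloc'ed memory.  The kbuf,
 * vmalloc_addr and len names below are assumptions made for the example.
 *
 *	char *kbuf = kmalloc(len, GFP_KERNEL);
 *	long copied;
 *
 *	if (!kbuf)
 *		return -ENOMEM;
 *	copied = vread(kbuf, vmalloc_addr, len);
 *	...the first "copied" bytes of kbuf are now valid; holes between
 *	   vm areas read back as zeroes...
 */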

/**
 * remap_vmalloc_range - map vmalloc pages to userspace
 * @vma: vma to cover (map full range of vma)
 * @addr: vmalloc memory
 * @pgoff: number of pages into addr before first page to map
 *
 * Returns:	0 for success, -Exxx on failure
 *
 * This function checks that addr is a valid vmalloc'ed area, and
 * that it is big enough to cover the vma.  Will return failure if
 * that criterion isn't met.
 *
 * Similar to remap_pfn_range() (see mm/memory.c)
 */
int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
						unsigned long pgoff)
{
	struct vm_struct *area;
	unsigned long uaddr = vma->vm_start;
	unsigned long usize = vma->vm_end - vma->vm_start;
	int ret;

	if ((PAGE_SIZE - 1) & (unsigned long)addr)
		return -EINVAL;

	read_lock(&vmlist_lock);
	area = __find_vm_area(addr);
	if (!area)
		goto out_einval_locked;

	if (!(area->flags & VM_USERMAP))
		goto out_einval_locked;

	if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE)
		goto out_einval_locked;
	read_unlock(&vmlist_lock);

	addr += pgoff << PAGE_SHIFT;
	do {
		struct page *page = vmalloc_to_page(addr);

		ret = vm_insert_page(vma, uaddr, page);
		if (ret)
			return ret;

		uaddr += PAGE_SIZE;
		addr += PAGE_SIZE;
		usize -= PAGE_SIZE;
	} while (usize > 0);

	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
	vma->vm_flags |= VM_RESERVED;

	return ret;

out_einval_locked:
	read_unlock(&vmlist_lock);
	return -EINVAL;
}
EXPORT_SYMBOL(remap_vmalloc_range);
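
/*
 * Illustrative sketch (not part of this file): the usual pairing is
 * vmalloc_user() in the driver and remap_vmalloc_range() in its ->mmap()
 * handler, so the same zeroed buffer is visible to both kernel and
 * userspace.  The foo_dev structure and foo_mmap handler are assumptions
 * made for the example.
 *
 *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct foo_dev *dev = file->private_data;
 *
 *		return remap_vmalloc_range(vma, dev->shared_buf,
 *					   vma->vm_pgoff);
 *	}
 *
 * with dev->shared_buf previously obtained from vmalloc_user(), which sets
 * VM_USERMAP on the underlying area as required above.
 */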

/*
 * Implement a stub for vmalloc_sync_all() if the architecture chose not to
 * have one.
 */
void __attribute__((weak)) vmalloc_sync_all(void)
{
}


static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
	/* apply_to_page_range() does all the hard work. */
	return 0;
}

/**
 * alloc_vm_area - allocate a range of kernel address space
 * @size: size of the area
 *
 * Returns:	NULL on failure, vm_struct on success
 *
 * This function reserves a range of kernel address space, and
 * allocates pagetables to map that range.  No actual mappings
 * are created.  If the kernel address space is not shared
 * between processes, it syncs the pagetable across all
 * processes.
 */
struct vm_struct *alloc_vm_area(size_t size)
{
	struct vm_struct *area;

	area = get_vm_area_caller(size, VM_IOREMAP,
				__builtin_return_address(0));
	if (area == NULL)
		return NULL;

	/*
	 * This ensures that page tables are constructed for this region
	 * of kernel virtual address space and mapped into init_mm.
	 */
	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
				area->size, f, NULL)) {
		free_vm_area(area);
		return NULL;
	}

	/* Make sure the pagetables are constructed in process kernel
	   mappings */
	vmalloc_sync_all();

	return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);

void free_vm_area(struct vm_struct *area)
{
	struct vm_struct *ret;

	ret = remove_vm_area(area->addr);
	BUG_ON(ret != area);
	kfree(area);
}
EXPORT_SYMBOL_GPL(free_vm_area);
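
/*
 * Illustrative sketch (not part of this file): alloc_vm_area() is meant for
 * callers (for example paravirtualized guests) that need a kernel virtual
 * range with page tables already present but want to install the actual
 * PTEs themselves, e.g. via a hypercall.  install_my_mappings() is an
 * assumption standing in for that step.
 *
 *	struct vm_struct *area = alloc_vm_area(nr_pages << PAGE_SHIFT);
 *
 *	if (!area)
 *		return -ENOMEM;
 *	if (install_my_mappings(area->addr, nr_pages)) {
 *		free_vm_area(area);
 *		return -EFAULT;
 *	}
 */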

#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;
	struct vm_struct *v;

	read_lock(&vmlist_lock);
	v = vmlist;
	while (n > 0 && v) {
		n--;
		v = v->next;
	}
	if (!n)
		return v;

	return NULL;
}

static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
	struct vm_struct *v = p;

	++*pos;
	return v->next;
}

static void s_stop(struct seq_file *m, void *p)
{
	read_unlock(&vmlist_lock);
}

static int s_show(struct seq_file *m, void *p)
{
	struct vm_struct *v = p;

	seq_printf(m, "0x%p-0x%p %7ld",
		v->addr, v->addr + v->size, v->size);

	if (v->caller) {
		char buff[2 * KSYM_NAME_LEN];

		seq_putc(m, ' ');
		sprint_symbol(buff, (unsigned long)v->caller);
		seq_puts(m, buff);
	}

	if (v->nr_pages)
		seq_printf(m, " pages=%d", v->nr_pages);

	if (v->phys_addr)
		seq_printf(m, " phys=%lx", v->phys_addr);

	if (v->flags & VM_IOREMAP)
		seq_printf(m, " ioremap");

	if (v->flags & VM_ALLOC)
		seq_printf(m, " vmalloc");

	if (v->flags & VM_MAP)
		seq_printf(m, " vmap");

	if (v->flags & VM_USERMAP)
		seq_printf(m, " user");

	if (v->flags & VM_VPAGES)
		seq_printf(m, " vpages");

	seq_putc(m, '\n');
	return 0;
}

const struct seq_operations vmalloc_op = {
	.start = s_start,
	.next = s_next,
	.stop = s_stop,
	.show = s_show,
};
#endif