/*
 *	linux/mm/fremap.c
 *
 * Explicit pagetable population and nonlinear (random) mappings support.
 *
 * started by Ingo Molnar, Copyright (C) 2002, 2003
 */
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/file.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/syscalls.h>

#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
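
/*
 * Tear down whatever the pagetable currently holds at @addr - a present
 * page, a swap entry or a stale file pte - so that a new file pte can
 * be installed in its place.
 */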
static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, pte_t *ptep)
{
	pte_t pte = *ptep;

	if (pte_present(pte)) {
		struct page *page;

		flush_cache_page(vma, addr, pte_pfn(pte));
		pte = ptep_clear_flush(vma, addr, ptep);
		page = vm_normal_page(vma, addr, pte);
		if (page) {
			if (pte_dirty(pte))
				set_page_dirty(page);
			page_remove_rmap(page, vma);
			page_cache_release(page);
			update_hiwater_rss(mm);
			dec_mm_counter(mm, file_rss);
		}
	} else {
		if (!pte_file(pte))
			free_swap_and_cache(pte_to_swp_entry(pte));
		pte_clear_not_present_full(mm, addr, ptep, 0);
	}
}

/*
 * Install a file pte to a given virtual memory address, release any
 * previously existing mapping.
 */
static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long addr, unsigned long pgoff, pgprot_t prot)
{
	int err = -ENOMEM;
	pte_t *pte;
	spinlock_t *ptl;

	pte = get_locked_pte(mm, addr, &ptl);
	if (!pte)
		goto out;

	if (!pte_none(*pte))
		zap_pte(mm, vma, addr, pte);

	set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
	/*
	 * We don't need to run update_mmu_cache() here because the "file pte"
	 * being installed by install_file_pte() is not a real pte - it's a
	 * non-present entry (like a swap entry), noting what file offset should
	 * be mapped there when there's a fault (in a non-linear vma where
	 * that's not obvious).
	 */
	pte_unmap_unlock(pte, ptl);
	err = 0;
out:
	return err;
}
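
/*
 * Note on the file pte encoding (schematic - the real bit layout is
 * arch-specific): pgoff_to_pte() builds a non-present entry that stores
 * the file offset and is tagged so that pte_file() recognizes it, and
 * pte_to_pgoff() recovers the offset at fault time:
 *
 *	pte_t pte = pgoff_to_pte(pgoff);
 *	BUG_ON(!pte_file(pte) || pte_to_pgoff(pte) != pgoff);
 */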

static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, unsigned long size, pgoff_t pgoff)
{
	int err;

	do {
		err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
		if (err)
			return err;

		size -= PAGE_SIZE;
		addr += PAGE_SIZE;
		pgoff++;
	} while (size);

	return 0;
}

/**
 * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
 * @start: start of the remapped virtual memory range
 * @size: size of the remapped virtual memory range
 * @prot: new protection bits of the range (see NOTE)
 * @pgoff: to-be-mapped page of the backing store file
 * @flags: 0 or MAP_NONBLOCK - the latter will cause no IO.
 *
 * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
 * (shared backing store file).
 *
 * This syscall works purely via pagetables, so it's the most efficient
 * way to map the same (large) file into a given virtual window. Unlike
 * mmap()/mremap() it does not create any new vmas. The new mappings are
 * also safe across swapout.
 *
 * An illustrative userspace sketch appears at the bottom of this file.
 *
 * NOTE: the @prot parameter right now is ignored (but must be zero),
 * and the vma's default protection is used. Arbitrary protections
 * might be implemented in the future.
 */
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
		unsigned long prot, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct address_space *mapping;
	unsigned long end = start + size;
	struct vm_area_struct *vma;
	int err = -EINVAL;
	int has_write_lock = 0;

	if (prot)
		return err;
	/*
	 * Sanitize the syscall parameters:
	 */
	start = start & PAGE_MASK;
	size = size & PAGE_MASK;

	/* Does the address range wrap, or is the span zero-sized? */
	if (start + size <= start)
		return err;

	/* Can we represent this offset inside this architecture's pte's? */
#if PTE_FILE_MAX_BITS < BITS_PER_LONG
	if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
		return err;
#endif

	/* We need down_write() to change vma->vm_flags. */
	down_read(&mm->mmap_sem);
retry:
	vma = find_vma(mm, start);

	/*
	 * Make sure the vma is shared, that it supports prefaulting,
	 * and that the remapped range is valid and fully within
	 * the single existing vma.  vm_private_data is used as a
	 * swapout cursor in a VM_NONLINEAR vma.
	 */
	if (!vma || !(vma->vm_flags & VM_SHARED))
		goto out;

	if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
		goto out;

	if (!(vma->vm_flags & VM_CAN_NONLINEAR))
		goto out;

	if (end <= start || start < vma->vm_start || end > vma->vm_end)
		goto out;

	/* Must set VM_NONLINEAR before any pages are populated. */
	if (!(vma->vm_flags & VM_NONLINEAR)) {
		/* Don't need a nonlinear mapping, exit success */
		if (pgoff == linear_page_index(vma, start)) {
			err = 0;
			goto out;
		}
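
		/*
		 * mmap_sem cannot be upgraded from read to write in
		 * place, so drop it, take it for writing and redo the
		 * vma lookup - the vma may have changed while the
		 * semaphore was not held.
		 */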
		if (!has_write_lock) {
			up_read(&mm->mmap_sem);
			down_write(&mm->mmap_sem);
			has_write_lock = 1;
			goto retry;
		}
		mapping = vma->vm_file->f_mapping;
		/*
		 * page_mkclean doesn't work on nonlinear vmas, so if
		 * dirty pages need to be accounted, emulate with linear
		 * vmas.
		 */
		if (mapping_cap_account_dirty(mapping)) {
			unsigned long addr;
			struct file *file = vma->vm_file;

			flags &= MAP_NONBLOCK;
			get_file(file);
			addr = mmap_region(file, start, size,
					flags, vma->vm_flags, pgoff, 1);
			fput(file);
			if (IS_ERR_VALUE(addr)) {
				err = addr;
			} else {
				BUG_ON(addr != start);
				err = 0;
			}
			goto out;
		}
		spin_lock(&mapping->i_mmap_lock);
		flush_dcache_mmap_lock(mapping);
		vma->vm_flags |= VM_NONLINEAR;
		vma_prio_tree_remove(vma, &mapping->i_mmap);
		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
		flush_dcache_mmap_unlock(mapping);
		spin_unlock(&mapping->i_mmap_lock);
	}

	err = populate_range(mm, vma, start, size, pgoff);
	if (!err && !(flags & MAP_NONBLOCK)) {
		if (unlikely(has_write_lock)) {
			downgrade_write(&mm->mmap_sem);
			has_write_lock = 0;
		}
		make_pages_present(start, start+size);
	}

	/*
	 * We can't clear VM_NONLINEAR because we'd have to do
	 * it after ->populate completes, and that would prevent
	 * downgrading the lock.  (Locks can't be upgraded).
	 */

out:
	if (likely(!has_write_lock))
		up_read(&mm->mmap_sem);
	else
		up_write(&mm->mmap_sem);

	return err;
}
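
/*
 * Illustrative userspace usage (an example sketch, not part of the kernel
 * sources): make the first page of a MAP_SHARED window show page 3 of the
 * backing file instead of page 0. Note that prot must be 0, and that pgoff
 * is in units of the page size.
 *
 *	size_t pagesize = getpagesize();
 *	char *win = mmap(NULL, 4 * pagesize, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, 0);
 *	int err = remap_file_pages(win, pagesize, 0, 3, 0);
 */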