/*
 *	mm/mremap.c
 *
 *	(C) Copyright 1996 Linus Torvalds
 *
 *	Address space accounting code	<alan@redhat.com>
 *	(C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>

#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
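/*
 * Walk the existing page tables (pgd -> pud -> pmd) for the source
 * address.  Returns the pmd covering addr, or NULL if any level is
 * missing or bad, in which case there is nothing to move there.
 */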
static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	/*
	 * We don't need page_table_lock: we have mmap_sem exclusively.
	 */
	pgd = pgd_offset(mm, addr);
	if (pgd_none_or_clear_bad(pgd))
		return NULL;

	pud = pud_offset(pgd, addr);
	if (pud_none_or_clear_bad(pud))
		return NULL;

	pmd = pmd_offset(pud, addr);
	if (pmd_none_or_clear_bad(pmd))
		return NULL;

	return pmd;
}
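/*
 * Allocate the page-table levels needed for the destination address.
 * Unlike the lookup above, this takes page_table_lock because the
 * allocators expect it.  Returns the new pmd, or NULL if an allocation
 * failed.
 */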
static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd = NULL;
	pte_t *pte;

	/*
	 * We do need page_table_lock: because allocators expect that.
	 */
	spin_lock(&mm->page_table_lock);
	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (!pud)
		goto out;

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		goto out;

	pte = pte_alloc_map(mm, pmd, addr);
	if (!pte) {
		pmd = NULL;
		goto out;
	}
	pte_unmap(pte);
out:
	spin_unlock(&mm->page_table_lock);
	return pmd;
}
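/*
 * Move the ptes in [old_addr, old_end) from old_pmd into new_pmd.
 * Each present pte is cleared and flushed at the old location and then
 * installed at the corresponding offset under new_pmd.  For file
 * mappings the i_mmap_lock is held so that truncation cannot run in
 * parallel and propagate stale pages into the destination.
 */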
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr)
{
	struct address_space *mapping = NULL;
	struct mm_struct *mm = vma->vm_mm;
	pte_t *old_pte, *new_pte, pte;

	if (vma->vm_file) {
		/*
		 * Subtle point from Rajesh Venkatasubramanian: before
		 * moving file-based ptes, we must lock vmtruncate out,
		 * since it might clean the dst vma before the src vma,
		 * and we propagate stale pages into the dst afterward.
		 */
		mapping = vma->vm_file->f_mapping;
		spin_lock(&mapping->i_mmap_lock);
		if (new_vma->vm_truncate_count &&
		    new_vma->vm_truncate_count != vma->vm_truncate_count)
			new_vma->vm_truncate_count = 0;
	}
	spin_lock(&mm->page_table_lock);

	old_pte = pte_offset_map(old_pmd, old_addr);
	new_pte = pte_offset_map_nested(new_pmd, new_addr);

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;
		pte = ptep_clear_flush(vma, old_addr, old_pte);
		/* ZERO_PAGE can be dependent on virtual addr */
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	pte_unmap_nested(new_pte - 1);
	pte_unmap(old_pte - 1);
	spin_unlock(&mm->page_table_lock);
	if (mapping)
		spin_unlock(&mapping->i_mmap_lock);
}
#define LATENCY_LIMIT	(64 * PAGE_SIZE)
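/*
 * Copy up to len bytes worth of page-table entries from old_addr in vma
 * to new_addr in new_vma.  Work is done in chunks that never cross a
 * PMD boundary on either side and never exceed LATENCY_LIMIT, with a
 * cond_resched() between chunks.  Returns how many bytes were actually
 * moved, which may be less than len if a destination pmd could not be
 * allocated.
 */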
static unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len)
{
	unsigned long extent, next, old_end;
	pmd_t *old_pmd, *new_pmd;

	old_end = old_addr + len;
	flush_cache_range(vma, old_addr, old_end);

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		cond_resched();
		next = (old_addr + PMD_SIZE) & PMD_MASK;
		if (next - 1 > old_end)
			next = old_end;
		extent = next - old_addr;
		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
		if (!old_pmd)
			continue;
		new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
		if (!new_pmd)
			break;
		next = (new_addr + PMD_SIZE) & PMD_MASK;
		if (extent > next - new_addr)
			extent = next - new_addr;
		if (extent > LATENCY_LIMIT)
			extent = LATENCY_LIMIT;
		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
				new_vma, new_pmd, new_addr);
	}

	return len + old_addr - old_end;	/* how much done */
}
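/*
 * Move the mapping described by vma from old_addr to new_addr.  Copies
 * the vma, moves the page tables across, then unmaps the old range.  If
 * moving the page tables fails part way, the entries already moved are
 * put back and the new area is unmapped instead, and -ENOMEM is
 * returned.  On success returns new_addr.
 */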
static unsigned long move_vma(struct vm_area_struct *vma,
		unsigned long old_addr, unsigned long old_len,
		unsigned long new_len, unsigned long new_addr)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma;
	unsigned long vm_flags = vma->vm_flags;
	unsigned long new_pgoff;
	unsigned long moved_len;
	unsigned long excess = 0;
	int split = 0;

	/*
	 * We'd prefer to avoid failure later on in do_munmap:
	 * which may split one vma into three before unmapping.
	 */
	if (mm->map_count >= sysctl_max_map_count - 3)
		return -ENOMEM;

	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
	if (!new_vma)
		return -ENOMEM;

	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
	if (moved_len < old_len) {
		/*
		 * On error, move entries back from new area to old,
		 * which will succeed since page tables still there,
		 * and then proceed to unmap new area instead of old.
		 */
		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
		vma = new_vma;
		old_len = new_len;
		old_addr = new_addr;
		new_addr = -ENOMEM;
	}

	/* Conceal VM_ACCOUNT so old reservation is not undone */
	if (vm_flags & VM_ACCOUNT) {
		vma->vm_flags &= ~VM_ACCOUNT;
		excess = vma->vm_end - vma->vm_start - old_len;
		if (old_addr > vma->vm_start &&
		    old_addr + old_len < vma->vm_end)
			split = 1;
	}

	/*
	 * if we failed to move page tables we still do total_vm increment
	 * since do_munmap() will decrement it by old_len == new_len
	 */
	mm->total_vm += new_len >> PAGE_SHIFT;
	vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len >> PAGE_SHIFT);

	if (do_munmap(mm, old_addr, old_len) < 0) {
		/* OOM: unable to split vma, just get accounts right */
		vm_unacct_memory(excess >> PAGE_SHIFT);
		excess = 0;
	}

	/* Restore VM_ACCOUNT if one or two pieces of vma left */
	if (excess) {
		vma->vm_flags |= VM_ACCOUNT;
		if (split)
			vma->vm_next->vm_flags |= VM_ACCOUNT;
	}

	if (vm_flags & VM_LOCKED) {
		mm->locked_vm += new_len >> PAGE_SHIFT;
		if (new_len > old_len)
			make_pages_present(new_addr + old_len,
					   new_addr + new_len);
	}

	return new_addr;
}
/*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
 *
 * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
 * This option implies MREMAP_MAYMOVE.
 */
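/*
 * Returns the (page-aligned) address of the resized mapping on success,
 * or a negative errno.  The two cases are told apart by testing
 * "ret & ~PAGE_MASK": a valid new address is page-aligned while a small
 * negative errno is not, which is also how the charge is unaccounted at
 * the out: label below.
 */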
unsigned long do_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	struct vm_area_struct *vma;
	unsigned long ret = -EINVAL;
	unsigned long charged = 0;

	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
		goto out;

	if (addr & ~PAGE_MASK)
		goto out;

	old_len = PAGE_ALIGN(old_len);
	new_len = PAGE_ALIGN(new_len);

	/*
	 * We allow a zero old-len as a special case
	 * for DOS-emu "duplicate shm area" thing. But
	 * a zero new-len is nonsensical.
	 */
	if (!new_len)
		goto out;

	/* new_addr is only valid if MREMAP_FIXED is specified */
	if (flags & MREMAP_FIXED) {
		if (new_addr & ~PAGE_MASK)
			goto out;
		if (!(flags & MREMAP_MAYMOVE))
			goto out;

		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
			goto out;

		/* Check if the location we're moving into overlaps the
		 * old location at all, and fail if it does.
		 */
		if ((new_addr <= addr) && (new_addr + new_len) > addr)
			goto out;

		if ((addr <= new_addr) && (addr + old_len) > new_addr)
			goto out;

		ret = do_munmap(current->mm, new_addr, new_len);
		if (ret)
			goto out;
	}

	/*
	 * Always allow a shrinking remap: that just unmaps
	 * the unnecessary pages..
	 * do_munmap does all the needed commit accounting
	 */
	if (old_len >= new_len) {
		ret = do_munmap(current->mm, addr + new_len, old_len - new_len);
		if (ret && old_len != new_len)
			goto out;
		ret = addr;
		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
			goto out;
		old_len = new_len;
	}
	/*
	 * Ok, we need to grow..  or relocate.
	 */
	ret = -EFAULT;
	vma = find_vma(current->mm, addr);
	if (!vma || vma->vm_start > addr)
		goto out;
	if (is_vm_hugetlb_page(vma)) {
		ret = -EINVAL;
		goto out;
	}
	/* We can't remap across vm area boundaries */
	if (old_len > vma->vm_end - addr)
		goto out;
	if (vma->vm_flags & VM_DONTEXPAND) {
		if (new_len > old_len)
			goto out;
	}
	if (vma->vm_flags & VM_LOCKED) {
		unsigned long locked, lock_limit;
		locked = current->mm->locked_vm << PAGE_SHIFT;
		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
		locked += new_len - old_len;
		ret = -EAGAIN;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			goto out;
	}
	if (!may_expand_vm(current->mm, (new_len - old_len) >> PAGE_SHIFT)) {
		ret = -ENOMEM;
		goto out;
	}

	if (vma->vm_flags & VM_ACCOUNT) {
		charged = (new_len - old_len) >> PAGE_SHIFT;
		if (security_vm_enough_memory(charged))
			goto out_nc;
	}
	/* old_len exactly to the end of the area..
	 * And we're not relocating the area.
	 */
	if (old_len == vma->vm_end - addr &&
	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
		unsigned long max_addr = TASK_SIZE;
		if (vma->vm_next)
			max_addr = vma->vm_next->vm_start;
		/* can we just expand the current mapping? */
		if (max_addr - addr >= new_len) {
			int pages = (new_len - old_len) >> PAGE_SHIFT;

			vma_adjust(vma, vma->vm_start,
				   addr + new_len, vma->vm_pgoff, NULL);

			current->mm->total_vm += pages;
			vm_stat_account(vma->vm_mm, vma->vm_flags,
					vma->vm_file, pages);
			if (vma->vm_flags & VM_LOCKED) {
				current->mm->locked_vm += pages;
				make_pages_present(addr + old_len,
						   addr + new_len);
			}
			ret = addr;
			goto out;
		}
	}
	/*
	 * We weren't able to just expand or shrink the area,
	 * we need to create a new one and move it..
	 */
	ret = -ENOMEM;
	if (flags & MREMAP_MAYMOVE) {
		if (!(flags & MREMAP_FIXED)) {
			unsigned long map_flags = 0;
			if (vma->vm_flags & VM_MAYSHARE)
				map_flags |= MAP_SHARED;

			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
						     vma->vm_pgoff, map_flags);
			ret = new_addr;
			if (new_addr & ~PAGE_MASK)
				goto out;
		}
		ret = move_vma(vma, addr, old_len, new_len, new_addr);
	}
out:
	if (ret & ~PAGE_MASK)
		vm_unacct_memory(charged);
out_nc:
	return ret;
}
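/*
 * The mremap(2) entry point: all of the work is done by do_mremap()
 * above, under a write lock on the caller's mmap_sem.
 */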
asmlinkage unsigned long sys_mremap(unsigned long addr,
	unsigned long old_len, unsigned long new_len,
	unsigned long flags, unsigned long new_addr)
{
	unsigned long ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mremap(addr, old_len, new_len, flags, new_addr);
	up_write(&current->mm->mmap_sem);
	return ret;
}