/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
/*
 * Walk the ptes in [addr, end), propagating any hardware dirty bits to
 * the struct page so that writeout can find the pages.  The "progress"
 * counter bounds how long we hold the pte lock: every 64 units we check
 * whether we should drop the lock and reschedule, and handling a dirty
 * page counts for more than merely skipping a pte.
 */
static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end)
{
	pte_t *pte;
	spinlock_t *ptl;
	int progress = 0;

again:
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		struct page *page;

		if (progress >= 64) {
			progress = 0;
			if (need_resched() || need_lockbreak(ptl))
				break;
		}
		progress++;
		if (!pte_present(*pte))
			continue;
		if (!pte_maybe_dirty(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
		if (!page)
			continue;
		if (ptep_clear_flush_dirty(vma, addr, pte) ||
		    page_test_and_clear_dirty(page))
			set_page_dirty(page);
		progress += 3;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	if (addr != end)
		goto again;
}
static inline void msync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		msync_pte_range(vma, pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
}
static inline void msync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
				unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		msync_pmd_range(vma, pud, addr, next);
	} while (pud++, addr = next, addr != end);
}
static void msync_page_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync().
	 */
	if (vma->vm_flags & VM_HUGETLB)
		return;

	BUG_ON(addr >= end);
	pgd = pgd_offset(vma->vm_mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		msync_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}
/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it
 * just marks the relevant pages dirty.  The application may then run
 * fsync() to write out the dirty pages and wait on the writeout, and
 * check the result.  Or the application may run fadvise(FADV_DONTNEED)
 * against the fd to start async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility
 * to applications.
 */
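/*
 * A minimal userspace sketch of the MS_ASYNC pattern described above
 * (illustrative only, not part of this file; the helper name
 * flush_mapping is made up):
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	int flush_mapping(int fd, void *map, size_t len)
 *	{
 *		// MS_ASYNC only marks the pages dirty ...
 *		if (msync(map, len, MS_ASYNC) < 0)
 *			return -1;
 *		// ... so fsync() does the writeout and waits on it.
 *		return fsync(fd);
 *	}
 */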
static int msync_interval(struct vm_area_struct *vma,
			unsigned long addr, unsigned long end, int flags)
{
	int ret = 0;
	struct file *file = vma->vm_file;

	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
		return -EBUSY;

	if (file && (vma->vm_flags & VM_SHARED)) {
		msync_page_range(vma, addr, end);

		if (flags & MS_SYNC) {
			struct address_space *mapping = file->f_mapping;
			int err;

			ret = filemap_fdatawrite(mapping);
			if (file->f_op && file->f_op->fsync) {
				/*
				 * We don't take i_mutex here because mmap_sem
				 * is already held.
				 */
				err = file->f_op->fsync(file, file->f_dentry, 1);
				if (err && !ret)
					ret = err;
			}
			err = filemap_fdatawait(mapping);
			if (!ret)
				ret = err;
		}
	}
	return ret;
}
asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct *vma;
	int unmapped_error, error = -EINVAL;

	if (flags & MS_SYNC)
		current->flags |= PF_SYNCWRITE;

	down_read(&current->mm->mmap_sem);
	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		goto out;
	if (start & ~PAGE_MASK)
		goto out;
	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
		goto out;
	error = -ENOMEM;
	len = (len + ~PAGE_MASK) & PAGE_MASK;
	end = start + len;
	if (end < start)
		goto out;
	error = 0;
	if (end == start)
		goto out;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -ENOMEM at the end (see the sketch
	 * below this function).
	 */
	vma = find_vma(current->mm, start);
	unmapped_error = 0;
	for (;;) {
		/* Still start < end. */
		error = -ENOMEM;
		if (!vma)
			goto out;
		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags);
				if (error)
					goto out;
			}
			error = unmapped_error;
			goto out;
		}
		/* Here vma->vm_start <= start < vma->vm_end < end. */
		error = msync_interval(vma, start, vma->vm_end, flags);
		if (error)
			goto out;
		start = vma->vm_end;
		vma = vma->vm_next;
	}
out:
	up_read(&current->mm->mmap_sem);
	current->flags &= ~PF_SYNCWRITE;
	return error;
}
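/*
 * A hedged userspace sketch of the hole semantics above (illustrative
 * only; fd is assumed to be an open file descriptor): the mapped part
 * of the range is still synced, but msync() reports ENOMEM because the
 * interval crossed an unmapped page.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *	#include <errno.h>
 *
 *	size_t page = sysconf(_SC_PAGESIZE);
 *	char *map = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
 *			 MAP_SHARED, fd, 0);
 *	munmap(map + page, page);		// punch a hole in page two
 *	if (msync(map, 2 * page, MS_SYNC) < 0 && errno == ENOMEM)
 *		;				// expected: range covered a hole
 */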