/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
# include <linux/slab.h>
# include <linux/pagemap.h>
2006-03-24 14:18:15 +03:00
# include <linux/fs.h>
2005-04-17 02:20:36 +04:00
# include <linux/mm.h>
# include <linux/mman.h>
# include <linux/hugetlb.h>
2006-03-24 14:18:12 +03:00
# include <linux/writeback.h>
# include <linux/file.h>
2005-04-17 02:20:36 +04:00
# include <linux/syscalls.h>
# include <asm/pgtable.h>
# include <asm/tlbflush.h>
2006-03-24 14:18:12 +03:00
static unsigned long msync_pte_range ( struct vm_area_struct * vma , pmd_t * pmd ,
2005-04-17 02:20:36 +04:00
unsigned long addr , unsigned long end )
{
pte_t * pte ;
2005-10-30 04:16:27 +03:00
spinlock_t * ptl ;
2005-10-30 04:15:53 +03:00
int progress = 0 ;
2006-03-24 14:18:12 +03:00
unsigned long ret = 0 ;
2005-04-17 02:20:36 +04:00
2005-10-30 04:15:53 +03:00
again :
2005-10-30 04:16:27 +03:00
pte = pte_offset_map_lock ( vma - > vm_mm , pmd , addr , & ptl ) ;
2005-04-17 02:20:36 +04:00
do {
struct page * page ;
2005-10-30 04:15:53 +03:00
if ( progress > = 64 ) {
progress = 0 ;
2005-10-30 04:16:27 +03:00
if ( need_resched ( ) | | need_lockbreak ( ptl ) )
2005-10-30 04:15:53 +03:00
break ;
}
progress + + ;
2005-04-17 02:20:36 +04:00
if ( ! pte_present ( * pte ) )
continue ;
2005-06-22 04:15:13 +04:00
if ( ! pte_maybe_dirty ( * pte ) )
continue ;
2005-11-29 01:34:23 +03:00
page = vm_normal_page ( vma , addr , * pte ) ;
if ( ! page )
2005-04-17 02:20:36 +04:00
continue ;
if ( ptep_clear_flush_dirty ( vma , addr , pte ) | |
2006-03-24 14:18:12 +03:00
page_test_and_clear_dirty ( page ) )
ret + = set_page_dirty ( page ) ;
2005-10-30 04:15:53 +03:00
progress + = 3 ;
2005-04-17 02:20:36 +04:00
} while ( pte + + , addr + = PAGE_SIZE , addr ! = end ) ;
2005-10-30 04:16:27 +03:00
pte_unmap_unlock ( pte - 1 , ptl ) ;
cond_resched ( ) ;
2005-10-30 04:15:53 +03:00
if ( addr ! = end )
goto again ;
2006-03-24 14:18:12 +03:00
return ret ;
2005-04-17 02:20:36 +04:00
}
2006-03-24 14:18:12 +03:00
static inline unsigned long msync_pmd_range ( struct vm_area_struct * vma ,
pud_t * pud , unsigned long addr , unsigned long end )
2005-04-17 02:20:36 +04:00
{
pmd_t * pmd ;
unsigned long next ;
2006-03-24 14:18:12 +03:00
unsigned long ret = 0 ;
2005-04-17 02:20:36 +04:00
pmd = pmd_offset ( pud , addr ) ;
do {
next = pmd_addr_end ( addr , end ) ;
if ( pmd_none_or_clear_bad ( pmd ) )
continue ;
2006-03-24 14:18:12 +03:00
ret + = msync_pte_range ( vma , pmd , addr , next ) ;
2005-04-17 02:20:36 +04:00
} while ( pmd + + , addr = next , addr ! = end ) ;
2006-03-24 14:18:12 +03:00
return ret ;
2005-04-17 02:20:36 +04:00
}
2006-03-24 14:18:12 +03:00
static inline unsigned long msync_pud_range ( struct vm_area_struct * vma ,
pgd_t * pgd , unsigned long addr , unsigned long end )
2005-04-17 02:20:36 +04:00
{
pud_t * pud ;
unsigned long next ;
2006-03-24 14:18:12 +03:00
unsigned long ret = 0 ;
2005-04-17 02:20:36 +04:00
pud = pud_offset ( pgd , addr ) ;
do {
next = pud_addr_end ( addr , end ) ;
if ( pud_none_or_clear_bad ( pud ) )
continue ;
2006-03-24 14:18:12 +03:00
ret + = msync_pmd_range ( vma , pud , addr , next ) ;
2005-04-17 02:20:36 +04:00
} while ( pud + + , addr = next , addr ! = end ) ;
2006-03-24 14:18:12 +03:00
return ret ;
2005-04-17 02:20:36 +04:00
}
2006-03-24 14:18:12 +03:00
static unsigned long msync_page_range ( struct vm_area_struct * vma ,
2005-04-17 02:20:36 +04:00
unsigned long addr , unsigned long end )
{
pgd_t * pgd ;
unsigned long next ;
2006-03-24 14:18:12 +03:00
unsigned long ret = 0 ;
2005-04-17 02:20:36 +04:00
/* For hugepages we can't go walking the page table normally,
* but that ' s ok , hugetlbfs is memory based , so we don ' t need
2005-10-30 04:16:12 +03:00
* to do anything more on an msync ( ) .
*/
2005-11-29 01:34:23 +03:00
if ( vma - > vm_flags & VM_HUGETLB )
2006-03-24 14:18:12 +03:00
return 0 ;
2005-04-17 02:20:36 +04:00
BUG_ON ( addr > = end ) ;
2005-10-30 04:16:27 +03:00
pgd = pgd_offset ( vma - > vm_mm , addr ) ;
2005-04-17 02:20:36 +04:00
flush_cache_range ( vma , addr , end ) ;
do {
next = pgd_addr_end ( addr , end ) ;
if ( pgd_none_or_clear_bad ( pgd ) )
continue ;
2006-03-24 14:18:12 +03:00
ret + = msync_pud_range ( vma , pgd , addr , next ) ;
2005-04-17 02:20:36 +04:00
} while ( pgd + + , addr = next , addr ! = end ) ;
2006-03-24 14:18:12 +03:00
return ret ;
2005-04-17 02:20:36 +04:00
}
/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it
 * just marks the relevant pages dirty.  The application may now run
 * fsync() to write out the dirty pages and wait on the writeout and
 * check the result.  Or the application may run fadvise(FADV_DONTNEED)
 * against the fd to start async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility
 * to applications.
 */
static int msync_interval ( struct vm_area_struct * vma , unsigned long addr ,
unsigned long end , int flags ,
unsigned long * nr_pages_dirtied )
2005-04-17 02:20:36 +04:00
{
struct file * file = vma - > vm_file ;
if ( ( flags & MS_INVALIDATE ) & & ( vma - > vm_flags & VM_LOCKED ) )
return - EBUSY ;
2006-03-24 14:18:13 +03:00
if ( file & & ( vma - > vm_flags & VM_SHARED ) )
2006-03-24 14:18:12 +03:00
* nr_pages_dirtied = msync_page_range ( vma , addr , end ) ;
2006-03-24 14:18:13 +03:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
asmlinkage long sys_msync ( unsigned long start , size_t len , int flags )
{
unsigned long end ;
struct vm_area_struct * vma ;
2006-03-24 14:18:14 +03:00
int unmapped_error = 0 ;
int error = - EINVAL ;
2006-03-24 14:18:12 +03:00
int done = 0 ;
2005-04-17 02:20:36 +04:00
if ( flags & ~ ( MS_ASYNC | MS_INVALIDATE | MS_SYNC ) )
goto out ;
if ( start & ~ PAGE_MASK )
goto out ;
if ( ( flags & MS_ASYNC ) & & ( flags & MS_SYNC ) )
goto out ;
error = - ENOMEM ;
len = ( len + ~ PAGE_MASK ) & PAGE_MASK ;
end = start + len ;
if ( end < start )
goto out ;
error = 0 ;
if ( end = = start )
goto out ;
/*
* If the interval [ start , end ) covers some unmapped address ranges ,
* just ignore them , but return - ENOMEM at the end .
*/
2006-03-24 14:18:12 +03:00
down_read ( & current - > mm - > mmap_sem ) ;
if ( flags & MS_SYNC )
current - > flags | = PF_SYNCWRITE ;
2005-04-17 02:20:36 +04:00
vma = find_vma ( current - > mm , start ) ;
2006-03-24 14:18:14 +03:00
if ( ! vma ) {
error = - ENOMEM ;
goto out_unlock ;
}
2006-03-24 14:18:12 +03:00
do {
unsigned long nr_pages_dirtied = 0 ;
struct file * file ;
2005-04-17 02:20:36 +04:00
/* Here start < vma->vm_end. */
if ( start < vma - > vm_start ) {
unmapped_error = - ENOMEM ;
start = vma - > vm_start ;
}
/* Here vma->vm_start <= start < vma->vm_end. */
if ( end < = vma - > vm_end ) {
if ( start < end ) {
2006-03-24 14:18:12 +03:00
error = msync_interval ( vma , start , end , flags ,
& nr_pages_dirtied ) ;
2005-04-17 02:20:36 +04:00
if ( error )
2006-03-24 14:18:12 +03:00
goto out_unlock ;
2005-04-17 02:20:36 +04:00
}
error = unmapped_error ;
2006-03-24 14:18:12 +03:00
done = 1 ;
} else {
/* Here vma->vm_start <= start < vma->vm_end < end. */
error = msync_interval ( vma , start , vma - > vm_end , flags ,
& nr_pages_dirtied ) ;
if ( error )
goto out_unlock ;
2005-04-17 02:20:36 +04:00
}
2006-03-24 14:18:12 +03:00
file = vma - > vm_file ;
2005-04-17 02:20:36 +04:00
start = vma - > vm_end ;
2006-03-24 14:18:12 +03:00
if ( ( flags & MS_ASYNC ) & & file & & nr_pages_dirtied ) {
get_file ( file ) ;
up_read ( & current - > mm - > mmap_sem ) ;
balance_dirty_pages_ratelimited_nr ( file - > f_mapping ,
nr_pages_dirtied ) ;
fput ( file ) ;
down_read ( & current - > mm - > mmap_sem ) ;
vma = find_vma ( current - > mm , start ) ;
2006-03-24 14:18:13 +03:00
} else if ( ( flags & MS_SYNC ) & & file & &
( vma - > vm_flags & VM_SHARED ) ) {
get_file ( file ) ;
up_read ( & current - > mm - > mmap_sem ) ;
2006-03-24 14:18:15 +03:00
error = do_fsync ( file , 0 ) ;
2006-03-24 14:18:13 +03:00
fput ( file ) ;
down_read ( & current - > mm - > mmap_sem ) ;
if ( error )
goto out_unlock ;
vma = find_vma ( current - > mm , start ) ;
2006-03-24 14:18:12 +03:00
} else {
vma = vma - > vm_next ;
}
2006-03-24 14:18:14 +03:00
} while ( vma & & ! done ) ;
2006-03-24 14:18:12 +03:00
out_unlock :
2005-04-17 02:20:36 +04:00
current - > flags & = ~ PF_SYNCWRITE ;
2006-03-24 14:18:12 +03:00
up_read ( & current - > mm - > mmap_sem ) ;
out :
2005-04-17 02:20:36 +04:00
return error ;
}