/*
 *	linux/mm/mincore.c
 *
 * Copyright (C) 1994-2006  Linus Torvalds
 */

/*
 * The mincore() system call.
 */
# include <linux/slab.h>
# include <linux/pagemap.h>
# include <linux/mm.h>
# include <linux/mman.h>
# include <linux/syscalls.h>
2007-02-12 11:51:39 +03:00
# include <linux/swap.h>
# include <linux/swapops.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
# include <asm/pgtable.h>
/*
* Later we can get more picky about what " in core " means precisely .
* For now , simply check to see if the page is in the page cache ,
* and is up to date ; i . e . that no page - in operation would be required
* at this time if an application were to map and access this page .
*/
2007-02-12 11:51:39 +03:00
static unsigned char mincore_page ( struct address_space * mapping , pgoff_t pgoff )
2005-04-17 02:20:36 +04:00
{
unsigned char present = 0 ;
2007-02-12 11:51:39 +03:00
struct page * page ;
2005-04-17 02:20:36 +04:00
2007-02-12 11:51:39 +03:00
/*
* When tmpfs swaps out a page from a file , any process mapping that
* file will not get a swp_entry_t in its pte , but rather it is like
* any other file mapping ( ie . marked ! present and faulted in with
2008-04-28 13:12:10 +04:00
* tmpfs ' s . fault ) . So swapped out tmpfs mappings are tested here .
2007-02-12 11:51:39 +03:00
*
* However when tmpfs moves the page from pagecache and into swapcache ,
* it is still in core , but the find_get_page below won ' t find it .
* No big deal , but make a note of it .
*/
page = find_get_page ( mapping , pgoff ) ;
2005-04-17 02:20:36 +04:00
if ( page ) {
present = PageUptodate ( page ) ;
page_cache_release ( page ) ;
}
return present ;
}
2006-12-16 20:44:32 +03:00
/*
* Do a chunk of " sys_mincore() " . We ' ve already checked
* all the arguments , we hold the mmap semaphore : we should
* just return the amount of info we ' re asked for .
*/
static long do_mincore ( unsigned long addr , unsigned char * vec , unsigned long pages )
2005-04-17 02:20:36 +04:00
{
2007-02-12 11:51:39 +03:00
pgd_t * pgd ;
pud_t * pud ;
pmd_t * pmd ;
pte_t * ptep ;
spinlock_t * ptl ;
unsigned long nr ;
int i ;
pgoff_t pgoff ;
2006-12-16 20:44:32 +03:00
struct vm_area_struct * vma = find_vma ( current - > mm , addr ) ;
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
/*
2006-12-17 03:01:50 +03:00
* find_vma ( ) didn ' t find anything above us , or we ' re
* in an unmapped hole in the address space : ENOMEM .
2006-12-16 20:44:32 +03:00
*/
2006-12-17 03:01:50 +03:00
if ( ! vma | | addr < vma - > vm_start )
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
/*
2007-02-12 11:51:39 +03:00
* Calculate how many pages there are left in the last level of the
* PTE array for our address .
2006-12-16 20:44:32 +03:00
*/
2007-02-12 11:51:39 +03:00
nr = PTRS_PER_PTE - ( ( addr > > PAGE_SHIFT ) & ( PTRS_PER_PTE - 1 ) ) ;
2007-02-14 14:39:01 +03:00
/*
* Don ' t overrun this vma
*/
nr = min ( nr , ( vma - > vm_end - addr ) > > PAGE_SHIFT ) ;
/*
* Don ' t return more than the caller asked for
*/
nr = min ( nr , pages ) ;
2005-04-17 02:20:36 +04:00
2007-02-12 11:51:39 +03:00
pgd = pgd_offset ( vma - > vm_mm , addr ) ;
if ( pgd_none_or_clear_bad ( pgd ) )
goto none_mapped ;
pud = pud_offset ( pgd , addr ) ;
if ( pud_none_or_clear_bad ( pud ) )
goto none_mapped ;
pmd = pmd_offset ( pud , addr ) ;
if ( pmd_none_or_clear_bad ( pmd ) )
goto none_mapped ;
ptep = pte_offset_map_lock ( vma - > vm_mm , pmd , addr , & ptl ) ;
for ( i = 0 ; i < nr ; i + + , ptep + + , addr + = PAGE_SIZE ) {
unsigned char present ;
pte_t pte = * ptep ;
2005-04-17 02:20:36 +04:00
2007-02-12 11:51:39 +03:00
if ( pte_present ( pte ) ) {
present = 1 ;
} else if ( pte_none ( pte ) ) {
if ( vma - > vm_file ) {
pgoff = linear_page_index ( vma , addr ) ;
present = mincore_page ( vma - > vm_file - > f_mapping ,
pgoff ) ;
} else
present = 0 ;
} else if ( pte_file ( pte ) ) {
pgoff = pte_to_pgoff ( pte ) ;
present = mincore_page ( vma - > vm_file - > f_mapping , pgoff ) ;
} else { /* pte is a swap entry */
swp_entry_t entry = pte_to_swp_entry ( pte ) ;
if ( is_migration_entry ( entry ) ) {
/* migration entries are always uptodate */
present = 1 ;
} else {
2007-02-14 14:35:02 +03:00
# ifdef CONFIG_SWAP
2007-02-12 11:51:39 +03:00
pgoff = entry . val ;
present = mincore_page ( & swapper_space , pgoff ) ;
2007-02-14 14:35:02 +03:00
# else
WARN_ON ( 1 ) ;
present = 1 ;
# endif
2007-02-12 11:51:39 +03:00
}
}
2007-02-14 14:36:32 +03:00
vec [ i ] = present ;
2007-02-12 11:51:39 +03:00
}
pte_unmap_unlock ( ptep - 1 , ptl ) ;
return nr ;
none_mapped :
if ( vma - > vm_file ) {
pgoff = linear_page_index ( vma , addr ) ;
for ( i = 0 ; i < nr ; i + + , pgoff + + )
vec [ i ] = mincore_page ( vma - > vm_file - > f_mapping , pgoff ) ;
2007-02-14 14:36:32 +03:00
} else {
for ( i = 0 ; i < nr ; i + + )
vec [ i ] = 0 ;
2007-02-12 11:51:39 +03:00
}
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
return nr ;
2005-04-17 02:20:36 +04:00
}
/*
* The mincore ( 2 ) system call .
*
* mincore ( ) returns the memory residency status of the pages in the
* current process ' s address space specified by [ addr , addr + len ) .
* The status is returned in a vector of bytes . The least significant
* bit of each byte is 1 if the referenced page is in memory , otherwise
* it is zero .
*
* Because the status of a page can change after mincore ( ) checks it
* but before it returns to the application , the returned vector may
* contain stale information . Only locked pages are guaranteed to
* remain in memory .
*
* return values :
* zero - success
* - EFAULT - vec points to an illegal address
* - EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
* - ENOMEM - Addresses in the range [ addr , addr + len ] are
* invalid for the address space of this process , or
* specify one or more pages which are not currently
* mapped
* - EAGAIN - A kernel resource was temporarily unavailable .
*/
2009-01-14 16:14:16 +03:00
SYSCALL_DEFINE3 ( mincore , unsigned long , start , size_t , len ,
unsigned char __user * , vec )
2005-04-17 02:20:36 +04:00
{
2006-12-16 20:44:32 +03:00
long retval ;
unsigned long pages ;
unsigned char * tmp ;
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
/* Check the start address: needs to be page-aligned.. */
if ( start & ~ PAGE_CACHE_MASK )
return - EINVAL ;
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
/* ..and we need to be passed a valid user-space range */
if ( ! access_ok ( VERIFY_READ , ( void __user * ) start , len ) )
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
/* This also avoids any overflows on PAGE_CACHE_ALIGN */
pages = len > > PAGE_SHIFT ;
pages + = ( len & ~ PAGE_MASK ) ! = 0 ;
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
if ( ! access_ok ( VERIFY_WRITE , vec , pages ) )
return - EFAULT ;
2005-04-17 02:20:36 +04:00
2006-12-16 20:44:32 +03:00
tmp = ( void * ) __get_free_page ( GFP_USER ) ;
if ( ! tmp )
2006-12-17 03:01:50 +03:00
return - EAGAIN ;
2006-12-16 20:44:32 +03:00
retval = 0 ;
while ( pages ) {
/*
* Do at most PAGE_SIZE entries per iteration , due to
* the temporary buffer size .
*/
down_read ( & current - > mm - > mmap_sem ) ;
2006-12-17 18:52:47 +03:00
retval = do_mincore ( start , tmp , min ( pages , PAGE_SIZE ) ) ;
2006-12-16 20:44:32 +03:00
up_read ( & current - > mm - > mmap_sem ) ;
if ( retval < = 0 )
break ;
if ( copy_to_user ( vec , tmp , retval ) ) {
retval = - EFAULT ;
break ;
2005-04-17 02:20:36 +04:00
}
2006-12-16 20:44:32 +03:00
pages - = retval ;
vec + = retval ;
start + = retval < < PAGE_SHIFT ;
retval = 0 ;
2005-04-17 02:20:36 +04:00
}
2006-12-16 20:44:32 +03:00
free_page ( ( unsigned long ) tmp ) ;
return retval ;
2005-04-17 02:20:36 +04:00
}