2005-06-23 22:05:25 -07:00
/*
 *	linux/mm/filemap_xip.c
 *
 * Copyright (C) 2005 IBM Corporation
 * Author: Carsten Otte <cotte@de.ibm.com>
 *
 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
 *
 */
# include <linux/fs.h>
# include <linux/pagemap.h>
# include <linux/module.h>
# include <linux/uio.h>
# include <linux/rmap.h>
Detach sched.h from mm.h
The first thing mm.h does is include sched.h, solely for the can_do_mlock()
inline function, which dereferences "current". By dealing with can_do_mlock(),
mm.h can be detached from sched.h, which is good. See below for why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-21 01:22:52 +04:00
# include <linux/sched.h>
2005-06-23 22:05:25 -07:00
# include <asm/tlbflush.h>
2008-04-28 02:13:02 -07:00
# include <asm/io.h>
2005-06-23 22:05:25 -07:00
2007-03-29 01:20:39 -07:00
/*
* We do use our own empty page to avoid interference with other users
* of ZERO_PAGE ( ) , such as / dev / zero
*/
static struct page * __xip_sparse_page ;
static struct page * xip_sparse_page ( void )
{
if ( ! __xip_sparse_page ) {
2008-01-08 15:32:57 -08:00
struct page * page = alloc_page ( GFP_HIGHUSER | __GFP_ZERO ) ;
if ( page ) {
2007-03-29 01:20:39 -07:00
static DEFINE_SPINLOCK ( xip_alloc_lock ) ;
spin_lock ( & xip_alloc_lock ) ;
if ( ! __xip_sparse_page )
2008-01-08 15:32:57 -08:00
__xip_sparse_page = page ;
2007-03-29 01:20:39 -07:00
else
2008-01-08 15:32:57 -08:00
__free_page ( page ) ;
2007-03-29 01:20:39 -07:00
spin_unlock ( & xip_alloc_lock ) ;
}
}
return __xip_sparse_page ;
}
2005-06-23 22:05:25 -07:00
/*
* This is a file read routine for execute in place files , and uses
2008-04-28 02:13:02 -07:00
* the mapping - > a_ops - > get_xip_mem ( ) function for the actual low - level
2005-06-23 22:05:25 -07:00
* stuff .
*
* Note the struct file * is not used at all . It may be NULL .
*/
2008-04-28 02:13:02 -07:00
static ssize_t
2005-06-23 22:05:25 -07:00
do_xip_mapping_read ( struct address_space * mapping ,
struct file_ra_state * _ra ,
struct file * filp ,
2008-04-28 02:13:02 -07:00
char __user * buf ,
size_t len ,
loff_t * ppos )
2005-06-23 22:05:25 -07:00
{
struct inode * inode = mapping - > host ;
2008-02-08 04:20:11 -08:00
pgoff_t index , end_index ;
unsigned long offset ;
2008-04-28 02:13:02 -07:00
loff_t isize , pos ;
size_t copied = 0 , error = 0 ;
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
BUG_ON ( ! mapping - > a_ops - > get_xip_mem ) ;
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
pos = * ppos ;
index = pos > > PAGE_CACHE_SHIFT ;
offset = pos & ~ PAGE_CACHE_MASK ;
2005-06-23 22:05:25 -07:00
isize = i_size_read ( inode ) ;
if ( ! isize )
goto out ;
end_index = ( isize - 1 ) > > PAGE_CACHE_SHIFT ;
2008-04-28 02:13:02 -07:00
do {
unsigned long nr , left ;
void * xip_mem ;
unsigned long xip_pfn ;
int zero = 0 ;
2005-06-23 22:05:25 -07:00
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE ;
if ( index > = end_index ) {
if ( index > end_index )
goto out ;
nr = ( ( isize - 1 ) & ~ PAGE_CACHE_MASK ) + 1 ;
if ( nr < = offset ) {
goto out ;
}
}
nr = nr - offset ;
2008-04-28 02:13:02 -07:00
if ( nr > len )
nr = len ;
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
error = mapping - > a_ops - > get_xip_mem ( mapping , index , 0 ,
& xip_mem , & xip_pfn ) ;
if ( unlikely ( error ) ) {
if ( error = = - ENODATA ) {
2005-06-23 22:05:25 -07:00
/* sparse */
2008-04-28 02:13:02 -07:00
zero = 1 ;
} else
2005-06-23 22:05:25 -07:00
goto out ;
2005-07-15 03:56:30 -07:00
}
2005-06-23 22:05:25 -07:00
/* If users can be writing to this page using arbitrary
* virtual addresses , take care about potential aliasing
* before reading the page on the kernel side .
*/
if ( mapping_writably_mapped ( mapping ) )
2008-04-28 02:13:02 -07:00
/* address based flush */ ;
2005-06-23 22:05:25 -07:00
/*
2008-04-28 02:13:02 -07:00
* Ok , we have the mem , so now we can copy it to user space . . .
2005-06-23 22:05:25 -07:00
*
* The actor routine returns how many bytes were actually used . .
* NOTE ! This may not be the same as how much of a user buffer
* we filled up ( we may be padding etc ) , so we can only update
* " pos " here ( the actor routine has to update the user buffer
* pointers and the remaining count ) .
*/
2008-04-28 02:13:02 -07:00
if ( ! zero )
left = __copy_to_user ( buf + copied , xip_mem + offset , nr ) ;
else
left = __clear_user ( buf + copied , nr ) ;
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
if ( left ) {
error = - EFAULT ;
goto out ;
}
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
copied + = ( nr - left ) ;
offset + = ( nr - left ) ;
index + = offset > > PAGE_CACHE_SHIFT ;
offset & = ~ PAGE_CACHE_MASK ;
} while ( copied < len ) ;
2005-06-23 22:05:25 -07:00
out :
2008-04-28 02:13:02 -07:00
* ppos = pos + copied ;
2005-06-23 22:05:25 -07:00
if ( filp )
file_accessed ( filp ) ;
2008-04-28 02:13:02 -07:00
return ( copied ? copied : error ) ;
2005-06-23 22:05:25 -07:00
}
ssize_t
2005-06-23 22:05:28 -07:00
xip_file_read ( struct file * filp , char __user * buf , size_t len , loff_t * ppos )
2005-06-23 22:05:25 -07:00
{
2005-06-23 22:05:28 -07:00
if ( ! access_ok ( VERIFY_WRITE , buf , len ) )
return - EFAULT ;
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
return do_xip_mapping_read ( filp - > f_mapping , & filp - > f_ra , filp ,
buf , len , ppos ) ;
2005-06-23 22:05:25 -07:00
}
2005-06-23 22:05:28 -07:00
EXPORT_SYMBOL_GPL ( xip_file_read ) ;
2005-06-23 22:05:25 -07:00
/*
* __xip_unmap is invoked from xip_unmap and
* xip_write
*
* This function walks all vmas of the address_space and unmaps the
2007-03-29 01:20:39 -07:00
* __xip_sparse_page when found at pgoff .
2005-06-23 22:05:25 -07:00
*/
static void
__xip_unmap ( struct address_space * mapping ,
unsigned long pgoff )
{
struct vm_area_struct * vma ;
struct mm_struct * mm ;
struct prio_tree_iter iter ;
unsigned long address ;
pte_t * pte ;
pte_t pteval ;
2005-10-29 18:16:31 -07:00
spinlock_t * ptl ;
2005-10-29 18:16:31 -07:00
struct page * page ;
2005-06-23 22:05:25 -07:00
2007-03-29 01:20:39 -07:00
page = __xip_sparse_page ;
if ( ! page )
return ;
2005-06-23 22:05:25 -07:00
spin_lock ( & mapping - > i_mmap_lock ) ;
vma_prio_tree_foreach ( vma , & iter , & mapping - > i_mmap , pgoff , pgoff ) {
mm = vma - > vm_mm ;
address = vma - > vm_start +
( ( pgoff - vma - > vm_pgoff ) < < PAGE_SHIFT ) ;
BUG_ON ( address < vma - > vm_start | | address > = vma - > vm_end ) ;
2005-10-29 18:16:31 -07:00
pte = page_check_address ( page , mm , address , & ptl ) ;
if ( pte ) {
2005-06-23 22:05:25 -07:00
/* Nuke the page table entry. */
2005-07-12 13:58:18 -07:00
flush_cache_page ( vma , address , pte_pfn ( * pte ) ) ;
2005-06-23 22:05:25 -07:00
pteval = ptep_clear_flush ( vma , address , pte ) ;
2006-12-22 01:09:33 -08:00
page_remove_rmap ( page , vma ) ;
2005-10-29 18:16:12 -07:00
dec_mm_counter ( mm , file_rss ) ;
2005-06-23 22:05:25 -07:00
BUG_ON ( pte_dirty ( pteval ) ) ;
2005-10-29 18:16:31 -07:00
pte_unmap_unlock ( pte , ptl ) ;
2005-10-29 18:16:12 -07:00
page_cache_release ( page ) ;
2005-06-23 22:05:25 -07:00
}
}
spin_unlock ( & mapping - > i_mmap_lock ) ;
}
/*
2007-07-19 01:46:59 -07:00
* xip_fault ( ) is invoked via the vma operations vector for a
2005-06-23 22:05:25 -07:00
* mapped memory region to read in file data during a page fault .
*
2007-07-19 01:46:59 -07:00
* This function is derived from filemap_fault , but used for execute in place
2005-06-23 22:05:25 -07:00
*/
2008-04-28 02:13:02 -07:00
static int xip_file_fault ( struct vm_area_struct * vma , struct vm_fault * vmf )
2005-06-23 22:05:25 -07:00
{
2008-04-28 02:13:02 -07:00
struct file * file = vma - > vm_file ;
2005-06-23 22:05:25 -07:00
struct address_space * mapping = file - > f_mapping ;
struct inode * inode = mapping - > host ;
2007-07-19 01:46:59 -07:00
pgoff_t size ;
2008-04-28 02:13:02 -07:00
void * xip_mem ;
unsigned long xip_pfn ;
struct page * page ;
int error ;
2005-06-23 22:05:25 -07:00
2007-07-19 01:46:59 -07:00
/* XXX: are VM_FAULT_ codes OK? */
2005-06-23 22:05:25 -07:00
size = ( i_size_read ( inode ) + PAGE_CACHE_SIZE - 1 ) > > PAGE_CACHE_SHIFT ;
2007-07-19 01:47:03 -07:00
if ( vmf - > pgoff > = size )
return VM_FAULT_SIGBUS ;
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
error = mapping - > a_ops - > get_xip_mem ( mapping , vmf - > pgoff , 0 ,
& xip_mem , & xip_pfn ) ;
if ( likely ( ! error ) )
goto found ;
if ( error ! = - ENODATA )
2007-07-19 01:47:03 -07:00
return VM_FAULT_OOM ;
2005-06-23 22:05:25 -07:00
/* sparse block */
2008-04-28 02:13:02 -07:00
if ( ( vma - > vm_flags & ( VM_WRITE | VM_MAYWRITE ) ) & &
( vma - > vm_flags & ( VM_SHARED | VM_MAYSHARE ) ) & &
2005-06-23 22:05:25 -07:00
( ! ( mapping - > host - > i_sb - > s_flags & MS_RDONLY ) ) ) {
2008-04-28 02:13:02 -07:00
int err ;
2005-06-23 22:05:25 -07:00
/* maybe shared writable, allocate new block */
2008-04-28 02:13:02 -07:00
error = mapping - > a_ops - > get_xip_mem ( mapping , vmf - > pgoff , 1 ,
& xip_mem , & xip_pfn ) ;
if ( error )
2007-07-19 01:47:03 -07:00
return VM_FAULT_SIGBUS ;
2008-04-28 02:13:02 -07:00
/* unmap sparse mappings at pgoff from all other vmas */
2007-07-19 01:47:03 -07:00
__xip_unmap ( mapping , vmf - > pgoff ) ;
2008-04-28 02:13:02 -07:00
found :
err = vm_insert_mixed ( vma , ( unsigned long ) vmf - > virtual_address ,
xip_pfn ) ;
if ( err = = - ENOMEM )
return VM_FAULT_OOM ;
BUG_ON ( err ) ;
return VM_FAULT_NOPAGE ;
2005-06-23 22:05:25 -07:00
} else {
2007-03-29 01:20:39 -07:00
/* not shared and writable, use xip_sparse_page() */
page = xip_sparse_page ( ) ;
2007-07-19 01:47:03 -07:00
if ( ! page )
return VM_FAULT_OOM ;
2005-06-23 22:05:25 -07:00
2008-04-28 02:13:02 -07:00
page_cache_get ( page ) ;
vmf - > page = page ;
return 0 ;
}
2005-06-23 22:05:25 -07:00
}
static struct vm_operations_struct xip_file_vm_ops = {
2007-07-19 01:46:59 -07:00
. fault = xip_file_fault ,
2005-06-23 22:05:25 -07:00
} ;
int xip_file_mmap ( struct file * file , struct vm_area_struct * vma )
{
2008-04-28 02:13:02 -07:00
BUG_ON ( ! file - > f_mapping - > a_ops - > get_xip_mem ) ;
2005-06-23 22:05:25 -07:00
file_accessed ( file ) ;
vma - > vm_ops = & xip_file_vm_ops ;
2008-04-28 02:13:02 -07:00
vma - > vm_flags | = VM_CAN_NONLINEAR | VM_MIXEDMAP ;
2005-06-23 22:05:25 -07:00
return 0 ;
}
EXPORT_SYMBOL_GPL ( xip_file_mmap ) ;
static ssize_t
2005-06-23 22:05:28 -07:00
__xip_file_write ( struct file * filp , const char __user * buf ,
size_t count , loff_t pos , loff_t * ppos )
2005-06-23 22:05:25 -07:00
{
2005-06-23 22:05:28 -07:00
struct address_space * mapping = filp - > f_mapping ;
2006-06-28 04:26:44 -07:00
const struct address_space_operations * a_ops = mapping - > a_ops ;
2005-06-23 22:05:25 -07:00
struct inode * inode = mapping - > host ;
long status = 0 ;
size_t bytes ;
ssize_t written = 0 ;
2008-04-28 02:13:02 -07:00
BUG_ON ( ! mapping - > a_ops - > get_xip_mem ) ;
2005-06-23 22:05:25 -07:00
do {
unsigned long index ;
unsigned long offset ;
size_t copied ;
2008-04-28 02:13:02 -07:00
void * xip_mem ;
unsigned long xip_pfn ;
2005-06-23 22:05:25 -07:00
offset = ( pos & ( PAGE_CACHE_SIZE - 1 ) ) ; /* Within page */
index = pos > > PAGE_CACHE_SHIFT ;
bytes = PAGE_CACHE_SIZE - offset ;
if ( bytes > count )
bytes = count ;
2008-04-28 02:13:02 -07:00
status = a_ops - > get_xip_mem ( mapping , index , 0 ,
& xip_mem , & xip_pfn ) ;
if ( status = = - ENODATA ) {
2005-06-23 22:05:25 -07:00
/* we allocate a new page unmap it */
2008-04-28 02:13:02 -07:00
status = a_ops - > get_xip_mem ( mapping , index , 1 ,
& xip_mem , & xip_pfn ) ;
if ( ! status )
2005-06-23 22:05:28 -07:00
/* unmap page at pgoff from all other vmas */
__xip_unmap ( mapping , index ) ;
2005-06-23 22:05:25 -07:00
}
2008-04-28 02:13:02 -07:00
if ( status )
2005-06-23 22:05:25 -07:00
break ;
2007-10-16 01:24:58 -07:00
copied = bytes -
2008-04-28 02:13:02 -07:00
__copy_from_user_nocache ( xip_mem + offset , buf , bytes ) ;
2007-10-16 01:24:58 -07:00
2005-06-23 22:05:25 -07:00
if ( likely ( copied > 0 ) ) {
status = copied ;
if ( status > = 0 ) {
written + = status ;
count - = status ;
pos + = status ;
buf + = status ;
}
}
if ( unlikely ( copied ! = bytes ) )
if ( status > = 0 )
status = - EFAULT ;
if ( status < 0 )
break ;
} while ( count ) ;
* ppos = pos ;
/*
* No need to use i_size_read ( ) here , the i_size
2006-01-09 15:59:24 -08:00
* cannot change under us because we hold i_mutex .
2005-06-23 22:05:25 -07:00
*/
if ( pos > inode - > i_size ) {
i_size_write ( inode , pos ) ;
mark_inode_dirty ( inode ) ;
}
return written ? written : status ;
}
2005-06-23 22:05:28 -07:00
/*
 * write() entry point for execute in place files.
 *
 * Runs under inode->i_mutex: checks that the user buffer is readable,
 * waits out a filesystem freeze, applies generic_write_checks(), strips
 * suid via remove_suid(), updates the file times and finally lets
 * __xip_file_write() do the copy.
 *
 * Returns the result of __xip_file_write(), 0 if the checks reduced the
 * request to nothing, or a negative errno from one of the checks.
 */
ssize_t
xip_file_write(struct file *filp, const char __user *buf, size_t len,
	       loff_t *ppos)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t ret;
	size_t count;
	loff_t pos;

	mutex_lock(&inode->i_mutex);

	if (!access_ok(VERIFY_READ, buf, len)) {
		ret = -EFAULT;
		goto out_up;
	}

	pos = *ppos;
	count = len;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
	if (ret || count == 0)
		goto out_backing;	/* error, or nothing left to write */

	ret = remove_suid(filp->f_path.dentry);
	if (ret)
		goto out_backing;

	file_update_time(filp);

	ret = __xip_file_write(filp, buf, count, pos, ppos);

out_backing:
	current->backing_dev_info = NULL;
out_up:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
2005-06-23 22:05:28 -07:00
EXPORT_SYMBOL_GPL ( xip_file_write ) ;
2005-06-23 22:05:25 -07:00
/*
 * truncate a page used for execute in place
 * functionality is analog to block_truncate_page but does use get_xip_mem
 * to get the page instead of page cache
 *
 * Zeroes the memory from @from up to the end of the filesystem block that
 * contains it.  Returns 0 on success, when @from sits on a block boundary,
 * or when the page is a hole; otherwise the error from ->get_xip_mem().
 */
int
xip_truncate_page(struct address_space *mapping, loff_t from)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize;
	unsigned in_block;
	unsigned long xip_pfn;
	void *xip_mem;
	int err;

	BUG_ON(!mapping->a_ops->get_xip_mem);

	blocksize = 1 << mapping->host->i_blkbits;
	in_block = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!in_block)
		return 0;

	err = mapping->a_ops->get_xip_mem(mapping, index, 0,
					  &xip_mem, &xip_pfn);
	if (unlikely(err)) {
		/* Hole? No need to truncate */
		return (err == -ENODATA) ? 0 : err;
	}

	/* Clear the remainder of the block, starting at the new end. */
	memset(xip_mem + offset, 0, blocksize - in_block);
	return 0;
}
EXPORT_SYMBOL_GPL ( xip_truncate_page ) ;