2005-06-24 09:05:25 +04:00
/*
* linux / mm / filemap_xip . c
*
* Copyright ( C ) 2005 IBM Corporation
* Author : Carsten Otte < cotte @ de . ibm . com >
*
* derived from linux / mm / filemap . c - Copyright ( C ) Linus Torvalds
*
*/
# include <linux/fs.h>
# include <linux/pagemap.h>
# include <linux/module.h>
# include <linux/uio.h>
# include <linux/rmap.h>
Detach sched.h from mm.h
The first thing mm.h does is include sched.h, solely for the can_do_mlock() inline
function, which dereferences "current". By dealing with can_do_mlock(),
mm.h can be detached from sched.h, which is good. See below for why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-21 01:22:52 +04:00
# include <linux/sched.h>
2005-06-24 09:05:25 +04:00
# include <asm/tlbflush.h>
# include "filemap.h"
2007-03-29 12:20:39 +04:00
/*
* We do use our own empty page to avoid interference with other users
* of ZERO_PAGE ( ) , such as / dev / zero
*/
static struct page * __xip_sparse_page ;
static struct page * xip_sparse_page ( void )
{
if ( ! __xip_sparse_page ) {
unsigned long zeroes = get_zeroed_page ( GFP_HIGHUSER ) ;
if ( zeroes ) {
static DEFINE_SPINLOCK ( xip_alloc_lock ) ;
spin_lock ( & xip_alloc_lock ) ;
if ( ! __xip_sparse_page )
__xip_sparse_page = virt_to_page ( zeroes ) ;
else
free_page ( zeroes ) ;
spin_unlock ( & xip_alloc_lock ) ;
}
}
return __xip_sparse_page ;
}
2005-06-24 09:05:25 +04:00
/*
 * Read routine for execute-in-place files.  Pages are looked up through
 * mapping->a_ops->get_xip_page() and handed to @actor one at a time until
 * the descriptor is satisfied, end of file is reached, or an error occurs.
 *
 * @_ra is not used.  @filp may be NULL; when it is set it is only passed
 * to file_accessed() once the read has finished.  On return *@ppos holds
 * the position following the last byte consumed by @actor.
 */
static void
do_xip_mapping_read(struct address_space *mapping,
		    struct file_ra_state *_ra,
		    struct file *filp,
		    loff_t *ppos,
		    read_descriptor_t *desc,
		    read_actor_t actor)
{
	struct inode *host = mapping->host;
	unsigned long pg_index, last_index, pg_offset;
	unsigned long chunk, used;
	loff_t file_size;

	BUG_ON(!mapping->a_ops->get_xip_page);

	pg_index = *ppos >> PAGE_CACHE_SHIFT;
	pg_offset = *ppos & ~PAGE_CACHE_MASK;

	file_size = i_size_read(host);
	if (file_size == 0)
		goto done;

	last_index = (file_size - 1) >> PAGE_CACHE_SHIFT;
	do {
		struct page *page;

		/* Nothing to read beyond the last page of the file */
		if (pg_index > last_index)
			break;

		/* chunk is the maximum number of bytes to copy from this page */
		if (pg_index == last_index) {
			chunk = ((file_size - 1) & ~PAGE_CACHE_MASK) + 1;
			if (chunk <= pg_offset)
				break;
		} else {
			chunk = PAGE_CACHE_SIZE;
		}
		chunk -= pg_offset;

		page = mapping->a_ops->get_xip_page(mapping,
					pg_index * (PAGE_SIZE / 512), 0);
		if (!page) {
			/* Did not get the page. Report it */
			desc->error = -EIO;
			break;
		}
		if (unlikely(IS_ERR(page))) {
			if (PTR_ERR(page) != -ENODATA) {
				desc->error = PTR_ERR(page);
				break;
			}
			/* sparse: read zeroes */
			page = ZERO_PAGE(0);
		}

		/*
		 * If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * We have the page, so copy it out.  The actor returns how
		 * many bytes it actually used, which need not match how much
		 * of the user buffer was filled (it may be padding etc), so
		 * only the position is advanced here; the actor itself
		 * updates the buffer pointers and the remaining count.
		 */
		used = actor(desc, page, pg_offset, chunk);
		pg_offset += used;
		pg_index += pg_offset >> PAGE_CACHE_SHIFT;
		pg_offset &= ~PAGE_CACHE_MASK;
	} while (used == chunk && desc->count);

done:
	*ppos = ((loff_t) pg_index << PAGE_CACHE_SHIFT) + pg_offset;
	if (filp)
		file_accessed(filp);
}
ssize_t
2005-06-24 09:05:28 +04:00
xip_file_read ( struct file * filp , char __user * buf , size_t len , loff_t * ppos )
2005-06-24 09:05:25 +04:00
{
2005-06-24 09:05:28 +04:00
read_descriptor_t desc ;
2005-06-24 09:05:25 +04:00
2005-06-24 09:05:28 +04:00
if ( ! access_ok ( VERIFY_WRITE , buf , len ) )
return - EFAULT ;
2005-06-24 09:05:25 +04:00
2005-06-24 09:05:28 +04:00
desc . written = 0 ;
desc . arg . buf = buf ;
desc . count = len ;
desc . error = 0 ;
2005-06-24 09:05:25 +04:00
2005-06-24 09:05:28 +04:00
do_xip_mapping_read ( filp - > f_mapping , & filp - > f_ra , filp ,
ppos , & desc , file_read_actor ) ;
if ( desc . written )
return desc . written ;
else
return desc . error ;
2005-06-24 09:05:25 +04:00
}
2005-06-24 09:05:28 +04:00
EXPORT_SYMBOL_GPL ( xip_file_read ) ;
2005-06-24 09:05:25 +04:00
/*
* __xip_unmap is invoked from xip_unmap and
* xip_write
*
* This function walks all vmas of the address_space and unmaps the
2007-03-29 12:20:39 +04:00
* __xip_sparse_page when found at pgoff .
2005-06-24 09:05:25 +04:00
*/
static void
__xip_unmap ( struct address_space * mapping ,
unsigned long pgoff )
{
struct vm_area_struct * vma ;
struct mm_struct * mm ;
struct prio_tree_iter iter ;
unsigned long address ;
pte_t * pte ;
pte_t pteval ;
2005-10-30 04:16:31 +03:00
spinlock_t * ptl ;
2005-10-30 04:16:31 +03:00
struct page * page ;
2005-06-24 09:05:25 +04:00
2007-03-29 12:20:39 +04:00
page = __xip_sparse_page ;
if ( ! page )
return ;
2005-06-24 09:05:25 +04:00
spin_lock ( & mapping - > i_mmap_lock ) ;
vma_prio_tree_foreach ( vma , & iter , & mapping - > i_mmap , pgoff , pgoff ) {
mm = vma - > vm_mm ;
address = vma - > vm_start +
( ( pgoff - vma - > vm_pgoff ) < < PAGE_SHIFT ) ;
BUG_ON ( address < vma - > vm_start | | address > = vma - > vm_end ) ;
2005-10-30 04:16:31 +03:00
pte = page_check_address ( page , mm , address , & ptl ) ;
if ( pte ) {
2005-06-24 09:05:25 +04:00
/* Nuke the page table entry. */
2005-07-13 00:58:18 +04:00
flush_cache_page ( vma , address , pte_pfn ( * pte ) ) ;
2005-06-24 09:05:25 +04:00
pteval = ptep_clear_flush ( vma , address , pte ) ;
2006-12-22 12:09:33 +03:00
page_remove_rmap ( page , vma ) ;
2005-10-30 04:16:12 +03:00
dec_mm_counter ( mm , file_rss ) ;
2005-06-24 09:05:25 +04:00
BUG_ON ( pte_dirty ( pteval ) ) ;
2005-10-30 04:16:31 +03:00
pte_unmap_unlock ( pte , ptl ) ;
2005-10-30 04:16:12 +03:00
page_cache_release ( page ) ;
2005-06-24 09:05:25 +04:00
}
}
spin_unlock ( & mapping - > i_mmap_lock ) ;
}
/*
2007-07-19 12:46:59 +04:00
* xip_fault ( ) is invoked via the vma operations vector for a
2005-06-24 09:05:25 +04:00
* mapped memory region to read in file data during a page fault .
*
2007-07-19 12:46:59 +04:00
* This function is derived from filemap_fault , but used for execute in place
2005-06-24 09:05:25 +04:00
*/
2007-07-19 12:47:03 +04:00
static int xip_file_fault ( struct vm_area_struct * area , struct vm_fault * vmf )
2005-06-24 09:05:25 +04:00
{
struct file * file = area - > vm_file ;
struct address_space * mapping = file - > f_mapping ;
struct inode * inode = mapping - > host ;
struct page * page ;
2007-07-19 12:46:59 +04:00
pgoff_t size ;
2005-06-24 09:05:25 +04:00
2007-07-19 12:46:59 +04:00
/* XXX: are VM_FAULT_ codes OK? */
2005-06-24 09:05:25 +04:00
size = ( i_size_read ( inode ) + PAGE_CACHE_SIZE - 1 ) > > PAGE_CACHE_SHIFT ;
2007-07-19 12:47:03 +04:00
if ( vmf - > pgoff > = size )
return VM_FAULT_SIGBUS ;
2005-06-24 09:05:25 +04:00
2007-07-19 12:46:59 +04:00
page = mapping - > a_ops - > get_xip_page ( mapping ,
2007-07-19 12:47:03 +04:00
vmf - > pgoff * ( PAGE_SIZE / 512 ) , 0 ) ;
2007-03-29 12:20:39 +04:00
if ( ! IS_ERR ( page ) )
2005-10-30 04:16:12 +03:00
goto out ;
2007-07-19 12:47:03 +04:00
if ( PTR_ERR ( page ) ! = - ENODATA )
return VM_FAULT_OOM ;
2005-06-24 09:05:25 +04:00
/* sparse block */
if ( ( area - > vm_flags & ( VM_WRITE | VM_MAYWRITE ) ) & &
( area - > vm_flags & ( VM_SHARED | VM_MAYSHARE ) ) & &
( ! ( mapping - > host - > i_sb - > s_flags & MS_RDONLY ) ) ) {
/* maybe shared writable, allocate new block */
2007-07-19 12:46:59 +04:00
page = mapping - > a_ops - > get_xip_page ( mapping ,
2007-07-19 12:47:03 +04:00
vmf - > pgoff * ( PAGE_SIZE / 512 ) , 1 ) ;
if ( IS_ERR ( page ) )
return VM_FAULT_SIGBUS ;
2005-06-24 09:05:25 +04:00
/* unmap page at pgoff from all other vmas */
2007-07-19 12:47:03 +04:00
__xip_unmap ( mapping , vmf - > pgoff ) ;
2005-06-24 09:05:25 +04:00
} else {
2007-03-29 12:20:39 +04:00
/* not shared and writable, use xip_sparse_page() */
page = xip_sparse_page ( ) ;
2007-07-19 12:47:03 +04:00
if ( ! page )
return VM_FAULT_OOM ;
2005-06-24 09:05:25 +04:00
}
2005-10-30 04:16:12 +03:00
out :
page_cache_get ( page ) ;
2007-07-19 12:47:03 +04:00
vmf - > page = page ;
2007-07-19 12:47:05 +04:00
return 0 ;
2005-06-24 09:05:25 +04:00
}
static struct vm_operations_struct xip_file_vm_ops = {
2007-07-19 12:46:59 +04:00
. fault = xip_file_fault ,
2005-06-24 09:05:25 +04:00
} ;
int xip_file_mmap ( struct file * file , struct vm_area_struct * vma )
{
BUG_ON ( ! file - > f_mapping - > a_ops - > get_xip_page ) ;
file_accessed ( file ) ;
vma - > vm_ops = & xip_file_vm_ops ;
2007-07-19 12:46:59 +04:00
vma - > vm_flags | = VM_CAN_NONLINEAR ;
2005-06-24 09:05:25 +04:00
return 0 ;
}
EXPORT_SYMBOL_GPL ( xip_file_mmap ) ;
static ssize_t
2005-06-24 09:05:28 +04:00
__xip_file_write ( struct file * filp , const char __user * buf ,
size_t count , loff_t pos , loff_t * ppos )
2005-06-24 09:05:25 +04:00
{
2005-06-24 09:05:28 +04:00
struct address_space * mapping = filp - > f_mapping ;
2006-06-28 15:26:44 +04:00
const struct address_space_operations * a_ops = mapping - > a_ops ;
2005-06-24 09:05:25 +04:00
struct inode * inode = mapping - > host ;
long status = 0 ;
struct page * page ;
size_t bytes ;
ssize_t written = 0 ;
BUG_ON ( ! mapping - > a_ops - > get_xip_page ) ;
do {
unsigned long index ;
unsigned long offset ;
size_t copied ;
offset = ( pos & ( PAGE_CACHE_SIZE - 1 ) ) ; /* Within page */
index = pos > > PAGE_CACHE_SHIFT ;
bytes = PAGE_CACHE_SIZE - offset ;
if ( bytes > count )
bytes = count ;
/*
* Bring in the user page that we will copy from _first_ .
* Otherwise there ' s a nasty deadlock on copying from the
* same page as we ' re writing to , without it being marked
* up - to - date .
*/
fault_in_pages_readable ( buf , bytes ) ;
page = a_ops - > get_xip_page ( mapping ,
2005-06-24 09:05:28 +04:00
index * ( PAGE_SIZE / 512 ) , 0 ) ;
2005-06-24 09:05:25 +04:00
if ( IS_ERR ( page ) & & ( PTR_ERR ( page ) = = - ENODATA ) ) {
/* we allocate a new page unmap it */
page = a_ops - > get_xip_page ( mapping ,
2005-06-24 09:05:28 +04:00
index * ( PAGE_SIZE / 512 ) , 1 ) ;
2005-06-24 09:05:25 +04:00
if ( ! IS_ERR ( page ) )
2005-06-24 09:05:28 +04:00
/* unmap page at pgoff from all other vmas */
__xip_unmap ( mapping , index ) ;
2005-06-24 09:05:25 +04:00
}
if ( IS_ERR ( page ) ) {
status = PTR_ERR ( page ) ;
break ;
}
2005-06-24 09:05:28 +04:00
copied = filemap_copy_from_user ( page , offset , buf , bytes ) ;
2005-06-24 09:05:25 +04:00
flush_dcache_page ( page ) ;
if ( likely ( copied > 0 ) ) {
status = copied ;
if ( status > = 0 ) {
written + = status ;
count - = status ;
pos + = status ;
buf + = status ;
}
}
if ( unlikely ( copied ! = bytes ) )
if ( status > = 0 )
status = - EFAULT ;
if ( status < 0 )
break ;
} while ( count ) ;
* ppos = pos ;
/*
* No need to use i_size_read ( ) here , the i_size
2006-01-10 02:59:24 +03:00
* cannot change under us because we hold i_mutex .
2005-06-24 09:05:25 +04:00
*/
if ( pos > inode - > i_size ) {
i_size_write ( inode , pos ) ;
mark_inode_dirty ( inode ) ;
}
return written ? written : status ;
}
2005-06-24 09:05:28 +04:00
/*
 * xip_file_write - write() implementation for execute-in-place files
 *
 * Runs with the inode's i_mutex held: validates the user buffer, applies
 * generic_write_checks(), removes suid via remove_suid(), updates the file
 * times and then hands the actual copy to __xip_file_write().
 *
 * Returns the result of __xip_file_write(), 0 if the checks leave nothing to
 * write, or a negative error code from one of the preparatory steps.
 */
ssize_t
xip_file_write(struct file *filp, const char __user *buf, size_t len,
	       loff_t *ppos)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	size_t nbytes;
	loff_t start;
	ssize_t ret;

	mutex_lock(&inode->i_mutex);

	if (!access_ok(VERIFY_READ, buf, len)) {
		ret = -EFAULT;
		goto unlock;
	}

	start = *ppos;
	nbytes = len;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	ret = generic_write_checks(filp, &start, &nbytes,
				   S_ISBLK(inode->i_mode));
	if (!ret && nbytes != 0) {
		ret = remove_suid(filp->f_path.dentry);
		if (!ret) {
			file_update_time(filp);
			ret = __xip_file_write(filp, buf, nbytes, start, ppos);
		}
	}

	current->backing_dev_info = NULL;
unlock:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(xip_file_write);
2005-06-24 09:05:25 +04:00
/*
 * Truncate a page used for execute in place.
 *
 * Analogous to block_truncate_page, but the page is obtained through
 * get_xip_page instead of the page cache.  The part of the block that
 * follows @from is zeroed; nothing is done when @from is block aligned or
 * falls into a hole.
 *
 * Returns 0 on success, -ENOMEM if get_xip_page() returned no page, or the
 * error reported by get_xip_page().
 */
int
xip_truncate_page(struct address_space *mapping, loff_t from)
{
	pgoff_t pg_index = from >> PAGE_CACHE_SHIFT;
	unsigned pg_offset = from & (PAGE_CACHE_SIZE - 1);
	unsigned blocksize;
	unsigned in_block;
	unsigned tail;
	struct page *page;

	BUG_ON(!mapping->a_ops->get_xip_page);

	blocksize = 1 << mapping->host->i_blkbits;
	in_block = pg_offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (in_block == 0)
		return 0;

	/* Number of bytes from @from up to the end of its block */
	tail = blocksize - in_block;

	page = mapping->a_ops->get_xip_page(mapping,
					    pg_index * (PAGE_SIZE / 512), 0);
	if (!page)
		return -ENOMEM;
	if (unlikely(IS_ERR(page))) {
		/* Hole? No need to truncate */
		if (PTR_ERR(page) == -ENODATA)
			return 0;
		return PTR_ERR(page);
	}

	zero_user_page(page, pg_offset, tail, KM_USER0);
	return 0;
}
EXPORT_SYMBOL_GPL(xip_truncate_page);