/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
# include "xfs.h"
2005-11-02 06:38:42 +03:00
# include "xfs_bit.h"
2005-04-17 02:20:36 +04:00
# include "xfs_log.h"
2005-11-02 06:38:42 +03:00
# include "xfs_inum.h"
2005-04-17 02:20:36 +04:00
# include "xfs_sb.h"
2005-11-02 06:38:42 +03:00
# include "xfs_ag.h"
2005-04-17 02:20:36 +04:00
# include "xfs_dir.h"
# include "xfs_dir2.h"
# include "xfs_trans.h"
# include "xfs_dmapi.h"
# include "xfs_mount.h"
# include "xfs_bmap_btree.h"
# include "xfs_alloc_btree.h"
# include "xfs_ialloc_btree.h"
# include "xfs_dir_sf.h"
# include "xfs_dir2_sf.h"
2005-11-02 06:38:42 +03:00
# include "xfs_attr_sf.h"
2005-04-17 02:20:36 +04:00
# include "xfs_dinode.h"
# include "xfs_inode.h"
2005-11-02 06:38:42 +03:00
# include "xfs_alloc.h"
# include "xfs_btree.h"
2005-04-17 02:20:36 +04:00
# include "xfs_error.h"
# include "xfs_rw.h"
# include "xfs_iomap.h"
# include <linux/mpage.h>
2006-01-11 12:48:14 +03:00
# include <linux/pagevec.h>
2005-04-17 02:20:36 +04:00
# include <linux/writeback.h>
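
/*
 * Walk the buffers attached to a page and note whether any of them
 * are delayed allocate, unmapped or unwritten, clearing the unwritten
 * flag on buffers that are not also marked delayed along the way.
 */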
STATIC void
xfs_count_page_state(
	struct page		*page,
	int			*delalloc,
	int			*unmapped,
	int			*unwritten)
{
	struct buffer_head	*bh, *head;

	*delalloc = *unmapped = *unwritten = 0;

	bh = head = page_buffers(page);
	do {
		if (buffer_uptodate(bh) && !buffer_mapped(bh))
			(*unmapped) = 1;
		else if (buffer_unwritten(bh) && !buffer_delay(bh))
			clear_buffer_unwritten(bh);
		else if (buffer_unwritten(bh))
			(*unwritten) = 1;
		else if (buffer_delay(bh))
			(*delalloc) = 1;
	} while ((bh = bh->b_this_page) != head);
}
#if defined(XFS_RW_TRACE)
void
xfs_page_trace(
	int		tag,
	struct inode	*inode,
	struct page	*page,
	int		mask)
{
	xfs_inode_t	*ip;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	loff_t		isize = i_size_read(inode);
	loff_t		offset = page_offset(page);
	int		delalloc = -1, unmapped = -1, unwritten = -1;

	if (page_has_buffers(page))
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);

	ip = xfs_vtoi(vp);
	if (!ip->i_rwtrace)
		return;

	ktrace_enter(ip->i_rwtrace,
		(void *)((unsigned long)tag),
		(void *)ip,
		(void *)inode,
		(void *)page,
		(void *)((unsigned long)mask),
		(void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)),
		(void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)),
		(void *)((unsigned long)((isize >> 32) & 0xffffffff)),
		(void *)((unsigned long)(isize & 0xffffffff)),
		(void *)((unsigned long)((offset >> 32) & 0xffffffff)),
		(void *)((unsigned long)(offset & 0xffffffff)),
		(void *)((unsigned long)delalloc),
		(void *)((unsigned long)unmapped),
		(void *)((unsigned long)unwritten),
		(void *)NULL,
		(void *)NULL);
}
#else
#define xfs_page_trace(tag, inode, page, mask)
#endif

/*
 * Schedule IO completion handling on an xfsdatad if this was
 * the final hold on this ioend.
 */
STATIC void
xfs_finish_ioend(
	xfs_ioend_t		*ioend)
{
	if (atomic_dec_and_test(&ioend->io_remaining))
		queue_work(xfsdatad_workqueue, &ioend->io_work);
}

/*
 * We're now finished for good with this ioend structure.
 * Update the page state via the associated buffer_heads,
 * release holds on the inode and bio, and finally free
 * up memory.  Do not use the ioend after this.
 */
STATIC void
xfs_destroy_ioend(
	xfs_ioend_t		*ioend)
{
	struct buffer_head	*bh, *next;

	for (bh = ioend->io_buffer_head; bh; bh = next) {
		next = bh->b_private;
		bh->b_end_io(bh, ioend->io_uptodate);
	}

	vn_iowake(ioend->io_vnode);
	mempool_free(ioend, xfs_ioend_pool);
}

/*
 * Buffered IO write completion for delayed allocate extents.
 * TODO: Update ondisk isize now that we know the file data
 * has been flushed (i.e. the notorious "NULL file" problem).
 */
STATIC void
xfs_end_bio_delalloc(
	void			*data)
{
	xfs_ioend_t		*ioend = data;

	xfs_destroy_ioend(ioend);
}

/*
 * Buffered IO write completion for regular, written extents.
 */
STATIC void
xfs_end_bio_written(
	void			*data)
{
	xfs_ioend_t		*ioend = data;

	xfs_destroy_ioend(ioend);
}

/*
 * IO write completion for unwritten extents.
 *
 * Issue transactions to convert a buffer range from unwritten
 * to written extents.
 */
STATIC void
xfs_end_bio_unwritten(
	void			*data)
{
	xfs_ioend_t		*ioend = data;
	vnode_t			*vp = ioend->io_vnode;
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;
	int			error;

	if (ioend->io_uptodate)
		VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
	xfs_destroy_ioend(ioend);
}

/*
 * Allocate and initialise an IO completion structure.
 * We need to track unwritten extent write completion here initially.
 * We'll need to extend this for updating the ondisk inode size later
 * (vs. incore size).
 */
STATIC xfs_ioend_t *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type)
{
	xfs_ioend_t		*ioend;

	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);

	/*
	 * Set the count to 1 initially, which will prevent the I/O
	 * completion callback from running before we have submitted
	 * all of the I/O it covers.
	 */
	atomic_set(&ioend->io_remaining, 1);
	ioend->io_uptodate = 1; /* cleared if any I/O fails */
	ioend->io_list = NULL;
	ioend->io_type = type;
	ioend->io_vnode = LINVFS_GET_VP(inode);
	ioend->io_buffer_head = NULL;
	ioend->io_buffer_tail = NULL;
	atomic_inc(&ioend->io_vnode->v_iocount);
	ioend->io_offset = 0;
	ioend->io_size = 0;

	if (type == IOMAP_UNWRITTEN)
		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
	else if (type == IOMAP_DELAY)
		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc, ioend);
	else
		INIT_WORK(&ioend->io_work, xfs_end_bio_written, ioend);

	return ioend;
}
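
/*
 * Map a range of the file to disk blocks via VOP_BMAP(), marking the
 * vnode modified if the call may have allocated space.
 */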
STATIC int
xfs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	ssize_t			count,
	xfs_iomap_t		*mapp,
	int			flags)
{
	vnode_t			*vp = LINVFS_GET_VP(inode);
	int			error, nmaps = 1;

	VOP_BMAP(vp, offset, count, flags, mapp, &nmaps, error);
	if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)))
		VMODIFY(vp);
	return -error;
}

STATIC inline int
xfs_iomap_valid(
	xfs_iomap_t		*iomapp,
	loff_t			offset)
{
	return offset >= iomapp->iomap_offset &&
		offset < iomapp->iomap_offset + iomapp->iomap_bsize;
}

/*
 * BIO completion handler for buffered IO.
 */
STATIC int
xfs_end_bio(
	struct bio		*bio,
	unsigned int		bytes_done,
	int			error)
{
	xfs_ioend_t		*ioend = bio->bi_private;

	if (bio->bi_size)
		return 1;

	ASSERT(ioend);
	ASSERT(atomic_read(&bio->bi_cnt) >= 1);

	/* Toss bio and pass work off to an xfsdatad thread */
	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		ioend->io_uptodate = 0;
	bio->bi_private = NULL;
	bio->bi_end_io = NULL;
	bio_put(bio);
	xfs_finish_ioend(ioend);
	return 0;
}
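
/*
 * Attach a bio to an ioend and submit it for write.  The extra
 * reference taken on io_remaining here is dropped by the bio
 * completion handler via xfs_finish_ioend().
 */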
STATIC void
xfs_submit_ioend_bio(
	xfs_ioend_t	*ioend,
	struct bio	*bio)
{
	atomic_inc(&ioend->io_remaining);

	bio->bi_private = ioend;
	bio->bi_end_io = xfs_end_bio;

	submit_bio(WRITE, bio);
	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
	bio_put(bio);
}
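
/*
 * Allocate a bio for the given buffer, retrying with progressively
 * fewer vectors until the allocation succeeds, then point it at the
 * buffer's block and device.
 */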
STATIC struct bio *
xfs_alloc_ioend_bio(
	struct buffer_head	*bh)
{
	struct bio		*bio;
	int			nvecs = bio_get_nr_vecs(bh->b_bdev);

	do {
		bio = bio_alloc(GFP_NOIO, nvecs);
		nvecs >>= 1;
	} while (!bio);

	ASSERT(bio->bi_private == NULL);
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	bio_get(bio);
	return bio;
}
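
/*
 * Mark a mapped, locked buffer for async writeback and move it into
 * a clean state.
 */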
STATIC void
xfs_start_buffer_writeback(
	struct buffer_head	*bh)
{
	ASSERT(buffer_mapped(bh));
	ASSERT(buffer_locked(bh));
	ASSERT(!buffer_delay(bh));
	ASSERT(!buffer_unwritten(bh));

	mark_buffer_async_write(bh);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
}
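
/*
 * Flag the page as under writeback and unlock it.  If no buffers are
 * actually being written, end writeback immediately and tell the VM
 * the page was skipped.
 */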
STATIC void
xfs_start_page_writeback(
	struct page		*page,
	struct writeback_control *wbc,
	int			clear_dirty,
	int			buffers)
{
	ASSERT(PageLocked(page));
	ASSERT(!PageWriteback(page));
	set_page_writeback(page);
	if (clear_dirty)
		clear_page_dirty(page);
	unlock_page(page);
	if (!buffers) {
		end_page_writeback(page);
		wbc->pages_skipped++;	/* We didn't write this page */
	}
}

static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
{
	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
}

/*
 * Submit all of the bios for all of the ioends we have saved up, covering the
 * initial writepage page and also any probed pages.
 *
 * Because we may have multiple ioends spanning a page, we need to start
 * writeback on all the buffers before we submit them for I/O.  If we mark the
 * buffers as we go, then we can end up with a page that only has some buffers
 * marked async write, and I/O completion on those can occur before we mark
 * the remaining buffers async write.
 *
 * The end result of this is that we trip a bug in end_page_writeback() because
 * we call it twice for the one page as the code in end_buffer_async_write()
 * assumes that all buffers on the page are started at the same time.
 *
 * The fix is two passes across the ioend list - one to start writeback on the
 * buffer_heads, and then a second one to submit them for I/O.
 */
STATIC void
xfs_submit_ioend(
	xfs_ioend_t		*ioend)
{
	xfs_ioend_t		*head = ioend;
	xfs_ioend_t		*next;
	struct buffer_head	*bh;
	struct bio		*bio;
	sector_t		lastblock = 0;

	/* Pass 1 - start writeback */
	do {
		next = ioend->io_list;
		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			xfs_start_buffer_writeback(bh);
		}
	} while ((ioend = next) != NULL);

	/* Pass 2 - submit I/O */
	ioend = head;
	do {
		next = ioend->io_list;
		bio = NULL;

		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			if (!bio) {
 retry:
				bio = xfs_alloc_ioend_bio(bh);
			} else if (bh->b_blocknr != lastblock + 1) {
				xfs_submit_ioend_bio(ioend, bio);
				goto retry;
			}

			if (bio_add_buffer(bio, bh) != bh->b_size) {
				xfs_submit_ioend_bio(ioend, bio);
				goto retry;
			}

			lastblock = bh->b_blocknr;
		}
		if (bio)
			xfs_submit_ioend_bio(ioend, bio);
		xfs_finish_ioend(ioend);
	} while ((ioend = next) != NULL);
}

/*
 * Cancel submission of all buffer_heads so far in this ioend.
 * Toss the ioend too.  Only ever called for the initial page
 * in a writepage request, so only ever one page.
 */
STATIC void
xfs_cancel_ioend(
	xfs_ioend_t		*ioend)
{
	xfs_ioend_t		*next;
	struct buffer_head	*bh, *next_bh;

	do {
		next = ioend->io_list;
		bh = ioend->io_buffer_head;
		do {
			next_bh = bh->b_private;
			clear_buffer_async_write(bh);
			unlock_buffer(bh);
		} while ((bh = next_bh) != NULL);

		vn_iowake(ioend->io_vnode);
		mempool_free(ioend, xfs_ioend_pool);
	} while ((ioend = next) != NULL);
}

/*
 * Test to see if we've been building up a completion structure for
 * earlier buffers -- if so, we try to append to this ioend if we
 * can, otherwise we finish off any current ioend and start another.
 */
STATIC void
xfs_add_to_ioend(
	struct inode		*inode,
	struct buffer_head	*bh,
	xfs_off_t		offset,
	unsigned int		type,
	xfs_ioend_t		**result,
	int			need_ioend)
{
	xfs_ioend_t		*ioend = *result;

	if (!ioend || need_ioend || type != ioend->io_type) {
		xfs_ioend_t	*previous = *result;

		ioend = xfs_alloc_ioend(inode, type);
		ioend->io_offset = offset;
		ioend->io_buffer_head = bh;
		ioend->io_buffer_tail = bh;
		if (previous)
			previous->io_list = ioend;
		*result = ioend;
	} else {
		ioend->io_buffer_tail->b_private = bh;
		ioend->io_buffer_tail = bh;
	}

	bh->b_private = NULL;
	ioend->io_size += bh->b_size;
}
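
/*
 * Compute the buffer's disk block number from the iomap: convert the
 * 512-byte basic block address in the map to filesystem block units
 * and add the block offset of this buffer within the mapping.
 */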
STATIC void
xfs_map_buffer(
	struct buffer_head	*bh,
	xfs_iomap_t		*mp,
	xfs_off_t		offset,
	uint			block_bits)
{
	sector_t		bn;

	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);

	bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
	      ((offset - mp->iomap_offset) >> block_bits);

	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));

	bh->b_blocknr = bn;
	set_buffer_mapped(bh);
}

STATIC void
xfs_map_at_offset(
	struct buffer_head	*bh,
	loff_t			offset,
	int			block_bits,
	xfs_iomap_t		*iomapp)
{
	ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
	ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));

	lock_buffer(bh);
	xfs_map_buffer(bh, iomapp, offset, block_bits);
	bh->b_bdev = iomapp->iomap_target->bt_bdev;
	set_buffer_mapped(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
}

/*
 * Look for a page at index that is suitable for clustering.
 */
STATIC unsigned int
xfs_probe_page(
	struct page		*page,
	unsigned int		pg_offset,
	int			mapped)
{
	int			ret = 0;

	if (PageWriteback(page))
		return 0;

	if (page->mapping && PageDirty(page)) {
		if (page_has_buffers(page)) {
			struct buffer_head	*bh, *head;

			bh = head = page_buffers(page);
			do {
				if (!buffer_uptodate(bh))
					break;
				if (mapped != buffer_mapped(bh))
					break;
				ret += bh->b_size;
				if (ret >= pg_offset)
					break;
			} while ((bh = bh->b_this_page) != head);
		} else
			ret = mapped ? 0 : PAGE_CACHE_SIZE;
	}

	return ret;
}
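
/*
 * Count how many contiguous bytes of uptodate buffers share the given
 * mapped state, starting in the current page and probing forwards
 * into the following pages, so that writeout can be clustered.
 */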
STATIC size_t
xfs_probe_cluster(
	struct inode		*inode,
	struct page		*startpage,
	struct buffer_head	*bh,
	struct buffer_head	*head,
	int			mapped)
{
	struct pagevec		pvec;
	pgoff_t			tindex, tlast, tloff;
	size_t			total = 0;
	int			done = 0, i;

	/* First sum forwards in this page */
	do {
		if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
			return total;
		total += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	/* if we reached the end of the page, sum forwards in following pages */
	tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
	tindex = startpage->index + 1;

	/* Prune this back to avoid pathological behavior */
	tloff = min(tlast, startpage->index + 64);

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tloff) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			size_t pg_offset, len = 0;

			if (tindex == tlast) {
				pg_offset =
				    i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
				if (!pg_offset) {
					done = 1;
					break;
				}
			} else
				pg_offset = PAGE_CACHE_SIZE;

			if (page->index == tindex && !TestSetPageLocked(page)) {
				len = xfs_probe_page(page, pg_offset, mapped);
				unlock_page(page);
			}

			if (!len) {
				done = 1;
				break;
			}

			total += len;
			tindex++;
		}

		pagevec_release(&pvec);
		cond_resched();
	}

	return total;
}

/*
 * Test if a given page is suitable for writing as part of an unwritten
 * or delayed allocate extent.
 */
STATIC int
xfs_is_delayed_page(
	struct page		*page,
	unsigned int		type)
{
	if (PageWriteback(page))
		return 0;

	if (page->mapping && page_has_buffers(page)) {
		struct buffer_head	*bh, *head;
		int			acceptable = 0;

		bh = head = page_buffers(page);
		do {
			if (buffer_unwritten(bh))
				acceptable = (type == IOMAP_UNWRITTEN);
			else if (buffer_delay(bh))
				acceptable = (type == IOMAP_DELAY);
			else if (buffer_mapped(bh))
				acceptable = (type == 0);
			else
				break;
		} while ((bh = bh->b_this_page) != head);

		if (acceptable)
			return 1;
	}

	return 0;
}

/*
 * Allocate & map buffers for page given the extent map.  Write it out.
 * Except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only; for the original page it is possible
 * that the page has no mapping at all.
 */
STATIC int
xfs_convert_page(
	struct inode		*inode,
	struct page		*page,
	loff_t			tindex,
	xfs_iomap_t		*mp,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc,
	int			startio,
	int			all_bh)
{
	struct buffer_head	*bh, *head;
	xfs_off_t		end_offset;
	unsigned long		p_offset;
	unsigned int		type;
	int			bbits = inode->i_blkbits;
	int			len, page_dirty;
	int			count = 0, done = 0, uptodate = 1;
	xfs_off_t		offset = page_offset(page);

	if (page->index != tindex)
		goto fail;
	if (TestSetPageLocked(page))
		goto fail;
	if (PageWriteback(page))
		goto fail_unlock_page;
	if (page->mapping != inode->i_mapping)
		goto fail_unlock_page;
	if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
		goto fail_unlock_page;

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			i_size_read(inode));

	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
			done = 1;
			continue;
		}

		if (buffer_unwritten(bh) || buffer_delay(bh)) {
			if (buffer_unwritten(bh))
				type = IOMAP_UNWRITTEN;
			else
				type = IOMAP_DELAY;

			if (!xfs_iomap_valid(mp, offset)) {
				done = 1;
				continue;
			}

			ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
			ASSERT(!(mp->iomap_flags & IOMAP_DELAY));

			xfs_map_at_offset(bh, offset, bbits, mp);
			if (startio) {
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
			} else {
				set_buffer_dirty(bh);
				unlock_buffer(bh);
				mark_buffer_dirty(bh);
			}
			page_dirty--;
			count++;
		} else {
			type = 0;
			if (buffer_mapped(bh) && all_bh && startio) {
				lock_buffer(bh);
				xfs_add_to_ioend(inode, bh, offset,
						type, ioendp, done);
				count++;
				page_dirty--;
			} else {
				done = 1;
			}
		}
	} while (offset += len, (bh = bh->b_this_page) != head);

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (startio) {
		if (count) {
			struct backing_dev_info *bdi;

			bdi = inode->i_mapping->backing_dev_info;
			wbc->nr_to_write--;
			if (bdi_write_congested(bdi)) {
				wbc->encountered_congestion = 1;
				done = 1;
			} else if (wbc->nr_to_write <= 0) {
				done = 1;
			}
		}
		xfs_start_page_writeback(page, wbc, !page_dirty, count);
	}

	return done;
 fail_unlock_page:
	unlock_page(page);
 fail:
	return 1;
}

/*
 * Convert & write out a cluster of pages in the same extent as defined
 * by mp and following the start page.
 */
STATIC void
xfs_cluster_write(
	struct inode		*inode,
	pgoff_t			tindex,
	xfs_iomap_t		*iomapp,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc,
	int			startio,
	int			all_bh,
	pgoff_t			tlast)
{
	struct pagevec		pvec;
	int			done = 0, i;

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tlast) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
					iomapp, ioendp, wbc, startio, all_bh);
			if (done)
				break;
		}

		pagevec_release(&pvec);
		cond_resched();
	}
}

/*
 * Calling this without startio set means we are being asked to make a dirty
 * page ready for freeing its buffers.  When called with startio set then
 * we are coming from writepage.
 *
 * When called with startio set it is important that we write the WHOLE
 * page if possible.
 * The bh->b_state's cannot know if any of the blocks or which block for
 * that matter are dirty due to mmap writes, and therefore bh uptodate is
 * only valid if the page itself isn't completely uptodate.  Some layers
 * may clear the page dirty flag prior to calling write page, under the
 * assumption the entire page will be written out; by not writing out the
 * whole page the page can be reused before all valid dirty data is
 * written out.  Note: in the case of a page that has been dirtied by
 * mapwrite but only partially set up by block_prepare_write, the
 * bh->b_state's will not agree and only the ones set up by BPW/BCW will
 * have valid state; thus the whole page must be written out.
 */
STATIC int
xfs_page_state_convert(
	struct inode	*inode,
	struct page	*page,
	struct writeback_control *wbc,
	int		startio,
	int		unmapped) /* also implies page uptodate */
{
	struct buffer_head	*bh, *head;
	xfs_iomap_t		iomap;
	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
	loff_t			offset;
	unsigned long		p_offset = 0;
	unsigned int		type;
	__uint64_t		end_offset;
	pgoff_t			end_index, last_index, tlast;
	ssize_t			size, len;
	int			flags, err, iomap_valid = 0, uptodate = 1;
	int			page_dirty, count = 0, trylock_flag = 0;
	int			all_bh = unmapped;

	/* wait for other IO threads? */
	if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
		trylock_flag |= BMAPI_TRYLOCK;

	/* Is this page beyond the end of the file? */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_CACHE_SHIFT;
	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
	if (page->index >= end_index) {
		if ((page->index >= end_index + 1) ||
		    !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
			if (startio)
				unlock_page(page);
			return 0;
		}
	}

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	offset = page_offset(page);
	flags = -1;
	type = 0;

	/* TODO: cleanup count and page_dirty */

	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
			/*
			 * the iomap is actually still valid, but the ioend
			 * isn't.  shouldn't happen too often.
			 */
			iomap_valid = 0;
			continue;
		}

		if (iomap_valid)
			iomap_valid = xfs_iomap_valid(&iomap, offset);

		/*
		 * First case, map an unwritten extent and prepare for
		 * extent state conversion transaction on completion.
		 *
		 * Second case, allocate space for a delalloc buffer.
		 * We can return EAGAIN here in the release page case.
		 *
		 * Third case, an unmapped buffer was found, and we are
		 * in a path where we need to write the whole page out.
		 */
		if (buffer_unwritten(bh) || buffer_delay(bh) ||
		    ((buffer_uptodate(bh) || PageUptodate(page)) &&
		     !buffer_mapped(bh) && (unmapped || startio))) {
			/*
			 * Make sure we don't use a read-only iomap
			 */
			if (flags == BMAPI_READ)
				iomap_valid = 0;

			if (buffer_unwritten(bh)) {
				type = IOMAP_UNWRITTEN;
				flags = BMAPI_WRITE|BMAPI_IGNSTATE;
			} else if (buffer_delay(bh)) {
				type = IOMAP_DELAY;
				flags = BMAPI_ALLOCATE;
				if (!startio)
					flags |= trylock_flag;
			} else {
				type = IOMAP_NEW;
				flags = BMAPI_WRITE|BMAPI_MMAP;
			}

			if (!iomap_valid) {
				if (type == IOMAP_NEW) {
					size = xfs_probe_cluster(inode,
							page, bh, head, 0);
				} else {
					size = len;
				}

				err = xfs_map_blocks(inode, offset, size,
						&iomap, flags);
				if (err)
					goto error;
				iomap_valid = xfs_iomap_valid(&iomap, offset);
			}
			if (iomap_valid) {
				xfs_map_at_offset(bh, offset,
						inode->i_blkbits, &iomap);
				if (startio) {
					xfs_add_to_ioend(inode, bh, offset,
							type, &ioend,
							!iomap_valid);
				} else {
					set_buffer_dirty(bh);
					unlock_buffer(bh);
					mark_buffer_dirty(bh);
				}
				page_dirty--;
				count++;
			}
		} else if (buffer_uptodate(bh) && startio) {
			/*
			 * we got here because the buffer is already mapped.
			 * That means it must already have extents allocated
			 * underneath it. Map the extent by reading it.
			 */
			if (!iomap_valid || type != 0) {
				flags = BMAPI_READ;
				size = xfs_probe_cluster(inode, page, bh,
								head, 1);
				err = xfs_map_blocks(inode, offset, size,
						&iomap, flags);
				if (err)
					goto error;
				iomap_valid = xfs_iomap_valid(&iomap, offset);
			}

			type = 0;
			if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
				ASSERT(buffer_mapped(bh));
				if (iomap_valid)
					all_bh = 1;
				xfs_add_to_ioend(inode, bh, offset, type,
						&ioend, !iomap_valid);
				page_dirty--;
				count++;
			} else {
				iomap_valid = 0;
			}
		} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
			   (unmapped || startio)) {
			iomap_valid = 0;
		}

		if (!iohead)
			iohead = ioend;

	} while (offset += len, ((bh = bh->b_this_page) != head));

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (startio)
		xfs_start_page_writeback(page, wbc, 1, count);

	if (ioend && iomap_valid) {
		offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
					PAGE_CACHE_SHIFT;
		tlast = min_t(pgoff_t, offset, last_index);
		xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
					wbc, startio, all_bh, tlast);
	}

	if (iohead)
		xfs_submit_ioend(iohead);

	return page_dirty;

error:
	if (iohead)
		xfs_cancel_ioend(iohead);

	/*
	 * If it's delalloc and we have nowhere to put it,
	 * throw it away, unless the lower layers told
	 * us to try again.
	 */
	if (err != -EAGAIN) {
		if (!unmapped)
			block_invalidatepage(page, 0);
		ClearPageUptodate(page);
	}
	return err;
}

/*
 * writepage: Called from one of two places:
 *
 * 1. we are flushing a delalloc buffer head.
 *
 * 2. we are writing out a dirty page. Typically the page dirty
 *    state is cleared before we get here. In this case it is
 *    conceivable we have no buffer heads.
 *
 * For delalloc space on the page we need to allocate space and
 * flush it. For unmapped buffer heads on the page we should
 * allocate space if the page is uptodate. For any other dirty
 * buffer heads on the page we should flush them.
 *
 * If we detect that a transaction would be required to flush
 * the page, we have to check the process flags first: if we
 * are already in a transaction or disk I/O during allocations
 * is off, we need to fail the writepage and redirty the page.
 */
STATIC int
xfs_vm_writepage(
	struct page		*page,
	struct writeback_control *wbc)
{
	int			error;
	int			need_trans;
	int			delalloc, unmapped, unwritten;
	struct inode		*inode = page->mapping->host;

	xfs_page_trace(XFS_WRITEPAGE_ENTER, inode, page, 0);

	/*
	 * We need a transaction if:
	 *  1. There are delalloc buffers on the page
	 *  2. The page is uptodate and we have unmapped buffers
	 *  3. The page is uptodate and we have no buffers
	 *  4. There are unwritten buffers on the page
	 */
	if (!page_has_buffers(page)) {
		unmapped = 1;
		need_trans = 1;
	} else {
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
		if (!PageUptodate(page))
			unmapped = 0;
		need_trans = delalloc + unmapped + unwritten;
	}

	/*
	 * If we need a transaction and the process flags say
	 * we are already in a transaction, or no IO is allowed
	 * then mark the page dirty again and leave the page
	 * as is.
	 */
	if (PFLAGS_TEST_FSTRANS() && need_trans)
		goto out_fail;

	/*
	 * Delay hooking up buffer heads until we have
	 * made our go/no-go decision.
	 */
	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits, 0);

	/*
	 * Convert delayed allocate, unwritten or unmapped space
	 * to real space and flush out to disk.
	 */
	error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
	if (error == -EAGAIN)
		goto out_fail;
	if (unlikely(error < 0))
		goto out_unlock;

	return 0;

out_fail:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
out_unlock:
	unlock_page(page);
	return error;
}

/*
 * Called to move a page into cleanable state - and from there
 * to be released. Possibly the page is already clean. We always
 * have buffer heads in this call.
 *
 * Returns non-zero if the buffers were freed and the page can be
 * released, 0 otherwise.
 *
 * Possible scenarios are:
 *
 * 1. We are being called to release a page which has been written
 *    to via regular I/O. buffer heads will be dirty and possibly
 *    delalloc. If no delalloc buffer heads in this case then we
 *    can just return zero.
 *
 * 2. We are called to release a page which has been written via
 *    mmap; all we need to do is ensure there is no delalloc
 *    state in the buffer heads. If not, we can let the caller
 *    free them and we should come back later via writepage.
 */
STATIC int
xfs_vm_release_page(
	struct page		*page,
	gfp_t			gfp_mask)
{
	struct inode		*inode = page->mapping->host;
	int			dirty, delalloc, unmapped, unwritten;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 1,
	};

	xfs_page_trace(XFS_RELEASEPAGE_ENTER, inode, page, gfp_mask);

	xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
	if (!delalloc && !unwritten)
		goto free_buffers;

	if (!(gfp_mask & __GFP_FS))
		return 0;

	/* If we are already inside a transaction or the thread cannot
	 * do I/O, we cannot release this page.
	 */
	if (PFLAGS_TEST_FSTRANS())
		return 0;

	/*
	 * Convert delalloc space to real space, do not flush the
	 * data out to disk, that will be done by the caller.
	 * Never need to allocate space here - we will always
	 * come back to writepage in that case.
	 */
	dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
	if (dirty == 0 && !unwritten)
		goto free_buffers;
	return 0;

free_buffers:
	return try_to_free_buffers(page);
}
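
/*
 * Common get_block/get_blocks worker: map (part of) the requested
 * range to a buffer_head via VOP_BMAP() and set the buffer state
 * flags that correspond to the returned iomap.
 */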
STATIC int
__xfs_get_block(
	struct inode		*inode,
	sector_t		iblock,
	unsigned long		blocks,
	struct buffer_head	*bh_result,
	int			create,
	int			direct,
	bmapi_flags_t		flags)
{
	vnode_t			*vp = LINVFS_GET_VP(inode);
	xfs_iomap_t		iomap;
	xfs_off_t		offset;
	ssize_t			size;
	int			retpbbm = 1;
	int			error;

	offset = (xfs_off_t)iblock << inode->i_blkbits;
	if (blocks)
		size = (ssize_t) min_t(xfs_off_t, LONG_MAX,
					(xfs_off_t)blocks << inode->i_blkbits);
	else
		size = 1 << inode->i_blkbits;

	VOP_BMAP(vp, offset, size,
		create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
	if (error)
		return -error;

	if (retpbbm == 0)
		return 0;

	if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
		/*
		 * For unwritten extents do not report a disk address on
		 * the read case (treat as if we're reading into a hole).
		 */
		if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
			xfs_map_buffer(bh_result, &iomap, offset,
				       inode->i_blkbits);
		}
		if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
			if (direct)
				bh_result->b_private = inode;
			set_buffer_unwritten(bh_result);
			set_buffer_delay(bh_result);
		}
	}

	/* If this is a realtime file, data might be on a new device */
	bh_result->b_bdev = iomap.iomap_target->bt_bdev;

	/* If we previously allocated a block out beyond eof and
	 * we are now coming back to use it then we will need to
	 * flag it as new even if it has a disk address.
	 */
	if (create &&
	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
	     (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW)))
		set_buffer_new(bh_result);

	if (iomap.iomap_flags & IOMAP_DELAY) {
		BUG_ON(direct);
		if (create) {
			set_buffer_uptodate(bh_result);
			set_buffer_mapped(bh_result);
			set_buffer_delay(bh_result);
		}
	}

	if (blocks) {
		ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
		offset = min_t(xfs_off_t,
				iomap.iomap_bsize - iomap.iomap_delta,
				(xfs_off_t)blocks << inode->i_blkbits);
		bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset);
	}

	return 0;
}
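
/*
 * get_block callback for the buffered I/O path; the direct I/O
 * variant below also passes a block count and the BMAPI_DIRECT flag.
 */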
int
xfs_get_block(
	struct inode		*inode,
	sector_t		iblock,
	struct buffer_head	*bh_result,
	int			create)
{
	return __xfs_get_block(inode, iblock, 0, bh_result,
					create, 0, BMAPI_WRITE);
}

STATIC int
xfs_get_blocks_direct(
	struct inode		*inode,
	sector_t		iblock,
	unsigned long		max_blocks,
	struct buffer_head	*bh_result,
	int			create)
{
	return __xfs_get_block(inode, iblock, max_blocks, bh_result,
					create, 1, BMAPI_WRITE|BMAPI_DIRECT);
}

STATIC void
xfs_end_io_direct(
	struct kiocb	*iocb,
	loff_t		offset,
	ssize_t		size,
	void		*private)
{
	xfs_ioend_t	*ioend = iocb->private;

	/*
	 * Non-NULL private data means we need to issue a transaction to
	 * convert a range from unwritten to written extents.  This needs
	 * to happen from process context but aio+dio I/O completion
	 * happens from irq context so we need to defer it to a workqueue.
	 * This is not necessary for synchronous direct I/O, but we do
	 * it anyway to keep the code uniform and simpler.
	 *
	 * The core direct I/O code might be changed to always call the
	 * completion handler in the future, in which case all this can
	 * go away.
	 */
	if (private && size > 0) {
		ioend->io_offset = offset;
		ioend->io_size = size;
		xfs_finish_ioend(ioend);
	} else {
		ASSERT(size >= 0);
		xfs_destroy_ioend(ioend);
	}

	/*
	 * blockdev_direct_IO can return an error even after the I/O
	 * completion handler was called.  Thus we need to protect
	 * against double-freeing.
	 */
	iocb->private = NULL;
}
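
/*
 * Direct I/O entry point: find the target device via VOP_BMAP(),
 * allocate an ioend in case unwritten extents need converting on
 * completion, and hand off to the generic blockdev direct I/O code.
 */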
STATIC ssize_t
xfs_vm_direct_IO(
	int			rw,
	struct kiocb		*iocb,
	const struct iovec	*iov,
	loff_t			offset,
	unsigned long		nr_segs)
{
	struct file	*file = iocb->ki_filp;
	struct inode	*inode = file->f_mapping->host;
	vnode_t		*vp = LINVFS_GET_VP(inode);
	xfs_iomap_t	iomap;
	int		maps = 1;
	int		error;
	ssize_t		ret;

	VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
	if (error)
		return -error;

	iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);

	ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
		iomap.iomap_target->bt_bdev,
		iov, offset, nr_segs,
		xfs_get_blocks_direct,
		xfs_end_io_direct);

	if (unlikely(ret <= 0 && iocb->private))
		xfs_destroy_ioend(iocb->private);
	return ret;
}

STATIC int
xfs_vm_prepare_write(
	struct file		*file,
	struct page		*page,
	unsigned int		from,
	unsigned int		to)
{
	return block_prepare_write(page, from, to, xfs_get_block);
}
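
/*
 * Flush dirty pages to disk before the bmap lookup below, so that
 * delayed allocations are converted to real, mappable extents first.
 */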
STATIC sector_t
xfs_vm_bmap(
	struct address_space	*mapping,
	sector_t		block)
{
	struct inode		*inode = (struct inode *)mapping->host;
	vnode_t			*vp = LINVFS_GET_VP(inode);
	int			error;

	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);

	VOP_RWLOCK(vp, VRWLOCK_READ);
	VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
	VOP_RWUNLOCK(vp, VRWLOCK_READ);
	return generic_block_bmap(mapping, block, xfs_get_block);
}

STATIC int
xfs_vm_readpage(
	struct file		*unused,
	struct page		*page)
{
	return mpage_readpage(page, xfs_get_block);
}

STATIC int
xfs_vm_readpages(
	struct file		*unused,
	struct address_space	*mapping,
	struct list_head	*pages,
	unsigned		nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, xfs_get_block);
}

STATIC int
xfs_vm_invalidate_page(
	struct page		*page,
	unsigned long		offset)
{
	xfs_page_trace(XFS_INVALIDPAGE_ENTER,
			page->mapping->host, page, offset);
	return block_invalidatepage(page, offset);
}

struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readpages		= xfs_vm_readpages,
	.writepage		= xfs_vm_writepage,
	.sync_page		= block_sync_page,
	.releasepage		= xfs_vm_release_page,
	.invalidatepage		= xfs_vm_invalidate_page,
	.prepare_write		= xfs_vm_prepare_write,
	.commit_write		= generic_commit_write,
	.bmap			= xfs_vm_bmap,
	.direct_IO		= xfs_vm_direct_IO,
	.migratepage		= buffer_migrate_page,
};