2005-04-16 15:20:36 -07:00
/*
2005-11-02 14:58:39 +11:00
* Copyright ( c ) 2000 - 2005 Silicon Graphics , Inc .
* All Rights Reserved .
2005-04-16 15:20:36 -07:00
*
2005-11-02 14:58:39 +11:00
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License as
2005-04-16 15:20:36 -07:00
* published by the Free Software Foundation .
*
2005-11-02 14:58:39 +11:00
* This program is distributed in the hope that it would be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
2005-04-16 15:20:36 -07:00
*
2005-11-02 14:58:39 +11:00
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write the Free Software Foundation ,
* Inc . , 51 Franklin St , Fifth Floor , Boston , MA 02110 - 1301 USA
2005-04-16 15:20:36 -07:00
*/
# include "xfs.h"
2005-11-02 14:38:42 +11:00
# include "xfs_bit.h"
2005-04-16 15:20:36 -07:00
# include "xfs_log.h"
2005-11-02 14:38:42 +11:00
# include "xfs_inum.h"
2005-04-16 15:20:36 -07:00
# include "xfs_sb.h"
2005-11-02 14:38:42 +11:00
# include "xfs_ag.h"
2005-04-16 15:20:36 -07:00
# include "xfs_dir2.h"
# include "xfs_trans.h"
# include "xfs_dmapi.h"
# include "xfs_mount.h"
# include "xfs_bmap_btree.h"
# include "xfs_alloc_btree.h"
# include "xfs_ialloc_btree.h"
# include "xfs_dir2_sf.h"
2005-11-02 14:38:42 +11:00
# include "xfs_attr_sf.h"
2005-04-16 15:20:36 -07:00
# include "xfs_dinode.h"
# include "xfs_inode.h"
2005-11-02 14:38:42 +11:00
# include "xfs_alloc.h"
# include "xfs_btree.h"
2005-04-16 15:20:36 -07:00
# include "xfs_error.h"
# include "xfs_rw.h"
# include "xfs_iomap.h"
# include <linux/mpage.h>
2006-01-11 20:48:14 +11:00
# include <linux/pagevec.h>
2005-04-16 15:20:36 -07:00
# include <linux/writeback.h>
2006-03-14 13:26:27 +11:00
STATIC void
xfs_count_page_state (
struct page * page ,
int * delalloc ,
int * unmapped ,
int * unwritten )
{
struct buffer_head * bh , * head ;
* delalloc = * unmapped = * unwritten = 0 ;
bh = head = page_buffers ( page ) ;
do {
if ( buffer_uptodate ( bh ) & & ! buffer_mapped ( bh ) )
( * unmapped ) = 1 ;
else if ( buffer_unwritten ( bh ) & & ! buffer_delay ( bh ) )
clear_buffer_unwritten ( bh ) ;
else if ( buffer_unwritten ( bh ) )
( * unwritten ) = 1 ;
else if ( buffer_delay ( bh ) )
( * delalloc ) = 1 ;
} while ( ( bh = bh - > b_this_page ) ! = head ) ;
}
2005-04-16 15:20:36 -07:00
#if defined(XFS_RW_TRACE)
/*
 * Log one entry describing a page into the inode's i_rwtrace buffer.
 *
 * The entry carries the tag, the XFS inode, the Linux inode, the page
 * and pgoff, followed by ip->i_d.di_size, the size from i_size_read()
 * and the page's file offset (each of those three is split into a high
 * and a low 32-bit word), then the delalloc, unmapped and unwritten
 * flags and the current pid.  The three flags stay at -1 when the page
 * has no buffers attached.
 *
 * Nothing is logged when the inode has no i_rwtrace buffer.  The page
 * state scan deliberately stays ahead of that check, exactly as before,
 * because xfs_count_page_state() is called for its result regardless.
 */
void
xfs_page_trace(
	int		tag,
	struct inode	*inode,
	struct page	*page,
	unsigned long	pgoff)
{
	xfs_inode_t	*ip;
	bhv_vnode_t	*vp = vn_from_inode(inode);
	loff_t		isize = i_size_read(inode);
	loff_t		offset = page_offset(page);
	int		delalloc = -1, unmapped = -1, unwritten = -1;
	unsigned long	di_size_hi, di_size_lo;
	unsigned long	isize_hi, isize_lo;
	unsigned long	offset_hi, offset_lo;

	if (page_has_buffers(page))
		xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);

	ip = xfs_vtoi(vp);
	if (!ip->i_rwtrace)
		return;

	/* Split the 64-bit quantities into 32-bit halves for the trace. */
	di_size_hi = (unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff);
	di_size_lo = (unsigned long)(ip->i_d.di_size & 0xffffffff);
	isize_hi = (unsigned long)((isize >> 32) & 0xffffffff);
	isize_lo = (unsigned long)(isize & 0xffffffff);
	offset_hi = (unsigned long)((offset >> 32) & 0xffffffff);
	offset_lo = (unsigned long)(offset & 0xffffffff);

	ktrace_enter(ip->i_rwtrace,
		(void *)((unsigned long)tag),
		(void *)ip,
		(void *)inode,
		(void *)page,
		(void *)pgoff,
		(void *)di_size_hi,
		(void *)di_size_lo,
		(void *)isize_hi,
		(void *)isize_lo,
		(void *)offset_hi,
		(void *)offset_lo,
		(void *)((unsigned long)delalloc),
		(void *)((unsigned long)unmapped),
		(void *)((unsigned long)unwritten),
		(void *)((unsigned long)current_pid()),
		(void *)NULL);
}
#else
/* Tracing compiled out: xfs_page_trace() expands to nothing. */
#define xfs_page_trace(tag, inode, page, pgoff)
#endif
2005-09-02 16:58:49 +10:00
/*
 * Drop one hold on the ioend.  If that was the final hold
 * (io_remaining reaches zero), queue the ioend's completion
 * work on the xfsdatad workqueue.
 */
STATIC void
xfs_finish_ioend(
	xfs_ioend_t		*ioend)
{
	if (!atomic_dec_and_test(&ioend->io_remaining))
		return;

	queue_work(xfsdatad_workqueue, &ioend->io_work);
}
2006-01-11 15:40:13 +11:00
/*
* We ' re now finished for good with this ioend structure .
* Update the page state via the associated buffer_heads ,
* release holds on the inode and bio , and finally free
* up memory . Do not use the ioend after this .
*/
2005-09-02 16:58:49 +10:00
STATIC void
xfs_destroy_ioend (
xfs_ioend_t * ioend )
{
2006-01-11 15:40:13 +11:00
struct buffer_head * bh , * next ;
for ( bh = ioend - > io_buffer_head ; bh ; bh = next ) {
next = bh - > b_private ;
2006-06-09 14:58:38 +10:00
bh - > b_end_io ( bh , ! ioend - > io_error ) ;
2006-01-11 15:40:13 +11:00
}
2006-06-09 14:58:38 +10:00
if ( unlikely ( ioend - > io_error ) )
vn_ioerror ( ioend - > io_vnode , ioend - > io_error , __FILE__ , __LINE__ ) ;
2005-09-02 16:58:49 +10:00
vn_iowake ( ioend - > io_vnode ) ;
mempool_free ( ioend , xfs_ioend_pool ) ;
}
/*
2006-01-11 15:40:13 +11:00
* Buffered IO write completion for delayed allocate extents .
* TODO : Update ondisk isize now that we know the file data
* has been flushed ( i . e . the notorious " NULL file " problem ) .
*/
STATIC void
xfs_end_bio_delalloc (
2006-11-22 14:57:56 +00:00
struct work_struct * work )
2006-01-11 15:40:13 +11:00
{
2006-11-22 14:57:56 +00:00
xfs_ioend_t * ioend =
container_of ( work , xfs_ioend_t , io_work ) ;
2006-01-11 15:40:13 +11:00
xfs_destroy_ioend ( ioend ) ;
}
/*
* Buffered IO write completion for regular , written extents .
*/
STATIC void
xfs_end_bio_written (
2006-11-22 14:57:56 +00:00
struct work_struct * work )
2006-01-11 15:40:13 +11:00
{
2006-11-22 14:57:56 +00:00
xfs_ioend_t * ioend =
container_of ( work , xfs_ioend_t , io_work ) ;
2006-01-11 15:40:13 +11:00
xfs_destroy_ioend ( ioend ) ;
}
/*
* IO write completion for unwritten extents .
*
2005-09-02 16:58:49 +10:00
* Issue transactions to convert a buffer range from unwritten
2005-09-05 08:22:52 +10:00
* to written extents .
2005-09-02 16:58:49 +10:00
*/
STATIC void
xfs_end_bio_unwritten (
2006-11-22 14:57:56 +00:00
struct work_struct * work )
2005-09-02 16:58:49 +10:00
{
2006-11-22 14:57:56 +00:00
xfs_ioend_t * ioend =
container_of ( work , xfs_ioend_t , io_work ) ;
2006-06-09 17:00:52 +10:00
bhv_vnode_t * vp = ioend - > io_vnode ;
2005-09-02 16:58:49 +10:00
xfs_off_t offset = ioend - > io_offset ;
size_t size = ioend - > io_size ;
2006-06-09 14:58:38 +10:00
if ( likely ( ! ioend - > io_error ) )
2006-06-09 17:00:52 +10:00
bhv_vop_bmap ( vp , offset , size , BMAPI_UNWRITTEN , NULL , NULL ) ;
2005-09-02 16:58:49 +10:00
xfs_destroy_ioend ( ioend ) ;
}
/*
 * Allocate and initialise an IO completion structure for the inode.
 *
 * The completion work handler is picked from the I/O type:
 * IOMAP_UNWRITTEN, IOMAP_DELAY, or anything else (regular written
 * extents).  The vnode's v_iocount is raised for the new ioend.
 *
 * We need to track unwritten extent write completion here initially.
 * We'll need to extend this for updating the ondisk inode size later
 * (vs. incore size).
 */
STATIC xfs_ioend_t *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type)
{
	xfs_ioend_t		*ioend;

	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);

	/*
	 * Start the hold count at 1.  That initial hold keeps a bio
	 * completion callback from running the completion routine
	 * before all of the I/O for this ioend has been started.
	 */
	atomic_set(&ioend->io_remaining, 1);

	ioend->io_error = 0;
	ioend->io_list = NULL;
	ioend->io_type = type;
	ioend->io_vnode = vn_from_inode(inode);
	ioend->io_buffer_head = NULL;
	ioend->io_buffer_tail = NULL;
	atomic_inc(&ioend->io_vnode->v_iocount);
	ioend->io_offset = 0;
	ioend->io_size = 0;

	if (type == IOMAP_UNWRITTEN) {
		INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten);
	} else if (type == IOMAP_DELAY) {
		INIT_WORK(&ioend->io_work, xfs_end_bio_delalloc);
	} else {
		INIT_WORK(&ioend->io_work, xfs_end_bio_written);
	}

	return ioend;
}
2005-04-16 15:20:36 -07:00
/*
 * Ask the vnode's bmap operation for a single mapping covering
 * [offset, offset + count) and store it in *mapp.
 *
 * On success with BMAPI_WRITE or BMAPI_ALLOCATE in flags, the vnode is
 * marked modified.  Returns 0 on success, otherwise the negated error
 * value reported by the bmap call.
 */
STATIC int
xfs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	ssize_t			count,
	xfs_iomap_t		*mapp,
	int			flags)
{
	bhv_vnode_t		*vp = vn_from_inode(inode);
	int			nmaps = 1;
	int			error;

	error = bhv_vop_bmap(vp, offset, count, flags, mapp, &nmaps);
	if (error)
		return -error;

	if (flags & (BMAPI_WRITE | BMAPI_ALLOCATE)) {
		VMODIFY(vp);
	}
	return 0;
}
2006-01-11 20:48:33 +11:00
STATIC inline int
xfs_iomap_valid (
2005-04-16 15:20:36 -07:00
xfs_iomap_t * iomapp ,
2006-01-11 20:48:33 +11:00
loff_t offset )
2005-04-16 15:20:36 -07:00
{
2006-01-11 20:48:33 +11:00
return offset > = iomapp - > iomap_offset & &
offset < iomapp - > iomap_offset + iomapp - > iomap_bsize ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 15:40:13 +11:00
/*
* BIO completion handler for buffered IO .
*/
STATIC int
xfs_end_bio (
struct bio * bio ,
unsigned int bytes_done ,
int error )
{
xfs_ioend_t * ioend = bio - > bi_private ;
if ( bio - > bi_size )
return 1 ;
ASSERT ( atomic_read ( & bio - > bi_cnt ) > = 1 ) ;
2006-06-09 14:58:38 +10:00
ioend - > io_error = test_bit ( BIO_UPTODATE , & bio - > bi_flags ) ? 0 : error ;
2006-01-11 15:40:13 +11:00
/* Toss bio and pass work off to an xfsdatad thread */
bio - > bi_private = NULL ;
bio - > bi_end_io = NULL ;
bio_put ( bio ) ;
2006-06-09 14:58:38 +10:00
2006-01-11 15:40:13 +11:00
xfs_finish_ioend ( ioend ) ;
return 0 ;
}
/*
 * Hand one bio belonging to the ioend to the block layer as a WRITE.
 *
 * A hold is taken on the ioend for the bio before submission; the
 * bio's completion handler (xfs_end_bio) releases it.  After
 * submission a reference on the bio is dropped with bio_put().
 */
STATIC void
xfs_submit_ioend_bio(
	xfs_ioend_t		*ioend,
	struct bio		*bio)
{
	/* Account for this bio before it can possibly complete. */
	atomic_inc(&ioend->io_remaining);

	bio->bi_end_io = xfs_end_bio;
	bio->bi_private = ioend;

	submit_bio(WRITE, bio);
	ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP));
	bio_put(bio);
}
STATIC struct bio *
xfs_alloc_ioend_bio (
struct buffer_head * bh )
{
struct bio * bio ;
int nvecs = bio_get_nr_vecs ( bh - > b_bdev ) ;
do {
bio = bio_alloc ( GFP_NOIO , nvecs ) ;
nvecs > > = 1 ;
} while ( ! bio ) ;
ASSERT ( bio - > bi_private = = NULL ) ;
bio - > bi_sector = bh - > b_blocknr * ( bh - > b_size > > 9 ) ;
bio - > bi_bdev = bh - > b_bdev ;
bio_get ( bio ) ;
return bio ;
}
/*
 * Put one buffer_head into the writeback state: mark it async-write,
 * set it uptodate and clear its dirty flag.
 *
 * The buffer is expected to be mapped and locked, and to be neither
 * delay nor unwritten (checked by ASSERT only).
 */
STATIC void
xfs_start_buffer_writeback(
	struct buffer_head	*bh)
{
	ASSERT(buffer_mapped(bh));
	ASSERT(buffer_locked(bh));
	ASSERT(!buffer_delay(bh));
	ASSERT(!buffer_unwritten(bh));

	mark_buffer_async_write(bh);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
}
STATIC void
xfs_start_page_writeback (
struct page * page ,
struct writeback_control * wbc ,
int clear_dirty ,
int buffers )
{
ASSERT ( PageLocked ( page ) ) ;
ASSERT ( ! PageWriteback ( page ) ) ;
set_page_writeback ( page ) ;
if ( clear_dirty )
clear_page_dirty ( page ) ;
unlock_page ( page ) ;
if ( ! buffers ) {
end_page_writeback ( page ) ;
wbc - > pages_skipped + + ; /* We didn't write this page */
}
}
static inline int bio_add_buffer ( struct bio * bio , struct buffer_head * bh )
{
return bio_add_page ( bio , bh - > b_page , bh - > b_size , bh_offset ( bh ) ) ;
}
/*
 * Submit all of the bios for all of the ioends we have saved up, covering
 * the initial writepage page and also any probed pages.
 *
 * Because we may have multiple ioends spanning a page, we need to start
 * writeback on all the buffers before we submit them for I/O.  If we mark
 * the buffers as we go, then we can end up with a page that only has some
 * buffers marked async write, and I/O completion can occur before we mark
 * the other buffers async write.
 *
 * The end result of this is that we trip a bug in end_page_writeback()
 * because we call it twice for the one page, as the code in
 * end_buffer_async_write() assumes that all buffers on the page are
 * started at the same time.
 *
 * The fix is two passes across the ioend list - one to start writeback on
 * the buffer_heads, and then submit them for I/O on the second pass.
 */
STATIC void
xfs_submit_ioend(
	xfs_ioend_t		*ioend)
{
	xfs_ioend_t		*head = ioend;
	xfs_ioend_t		*next;
	struct buffer_head	*bh;
	struct bio		*bio;
	sector_t		lastblock = 0;

	/* Pass 1 - start writeback */
	do {
		next = ioend->io_list;
		bh = ioend->io_buffer_head;
		while (bh) {
			xfs_start_buffer_writeback(bh);
			bh = bh->b_private;
		}
		ioend = next;
	} while (ioend != NULL);

	/* Pass 2 - submit I/O */
	ioend = head;
	do {
		/* Fetch the link first: the ioend hold is dropped below. */
		next = ioend->io_list;
		bio = NULL;

		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
			/* A gap in block numbers ends the current bio. */
			if (bio && bh->b_blocknr != lastblock + 1) {
				xfs_submit_ioend_bio(ioend, bio);
				bio = NULL;
			}

			/*
			 * Add the buffer, starting a new bio when there is
			 * none or when the current one cannot take it.
			 */
			for (;;) {
				if (!bio)
					bio = xfs_alloc_ioend_bio(bh);
				if (bio_add_buffer(bio, bh) == bh->b_size)
					break;
				xfs_submit_ioend_bio(ioend, bio);
				bio = NULL;
			}

			lastblock = bh->b_blocknr;
		}

		if (bio)
			xfs_submit_ioend_bio(ioend, bio);
		xfs_finish_ioend(ioend);

		ioend = next;
	} while (ioend != NULL);
}
/*
 * Cancel submission of all buffer_heads so far in this ioend chain:
 * clear the async-write flag on each buffer and unlock it, then wake
 * I/O waiters on the vnode and free each ioend.
 *
 * Only ever called for the initial page in a writepage request,
 * so only ever one page.
 */
STATIC void
xfs_cancel_ioend(
	xfs_ioend_t		*ioend)
{
	do {
		xfs_ioend_t		*following = ioend->io_list;
		struct buffer_head	*bh = ioend->io_buffer_head;

		do {
			/* b_private chains the buffers of one ioend. */
			struct buffer_head	*following_bh = bh->b_private;

			clear_buffer_async_write(bh);
			unlock_buffer(bh);
			bh = following_bh;
		} while (bh != NULL);

		vn_iowake(ioend->io_vnode);
		mempool_free(ioend, xfs_ioend_pool);

		ioend = following;
	} while (ioend != NULL);
}
/*
* Test to see if we ' ve been building up a completion structure for
* earlier buffers - - if so , we try to append to this ioend if we
* can , otherwise we finish off any current ioend and start another .
* Return true if we ' ve finished the given ioend .
*/
STATIC void
xfs_add_to_ioend (
struct inode * inode ,
struct buffer_head * bh ,
2006-01-11 20:49:16 +11:00
xfs_off_t offset ,
2006-01-11 15:40:13 +11:00
unsigned int type ,
xfs_ioend_t * * result ,
int need_ioend )
{
xfs_ioend_t * ioend = * result ;
if ( ! ioend | | need_ioend | | type ! = ioend - > io_type ) {
xfs_ioend_t * previous = * result ;
ioend = xfs_alloc_ioend ( inode , type ) ;
ioend - > io_offset = offset ;
ioend - > io_buffer_head = bh ;
ioend - > io_buffer_tail = bh ;
if ( previous )
previous - > io_list = ioend ;
* result = ioend ;
} else {
ioend - > io_buffer_tail - > b_private = bh ;
ioend - > io_buffer_tail = bh ;
}
bh - > b_private = NULL ;
ioend - > io_size + = bh - > b_size ;
}
2006-03-14 13:26:43 +11:00
/*
 * Compute the block number for a buffer at file offset `offset` from
 * the iomap and mark the buffer mapped.
 *
 * The iomap's starting block (iomap_bn) is shifted down by
 * (block_bits - BBSHIFT) and the distance of offset from iomap_offset,
 * shifted down by block_bits, is added to it.
 */
STATIC void
xfs_map_buffer(
	struct buffer_head	*bh,
	xfs_iomap_t		*mp,
	xfs_off_t		offset,
	uint			block_bits)
{
	sector_t		bn;

	ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);

	bn = mp->iomap_bn >> (block_bits - BBSHIFT);
	bn += (offset - mp->iomap_offset) >> block_bits;

	/* Block zero is only expected for IOMAP_REALTIME mappings. */
	ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));

	bh->b_blocknr = bn;
	set_buffer_mapped(bh);
}
2005-04-16 15:20:36 -07:00
STATIC void
xfs_map_at_offset (
struct buffer_head * bh ,
2006-01-11 20:48:33 +11:00
loff_t offset ,
2005-04-16 15:20:36 -07:00
int block_bits ,
2006-01-11 20:48:33 +11:00
xfs_iomap_t * iomapp )
2005-04-16 15:20:36 -07:00
{
ASSERT ( ! ( iomapp - > iomap_flags & IOMAP_HOLE ) ) ;
ASSERT ( ! ( iomapp - > iomap_flags & IOMAP_DELAY ) ) ;
lock_buffer ( bh ) ;
2006-03-14 13:26:43 +11:00
xfs_map_buffer ( bh , iomapp , offset , block_bits ) ;
2006-01-11 15:39:08 +11:00
bh - > b_bdev = iomapp - > iomap_target - > bt_bdev ;
2005-04-16 15:20:36 -07:00
set_buffer_mapped ( bh ) ;
clear_buffer_delay ( bh ) ;
2006-01-11 15:40:13 +11:00
clear_buffer_unwritten ( bh ) ;
2005-04-16 15:20:36 -07:00
}
/*
2006-01-11 20:49:28 +11:00
* Look for a page at index that is suitable for clustering .
2005-04-16 15:20:36 -07:00
*/
STATIC unsigned int
2006-01-11 20:49:28 +11:00
xfs_probe_page (
2006-01-11 20:48:14 +11:00
struct page * page ,
2006-01-11 20:49:28 +11:00
unsigned int pg_offset ,
int mapped )
2005-04-16 15:20:36 -07:00
{
int ret = 0 ;
if ( PageWriteback ( page ) )
2006-01-11 20:48:14 +11:00
return 0 ;
2005-04-16 15:20:36 -07:00
if ( page - > mapping & & PageDirty ( page ) ) {
if ( page_has_buffers ( page ) ) {
struct buffer_head * bh , * head ;
bh = head = page_buffers ( page ) ;
do {
2006-01-11 20:49:28 +11:00
if ( ! buffer_uptodate ( bh ) )
break ;
if ( mapped ! = buffer_mapped ( bh ) )
2005-04-16 15:20:36 -07:00
break ;
ret + = bh - > b_size ;
if ( ret > = pg_offset )
break ;
} while ( ( bh = bh - > b_this_page ) ! = head ) ;
} else
2006-01-11 20:49:28 +11:00
ret = mapped ? 0 : PAGE_CACHE_SIZE ;
2005-04-16 15:20:36 -07:00
}
return ret ;
}
2006-01-11 15:40:13 +11:00
STATIC size_t
2006-01-11 20:49:28 +11:00
xfs_probe_cluster (
2005-04-16 15:20:36 -07:00
struct inode * inode ,
struct page * startpage ,
struct buffer_head * bh ,
2006-01-11 20:49:28 +11:00
struct buffer_head * head ,
int mapped )
2005-04-16 15:20:36 -07:00
{
2006-01-11 20:48:14 +11:00
struct pagevec pvec ;
2005-04-16 15:20:36 -07:00
pgoff_t tindex , tlast , tloff ;
2006-01-11 20:48:14 +11:00
size_t total = 0 ;
int done = 0 , i ;
2005-04-16 15:20:36 -07:00
/* First sum forwards in this page */
do {
2006-02-28 12:30:30 +11:00
if ( ! buffer_uptodate ( bh ) | | ( mapped ! = buffer_mapped ( bh ) ) )
2006-01-11 20:48:14 +11:00
return total ;
2005-04-16 15:20:36 -07:00
total + = bh - > b_size ;
} while ( ( bh = bh - > b_this_page ) ! = head ) ;
2006-01-11 20:48:14 +11:00
/* if we reached the end of the page, sum forwards in following pages */
tlast = i_size_read ( inode ) > > PAGE_CACHE_SHIFT ;
tindex = startpage - > index + 1 ;
/* Prune this back to avoid pathological behavior */
tloff = min ( tlast , startpage - > index + 64 ) ;
pagevec_init ( & pvec , 0 ) ;
while ( ! done & & tindex < = tloff ) {
unsigned len = min_t ( pgoff_t , PAGEVEC_SIZE , tlast - tindex + 1 ) ;
if ( ! pagevec_lookup ( & pvec , inode - > i_mapping , tindex , len ) )
break ;
for ( i = 0 ; i < pagevec_count ( & pvec ) ; i + + ) {
struct page * page = pvec . pages [ i ] ;
size_t pg_offset , len = 0 ;
if ( tindex = = tlast ) {
pg_offset =
i_size_read ( inode ) & ( PAGE_CACHE_SIZE - 1 ) ;
2006-01-11 20:48:33 +11:00
if ( ! pg_offset ) {
done = 1 ;
2006-01-11 20:48:14 +11:00
break ;
2006-01-11 20:48:33 +11:00
}
2006-01-11 20:48:14 +11:00
} else
pg_offset = PAGE_CACHE_SIZE ;
if ( page - > index = = tindex & & ! TestSetPageLocked ( page ) ) {
2006-01-11 20:49:28 +11:00
len = xfs_probe_page ( page , pg_offset , mapped ) ;
2006-01-11 20:48:14 +11:00
unlock_page ( page ) ;
}
if ( ! len ) {
done = 1 ;
break ;
}
2005-04-16 15:20:36 -07:00
total + = len ;
2006-01-11 20:48:33 +11:00
tindex + + ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 20:48:14 +11:00
pagevec_release ( & pvec ) ;
cond_resched ( ) ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 20:48:14 +11:00
2005-04-16 15:20:36 -07:00
return total ;
}
/*
2006-01-11 20:48:14 +11:00
* Test if a given page is suitable for writing as part of an unwritten
* or delayed allocate extent .
2005-04-16 15:20:36 -07:00
*/
2006-01-11 20:48:14 +11:00
STATIC int
xfs_is_delayed_page (
struct page * page ,
2006-01-11 15:40:13 +11:00
unsigned int type )
2005-04-16 15:20:36 -07:00
{
if ( PageWriteback ( page ) )
2006-01-11 20:48:14 +11:00
return 0 ;
2005-04-16 15:20:36 -07:00
if ( page - > mapping & & page_has_buffers ( page ) ) {
struct buffer_head * bh , * head ;
int acceptable = 0 ;
bh = head = page_buffers ( page ) ;
do {
2006-01-11 15:40:13 +11:00
if ( buffer_unwritten ( bh ) )
acceptable = ( type = = IOMAP_UNWRITTEN ) ;
else if ( buffer_delay ( bh ) )
acceptable = ( type = = IOMAP_DELAY ) ;
2006-03-22 12:47:40 +11:00
else if ( buffer_dirty ( bh ) & & buffer_mapped ( bh ) )
2006-01-11 20:49:28 +11:00
acceptable = ( type = = 0 ) ;
2006-01-11 15:40:13 +11:00
else
2005-04-16 15:20:36 -07:00
break ;
} while ( ( bh = bh - > b_this_page ) ! = head ) ;
if ( acceptable )
2006-01-11 20:48:14 +11:00
return 1 ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 20:48:14 +11:00
return 0 ;
2005-04-16 15:20:36 -07:00
}
/*
* Allocate & map buffers for page given the extent map . Write it out .
* except for the original page of a writepage , this is called on
* delalloc / unwritten pages only , for the original page it is possible
* that the page has no mapping at all .
*/
2006-01-11 15:40:13 +11:00
STATIC int
2005-04-16 15:20:36 -07:00
xfs_convert_page (
struct inode * inode ,
struct page * page ,
2006-01-11 20:48:14 +11:00
loff_t tindex ,
2006-01-11 20:48:33 +11:00
xfs_iomap_t * mp ,
2006-01-11 15:40:13 +11:00
xfs_ioend_t * * ioendp ,
2005-04-16 15:20:36 -07:00
struct writeback_control * wbc ,
int startio ,
int all_bh )
{
2006-01-11 15:40:13 +11:00
struct buffer_head * bh , * head ;
2006-01-11 20:48:47 +11:00
xfs_off_t end_offset ;
unsigned long p_offset ;
2006-01-11 15:40:13 +11:00
unsigned int type ;
2005-04-16 15:20:36 -07:00
int bbits = inode - > i_blkbits ;
2005-05-05 13:33:20 -07:00
int len , page_dirty ;
2006-01-11 15:40:13 +11:00
int count = 0 , done = 0 , uptodate = 1 ;
2006-01-11 20:48:47 +11:00
xfs_off_t offset = page_offset ( page ) ;
2005-04-16 15:20:36 -07:00
2006-01-11 20:48:14 +11:00
if ( page - > index ! = tindex )
goto fail ;
if ( TestSetPageLocked ( page ) )
goto fail ;
if ( PageWriteback ( page ) )
goto fail_unlock_page ;
if ( page - > mapping ! = inode - > i_mapping )
goto fail_unlock_page ;
if ( ! xfs_is_delayed_page ( page , ( * ioendp ) - > io_type ) )
goto fail_unlock_page ;
2005-05-05 13:33:20 -07:00
/*
* page_dirty is initially a count of buffers on the page before
2006-03-29 08:55:14 +10:00
* EOF and is decremented as we move each into a cleanable state .
2006-01-11 20:48:47 +11:00
*
* Derivation :
*
* End offset is the highest offset that this page should represent .
* If we are on the last page , ( end_offset & ( PAGE_CACHE_SIZE - 1 ) )
* will evaluate non - zero and be less than PAGE_CACHE_SIZE and
* hence give us the correct page_dirty count . On any other page ,
* it will be zero and in that case we need page_dirty to be the
* count of buffers on the page .
2005-05-05 13:33:20 -07:00
*/
2006-01-11 20:48:47 +11:00
end_offset = min_t ( unsigned long long ,
( xfs_off_t ) ( page - > index + 1 ) < < PAGE_CACHE_SHIFT ,
i_size_read ( inode ) ) ;
2005-05-05 13:33:20 -07:00
len = 1 < < inode - > i_blkbits ;
2006-01-11 20:48:47 +11:00
p_offset = min_t ( unsigned long , end_offset & ( PAGE_CACHE_SIZE - 1 ) ,
PAGE_CACHE_SIZE ) ;
p_offset = p_offset ? roundup ( p_offset , len ) : PAGE_CACHE_SIZE ;
page_dirty = p_offset / len ;
2005-05-05 13:33:20 -07:00
2005-04-16 15:20:36 -07:00
bh = head = page_buffers ( page ) ;
do {
2006-01-11 20:48:47 +11:00
if ( offset > = end_offset )
2005-04-16 15:20:36 -07:00
break ;
2006-01-11 15:40:13 +11:00
if ( ! buffer_uptodate ( bh ) )
uptodate = 0 ;
if ( ! ( PageUptodate ( page ) | | buffer_uptodate ( bh ) ) ) {
done = 1 ;
2005-04-16 15:20:36 -07:00
continue ;
2006-01-11 15:40:13 +11:00
}
2006-01-11 20:48:47 +11:00
if ( buffer_unwritten ( bh ) | | buffer_delay ( bh ) ) {
if ( buffer_unwritten ( bh ) )
type = IOMAP_UNWRITTEN ;
else
type = IOMAP_DELAY ;
if ( ! xfs_iomap_valid ( mp , offset ) ) {
2006-01-11 15:40:13 +11:00
done = 1 ;
2006-01-11 20:48:47 +11:00
continue ;
}
ASSERT ( ! ( mp - > iomap_flags & IOMAP_HOLE ) ) ;
ASSERT ( ! ( mp - > iomap_flags & IOMAP_DELAY ) ) ;
xfs_map_at_offset ( bh , offset , bbits , mp ) ;
if ( startio ) {
2006-01-11 20:49:16 +11:00
xfs_add_to_ioend ( inode , bh , offset ,
2006-01-11 20:48:47 +11:00
type , ioendp , done ) ;
} else {
set_buffer_dirty ( bh ) ;
unlock_buffer ( bh ) ;
mark_buffer_dirty ( bh ) ;
}
page_dirty - - ;
count + + ;
} else {
type = 0 ;
if ( buffer_mapped ( bh ) & & all_bh & & startio ) {
2005-04-16 15:20:36 -07:00
lock_buffer ( bh ) ;
2006-01-11 20:49:16 +11:00
xfs_add_to_ioend ( inode , bh , offset ,
2006-01-11 15:40:13 +11:00
type , ioendp , done ) ;
count + + ;
2005-05-05 13:33:20 -07:00
page_dirty - - ;
2006-01-11 20:48:47 +11:00
} else {
done = 1 ;
2005-04-16 15:20:36 -07:00
}
}
2006-01-11 20:49:16 +11:00
} while ( offset + = len , ( bh = bh - > b_this_page ) ! = head ) ;
2005-04-16 15:20:36 -07:00
2006-01-11 15:40:13 +11:00
if ( uptodate & & bh = = head )
SetPageUptodate ( page ) ;
if ( startio ) {
2006-01-11 20:49:42 +11:00
if ( count ) {
struct backing_dev_info * bdi ;
bdi = inode - > i_mapping - > backing_dev_info ;
2006-02-07 20:27:24 +11:00
wbc - > nr_to_write - - ;
2006-01-11 20:49:42 +11:00
if ( bdi_write_congested ( bdi ) ) {
wbc - > encountered_congestion = 1 ;
done = 1 ;
2006-02-07 20:27:24 +11:00
} else if ( wbc - > nr_to_write < = 0 ) {
2006-01-11 20:49:42 +11:00
done = 1 ;
}
}
2006-01-11 15:40:13 +11:00
xfs_start_page_writeback ( page , wbc , ! page_dirty , count ) ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 15:40:13 +11:00
return done ;
2006-01-11 20:48:14 +11:00
fail_unlock_page :
unlock_page ( page ) ;
fail :
return 1 ;
2005-04-16 15:20:36 -07:00
}
/*
* Convert & write out a cluster of pages in the same extent as defined
* by mp and following the start page .
*/
STATIC void
xfs_cluster_write (
struct inode * inode ,
pgoff_t tindex ,
xfs_iomap_t * iomapp ,
2006-01-11 15:40:13 +11:00
xfs_ioend_t * * ioendp ,
2005-04-16 15:20:36 -07:00
struct writeback_control * wbc ,
int startio ,
int all_bh ,
pgoff_t tlast )
{
2006-01-11 20:48:14 +11:00
struct pagevec pvec ;
int done = 0 , i ;
2005-04-16 15:20:36 -07:00
2006-01-11 20:48:14 +11:00
pagevec_init ( & pvec , 0 ) ;
while ( ! done & & tindex < = tlast ) {
unsigned len = min_t ( pgoff_t , PAGEVEC_SIZE , tlast - tindex + 1 ) ;
if ( ! pagevec_lookup ( & pvec , inode - > i_mapping , tindex , len ) )
2005-04-16 15:20:36 -07:00
break ;
2006-01-11 20:48:14 +11:00
for ( i = 0 ; i < pagevec_count ( & pvec ) ; i + + ) {
done = xfs_convert_page ( inode , pvec . pages [ i ] , tindex + + ,
iomapp , ioendp , wbc , startio , all_bh ) ;
if ( done )
break ;
}
pagevec_release ( & pvec ) ;
cond_resched ( ) ;
2005-04-16 15:20:36 -07:00
}
}
/*
* Calling this without startio set means we are being asked to make a dirty
* page ready for freeing its buffers. When called with startio set, we
* are coming from writepage.
*
* When called with startio set it is important that we write the WHOLE
* page if possible .
* The bh - > b_state ' s cannot know if any of the blocks or which block for
* that matter are dirty due to mmap writes , and therefore bh uptodate is
2006-03-29 08:55:14 +10:00
* only valid if the page itself isn ' t completely uptodate . Some layers
2005-04-16 15:20:36 -07:00
* may clear the page dirty flag prior to calling write page , under the
* assumption the entire page will be written out ; by not writing out the
* whole page the page can be reused before all valid dirty data is
* written out. Note: in the case of a page that has been dirtied by
* mapwrite but only partially set up by block_prepare_write, the
* bh->b_state values will not agree and only the ones set up by BPW/BCW
* will have valid state; thus the whole page must be written out.
*/
STATIC int
xfs_page_state_convert (
struct inode * inode ,
struct page * page ,
struct writeback_control * wbc ,
int startio ,
int unmapped ) /* also implies page uptodate */
{
2006-01-11 15:40:13 +11:00
struct buffer_head * bh , * head ;
2006-01-11 20:48:33 +11:00
xfs_iomap_t iomap ;
2006-01-11 15:40:13 +11:00
xfs_ioend_t * ioend = NULL , * iohead = NULL ;
2005-04-16 15:20:36 -07:00
loff_t offset ;
unsigned long p_offset = 0 ;
2006-01-11 15:40:13 +11:00
unsigned int type ;
2005-04-16 15:20:36 -07:00
__uint64_t end_offset ;
pgoff_t end_index , last_index , tlast ;
2006-01-11 20:49:02 +11:00
ssize_t size , len ;
int flags , err , iomap_valid = 0 , uptodate = 1 ;
2006-04-11 15:10:55 +10:00
int page_dirty , count = 0 ;
int trylock = 0 ;
2006-01-11 20:49:28 +11:00
int all_bh = unmapped ;
2005-04-16 15:20:36 -07:00
2006-04-11 15:10:55 +10:00
if ( startio ) {
if ( wbc - > sync_mode = = WB_SYNC_NONE & & wbc - > nonblocking )
trylock | = BMAPI_TRYLOCK ;
}
2005-05-05 13:31:34 -07:00
2005-04-16 15:20:36 -07:00
/* Is this page beyond the end of the file? */
offset = i_size_read ( inode ) ;
end_index = offset > > PAGE_CACHE_SHIFT ;
last_index = ( offset - 1 ) > > PAGE_CACHE_SHIFT ;
if ( page - > index > = end_index ) {
if ( ( page - > index > = end_index + 1 ) | |
! ( i_size_read ( inode ) & ( PAGE_CACHE_SIZE - 1 ) ) ) {
2005-11-02 15:14:09 +11:00
if ( startio )
unlock_page ( page ) ;
return 0 ;
2005-04-16 15:20:36 -07:00
}
}
/*
2005-05-05 13:33:20 -07:00
* page_dirty is initially a count of buffers on the page before
2006-03-29 08:55:14 +10:00
* EOF and is decremented as we move each into a cleanable state .
2006-01-11 15:40:13 +11:00
*
* Derivation :
*
* End offset is the highest offset that this page should represent .
* If we are on the last page , ( end_offset & ( PAGE_CACHE_SIZE - 1 ) )
* will evaluate non - zero and be less than PAGE_CACHE_SIZE and
* hence give us the correct page_dirty count . On any other page ,
* it will be zero and in that case we need page_dirty to be the
* count of buffers on the page .
*/
end_offset = min_t ( unsigned long long ,
( xfs_off_t ) ( page - > index + 1 ) < < PAGE_CACHE_SHIFT , offset ) ;
2005-05-05 13:33:20 -07:00
len = 1 < < inode - > i_blkbits ;
2006-01-11 15:40:13 +11:00
p_offset = min_t ( unsigned long , end_offset & ( PAGE_CACHE_SIZE - 1 ) ,
PAGE_CACHE_SIZE ) ;
p_offset = p_offset ? roundup ( p_offset , len ) : PAGE_CACHE_SIZE ;
2005-05-05 13:33:20 -07:00
page_dirty = p_offset / len ;
bh = head = page_buffers ( page ) ;
2006-01-11 15:40:13 +11:00
offset = page_offset ( page ) ;
2006-01-11 20:49:28 +11:00
flags = - 1 ;
type = 0 ;
2006-01-11 15:40:13 +11:00
/* TODO: cleanup count and page_dirty */
2005-04-16 15:20:36 -07:00
do {
if ( offset > = end_offset )
break ;
if ( ! buffer_uptodate ( bh ) )
uptodate = 0 ;
2006-01-11 15:40:13 +11:00
if ( ! ( PageUptodate ( page ) | | buffer_uptodate ( bh ) ) & & ! startio ) {
2006-01-11 20:48:33 +11:00
/*
* the iomap is actually still valid , but the ioend
* isn ' t . shouldn ' t happen too often .
*/
iomap_valid = 0 ;
2005-04-16 15:20:36 -07:00
continue ;
2006-01-11 15:40:13 +11:00
}
2005-04-16 15:20:36 -07:00
2006-01-11 20:48:33 +11:00
if ( iomap_valid )
iomap_valid = xfs_iomap_valid ( & iomap , offset ) ;
2005-04-16 15:20:36 -07:00
/*
* First case , map an unwritten extent and prepare for
* extent state conversion transaction on completion .
2006-01-11 15:40:13 +11:00
*
2005-04-16 15:20:36 -07:00
* Second case , allocate space for a delalloc buffer .
* We can return EAGAIN here in the release page case .
2006-01-11 20:49:02 +11:00
*
* Third case , an unmapped buffer was found , and we are
* in a path where we need to write the whole page out .
*/
if ( buffer_unwritten ( bh ) | | buffer_delay ( bh ) | |
( ( buffer_uptodate ( bh ) | | PageUptodate ( page ) ) & &
! buffer_mapped ( bh ) & & ( unmapped | | startio ) ) ) {
2006-01-11 20:49:28 +11:00
/*
* Make sure we don ' t use a read - only iomap
*/
if ( flags = = BMAPI_READ )
iomap_valid = 0 ;
2006-01-11 15:40:13 +11:00
if ( buffer_unwritten ( bh ) ) {
type = IOMAP_UNWRITTEN ;
2006-04-11 15:10:55 +10:00
flags = BMAPI_WRITE | BMAPI_IGNSTATE ;
2006-01-11 20:49:02 +11:00
} else if ( buffer_delay ( bh ) ) {
2006-01-11 15:40:13 +11:00
type = IOMAP_DELAY ;
2006-04-11 15:10:55 +10:00
flags = BMAPI_ALLOCATE | trylock ;
2006-01-11 20:49:02 +11:00
} else {
2006-01-11 20:49:28 +11:00
type = IOMAP_NEW ;
2006-04-11 15:10:55 +10:00
flags = BMAPI_WRITE | BMAPI_MMAP ;
2006-01-11 15:40:13 +11:00
}
2006-01-11 20:48:33 +11:00
if ( ! iomap_valid ) {
2006-01-11 20:49:28 +11:00
if ( type = = IOMAP_NEW ) {
size = xfs_probe_cluster ( inode ,
page , bh , head , 0 ) ;
2006-01-11 20:49:02 +11:00
} else {
size = len ;
}
err = xfs_map_blocks ( inode , offset , size ,
& iomap , flags ) ;
2006-01-11 15:40:13 +11:00
if ( err )
2005-04-16 15:20:36 -07:00
goto error ;
2006-01-11 20:48:33 +11:00
iomap_valid = xfs_iomap_valid ( & iomap , offset ) ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 20:48:33 +11:00
if ( iomap_valid ) {
xfs_map_at_offset ( bh , offset ,
inode - > i_blkbits , & iomap ) ;
2005-04-16 15:20:36 -07:00
if ( startio ) {
2006-01-11 20:49:16 +11:00
xfs_add_to_ioend ( inode , bh , offset ,
2006-01-11 20:48:33 +11:00
type , & ioend ,
! iomap_valid ) ;
2005-04-16 15:20:36 -07:00
} else {
set_buffer_dirty ( bh ) ;
unlock_buffer ( bh ) ;
mark_buffer_dirty ( bh ) ;
}
page_dirty - - ;
2006-01-11 15:40:13 +11:00
count + + ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 20:49:02 +11:00
} else if ( buffer_uptodate ( bh ) & & startio ) {
2006-01-11 20:49:28 +11:00
/*
* we got here because the buffer is already mapped .
* That means it must already have extents allocated
* underneath it . Map the extent by reading it .
*/
if ( ! iomap_valid | | type ! = 0 ) {
flags = BMAPI_READ ;
size = xfs_probe_cluster ( inode , page , bh ,
head , 1 ) ;
err = xfs_map_blocks ( inode , offset , size ,
& iomap , flags ) ;
if ( err )
goto error ;
iomap_valid = xfs_iomap_valid ( & iomap , offset ) ;
}
2006-01-11 20:49:02 +11:00
2006-01-11 20:49:28 +11:00
type = 0 ;
2006-01-11 20:49:02 +11:00
if ( ! test_and_set_bit ( BH_Lock , & bh - > b_state ) ) {
ASSERT ( buffer_mapped ( bh ) ) ;
2006-01-11 20:49:28 +11:00
if ( iomap_valid )
all_bh = 1 ;
2006-01-11 20:49:16 +11:00
xfs_add_to_ioend ( inode , bh , offset , type ,
2006-01-11 20:49:02 +11:00
& ioend , ! iomap_valid ) ;
page_dirty - - ;
count + + ;
2006-01-11 15:40:13 +11:00
} else {
2006-01-11 20:48:33 +11:00
iomap_valid = 0 ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 20:49:02 +11:00
} else if ( ( buffer_uptodate ( bh ) | | PageUptodate ( page ) ) & &
( unmapped | | startio ) ) {
iomap_valid = 0 ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 15:40:13 +11:00
if ( ! iohead )
iohead = ioend ;
} while ( offset + = len , ( ( bh = bh - > b_this_page ) ! = head ) ) ;
2005-04-16 15:20:36 -07:00
if ( uptodate & & bh = = head )
SetPageUptodate ( page ) ;
2006-01-11 15:40:13 +11:00
if ( startio )
xfs_start_page_writeback ( page , wbc , 1 , count ) ;
2005-04-16 15:20:36 -07:00
2006-01-11 20:48:33 +11:00
if ( ioend & & iomap_valid ) {
offset = ( iomap . iomap_offset + iomap . iomap_bsize - 1 ) > >
2005-04-16 15:20:36 -07:00
PAGE_CACHE_SHIFT ;
2005-05-05 13:33:01 -07:00
tlast = min_t ( pgoff_t , offset , last_index ) ;
2006-01-11 20:48:33 +11:00
xfs_cluster_write ( inode , page - > index + 1 , & iomap , & ioend ,
2006-01-11 20:49:28 +11:00
wbc , startio , all_bh , tlast ) ;
2005-04-16 15:20:36 -07:00
}
2006-01-11 15:40:13 +11:00
if ( iohead )
xfs_submit_ioend ( iohead ) ;
2005-04-16 15:20:36 -07:00
return page_dirty ;
error :
2006-01-11 15:40:13 +11:00
if ( iohead )
xfs_cancel_ioend ( iohead ) ;
2005-04-16 15:20:36 -07:00
/*
* If it ' s delalloc and we have nowhere to put it ,
* throw it away , unless the lower layers told
* us to try again .
*/
if ( err ! = - EAGAIN ) {
2006-01-11 15:40:13 +11:00
if ( ! unmapped )
2005-04-16 15:20:36 -07:00
block_invalidatepage ( page , 0 ) ;
ClearPageUptodate ( page ) ;
}
return err ;
}
2006-03-14 13:26:27 +11:00
/*
* writepage : Called from one of two places :
*
* 1. we are flushing a delalloc buffer head .
*
* 2. we are writing out a dirty page . Typically the page dirty
* state is cleared before we get here . In this case is it
* conceivable we have no buffer heads .
*
* For delalloc space on the page we need to allocate space and
* flush it . For unmapped buffer heads on the page we should
* allocate space if the page is uptodate . For any other dirty
* buffer heads on the page we should flush them .
*
* If we detect that a transaction would be required to flush
* the page , we have to check the process flags first , if we
* are already in a transaction or disk I / O during allocations
* is off , we need to fail the writepage and redirty the page .
*/
STATIC int
2006-03-14 13:54:26 +11:00
xfs_vm_writepage (
2006-03-14 13:26:27 +11:00
struct page * page ,
struct writeback_control * wbc )
{
int error ;
int need_trans ;
int delalloc , unmapped , unwritten ;
struct inode * inode = page - > mapping - > host ;
xfs_page_trace ( XFS_WRITEPAGE_ENTER , inode , page , 0 ) ;
/*
* We need a transaction if :
* 1. There are delalloc buffers on the page
* 2. The page is uptodate and we have unmapped buffers
* 3. The page is uptodate and we have no buffers
* 4. There are unwritten buffers on the page
*/
if ( ! page_has_buffers ( page ) ) {
unmapped = 1 ;
need_trans = 1 ;
} else {
xfs_count_page_state ( page , & delalloc , & unmapped , & unwritten ) ;
if ( ! PageUptodate ( page ) )
unmapped = 0 ;
need_trans = delalloc + unmapped + unwritten ;
}
/*
* If we need a transaction and the process flags say
* we are already in a transaction , or no IO is allowed
* then mark the page dirty again and leave the page
* as is .
*/
2006-06-09 14:59:13 +10:00
if ( current_test_flags ( PF_FSTRANS ) & & need_trans )
2006-03-14 13:26:27 +11:00
goto out_fail ;
/*
* Delay hooking up buffer heads until we have
* made our go / no - go decision .
*/
if ( ! page_has_buffers ( page ) )
create_empty_buffers ( page , 1 < < inode - > i_blkbits , 0 ) ;
/*
* Convert delayed allocate , unwritten or unmapped space
* to real space and flush out to disk .
*/
error = xfs_page_state_convert ( inode , page , wbc , 1 , unmapped ) ;
if ( error = = - EAGAIN )
goto out_fail ;
if ( unlikely ( error < 0 ) )
goto out_unlock ;
return 0 ;
out_fail :
redirty_page_for_writepage ( wbc , page ) ;
unlock_page ( page ) ;
return 0 ;
out_unlock :
unlock_page ( page ) ;
return error ;
}
2006-06-09 15:27:16 +10:00
STATIC int
xfs_vm_writepages (
struct address_space * mapping ,
struct writeback_control * wbc )
{
2006-06-09 17:00:52 +10:00
struct bhv_vnode * vp = vn_from_inode ( mapping - > host ) ;
2006-06-09 15:27:16 +10:00
if ( VN_TRUNC ( vp ) )
VUNTRUNCATE ( vp ) ;
return generic_writepages ( mapping , wbc ) ;
}
2006-03-14 13:26:27 +11:00
/*
* Called to move a page into cleanable state - and from there
* to be released . Possibly the page is already clean . We always
* have buffer heads in this call .
*
* Returns 0 if the page is ok to release , 1 otherwise .
*
* Possible scenarios are :
*
* 1. We are being called to release a page which has been written
* to via regular I / O . buffer heads will be dirty and possibly
* delalloc . If no delalloc buffer heads in this case then we
* can just return zero .
*
* 2. We are called to release a page which has been written via
* mmap , all we need to do is ensure there is no delalloc
* state in the buffer heads , if not we can let the caller
* free them and we should come back later via writepage .
*/
STATIC int
2006-03-17 17:26:25 +11:00
xfs_vm_releasepage (
2006-03-14 13:26:27 +11:00
struct page * page ,
gfp_t gfp_mask )
{
struct inode * inode = page - > mapping - > host ;
int dirty , delalloc , unmapped , unwritten ;
struct writeback_control wbc = {
. sync_mode = WB_SYNC_ALL ,
. nr_to_write = 1 ,
} ;
2006-09-28 10:56:43 +10:00
xfs_page_trace ( XFS_RELEASEPAGE_ENTER , inode , page , 0 ) ;
2006-03-14 13:26:27 +11:00
2006-03-17 17:26:25 +11:00
if ( ! page_has_buffers ( page ) )
return 0 ;
2006-03-14 13:26:27 +11:00
xfs_count_page_state ( page , & delalloc , & unmapped , & unwritten ) ;
if ( ! delalloc & & ! unwritten )
goto free_buffers ;
if ( ! ( gfp_mask & __GFP_FS ) )
return 0 ;
/* If we are already inside a transaction or the thread cannot
* do I / O , we cannot release this page .
*/
2006-06-09 14:59:13 +10:00
if ( current_test_flags ( PF_FSTRANS ) )
2006-03-14 13:26:27 +11:00
return 0 ;
/*
* Convert delalloc space to real space , do not flush the
* data out to disk , that will be done by the caller .
* Never need to allocate space here - we will always
* come back to writepage in that case .
*/
dirty = xfs_page_state_convert ( inode , page , & wbc , 0 , 0 ) ;
if ( dirty = = 0 & & ! unwritten )
goto free_buffers ;
return 0 ;
free_buffers :
return try_to_free_buffers ( page ) ;
}
2005-04-16 15:20:36 -07:00
STATIC int
2006-03-29 10:44:40 +10:00
__xfs_get_blocks (
2005-04-16 15:20:36 -07:00
struct inode * inode ,
sector_t iblock ,
struct buffer_head * bh_result ,
int create ,
int direct ,
bmapi_flags_t flags )
{
2006-06-09 17:00:52 +10:00
bhv_vnode_t * vp = vn_from_inode ( inode ) ;
2005-04-16 15:20:36 -07:00
xfs_iomap_t iomap ;
2005-11-02 15:13:13 +11:00
xfs_off_t offset ;
ssize_t size ;
2006-03-29 10:44:40 +10:00
int niomap = 1 ;
2005-04-16 15:20:36 -07:00
int error ;
2005-11-02 15:13:13 +11:00
offset = ( xfs_off_t ) iblock < < inode - > i_blkbits ;
2006-03-29 10:44:40 +10:00
ASSERT ( bh_result - > b_size > = ( 1 < < inode - > i_blkbits ) ) ;
size = bh_result - > b_size ;
2006-06-09 17:00:52 +10:00
error = bhv_vop_bmap ( vp , offset , size ,
create ? flags : BMAPI_READ , & iomap , & niomap ) ;
2005-04-16 15:20:36 -07:00
if ( error )
return - error ;
2006-03-29 10:44:40 +10:00
if ( niomap = = 0 )
2005-04-16 15:20:36 -07:00
return 0 ;
if ( iomap . iomap_bn ! = IOMAP_DADDR_NULL ) {
2006-03-14 13:26:43 +11:00
/*
* For unwritten extents do not report a disk address on
2005-04-16 15:20:36 -07:00
* the read case ( treat as if we ' re reading into a hole ) .
*/
if ( create | | ! ( iomap . iomap_flags & IOMAP_UNWRITTEN ) ) {
2006-03-14 13:26:43 +11:00
xfs_map_buffer ( bh_result , & iomap , offset ,
inode - > i_blkbits ) ;
2005-04-16 15:20:36 -07:00
}
if ( create & & ( iomap . iomap_flags & IOMAP_UNWRITTEN ) ) {
if ( direct )
bh_result - > b_private = inode ;
set_buffer_unwritten ( bh_result ) ;
set_buffer_delay ( bh_result ) ;
}
}
2006-03-29 10:44:40 +10:00
/*
* If this is a realtime file , data may be on a different device .
* to that pointed to from the buffer_head b_bdev currently .
*/
2006-01-11 15:39:08 +11:00
bh_result - > b_bdev = iomap . iomap_target - > bt_bdev ;
2005-04-16 15:20:36 -07:00
2006-03-29 10:44:40 +10:00
/*
* If we previously allocated a block out beyond eof and we are
* now coming back to use it then we will need to flag it as new
* even if it has a disk address .
2005-04-16 15:20:36 -07:00
*/
if ( create & &
( ( ! buffer_mapped ( bh_result ) & & ! buffer_uptodate ( bh_result ) ) | |
2005-11-02 15:13:13 +11:00
( offset > = i_size_read ( inode ) ) | | ( iomap . iomap_flags & IOMAP_NEW ) ) )
2005-04-16 15:20:36 -07:00
set_buffer_new ( bh_result ) ;
if ( iomap . iomap_flags & IOMAP_DELAY ) {
BUG_ON ( direct ) ;
if ( create ) {
set_buffer_uptodate ( bh_result ) ;
set_buffer_mapped ( bh_result ) ;
set_buffer_delay ( bh_result ) ;
}
}
2006-03-29 10:44:40 +10:00
if ( direct | | size > ( 1 < < inode - > i_blkbits ) ) {
2005-11-02 15:13:13 +11:00
ASSERT ( iomap . iomap_bsize - iomap . iomap_delta > 0 ) ;
offset = min_t ( xfs_off_t ,
2006-03-29 10:44:40 +10:00
iomap . iomap_bsize - iomap . iomap_delta , size ) ;
bh_result - > b_size = ( ssize_t ) min_t ( xfs_off_t , LONG_MAX , offset ) ;
2005-04-16 15:20:36 -07:00
}
return 0 ;
}
int
2006-03-29 10:44:40 +10:00
xfs_get_blocks (
2005-04-16 15:20:36 -07:00
struct inode * inode ,
sector_t iblock ,
struct buffer_head * bh_result ,
int create )
{
2006-03-29 10:44:40 +10:00
return __xfs_get_blocks ( inode , iblock ,
2006-03-26 01:38:01 -08:00
bh_result , create , 0 , BMAPI_WRITE ) ;
2005-04-16 15:20:36 -07:00
}
STATIC int
2006-03-14 13:54:26 +11:00
xfs_get_blocks_direct (
2005-04-16 15:20:36 -07:00
struct inode * inode ,
sector_t iblock ,
struct buffer_head * bh_result ,
int create )
{
2006-03-29 10:44:40 +10:00
return __xfs_get_blocks ( inode , iblock ,
2006-03-26 01:38:02 -08:00
bh_result , create , 1 , BMAPI_WRITE | BMAPI_DIRECT ) ;
2005-04-16 15:20:36 -07:00
}
2005-09-05 08:22:52 +10:00
STATIC void
2006-03-14 13:54:26 +11:00
xfs_end_io_direct (
2005-09-05 08:22:52 +10:00
struct kiocb * iocb ,
loff_t offset ,
ssize_t size ,
void * private )
{
xfs_ioend_t * ioend = iocb - > private ;
/*
* Non - NULL private data means we need to issue a transaction to
* convert a range from unwritten to written extents . This needs
2006-03-29 08:55:14 +10:00
* to happen from process context but aio + dio I / O completion
2005-09-05 08:22:52 +10:00
* happens from irq context so we need to defer it to a workqueue .
2006-03-29 08:55:14 +10:00
* This is not necessary for synchronous direct I / O , but we do
2005-09-05 08:22:52 +10:00
* it anyway to keep the code uniform and simpler .
*
* The core direct I / O code might be changed to always call the
* completion handler in the future , in which case all this can
* go away .
*/
if ( private & & size > 0 ) {
ioend - > io_offset = offset ;
ioend - > io_size = size ;
xfs_finish_ioend ( ioend ) ;
} else {
xfs_destroy_ioend ( ioend ) ;
}
/*
2006-03-29 08:55:14 +10:00
* blockdev_direct_IO can return an error even after the I / O
2005-09-05 08:22:52 +10:00
* completion handler was called . Thus we need to protect
* against double - freeing .
*/
iocb - > private = NULL ;
}
2005-04-16 15:20:36 -07:00
STATIC ssize_t
2006-03-14 13:54:26 +11:00
xfs_vm_direct_IO (
2005-04-16 15:20:36 -07:00
int rw ,
struct kiocb * iocb ,
const struct iovec * iov ,
loff_t offset ,
unsigned long nr_segs )
{
struct file * file = iocb - > ki_filp ;
struct inode * inode = file - > f_mapping - > host ;
2006-06-09 17:00:52 +10:00
bhv_vnode_t * vp = vn_from_inode ( inode ) ;
2005-04-16 15:20:36 -07:00
xfs_iomap_t iomap ;
int maps = 1 ;
int error ;
2005-09-05 08:22:52 +10:00
ssize_t ret ;
2005-04-16 15:20:36 -07:00
2006-06-09 17:00:52 +10:00
error = bhv_vop_bmap ( vp , offset , 0 , BMAPI_DEVICE , & iomap , & maps ) ;
2005-04-16 15:20:36 -07:00
if ( error )
return - error ;
2006-01-11 15:40:13 +11:00
iocb - > private = xfs_alloc_ioend ( inode , IOMAP_UNWRITTEN ) ;
2005-09-05 08:22:52 +10:00
2006-09-07 14:27:05 +10:00
if ( rw = = WRITE ) {
ret = blockdev_direct_IO_own_locking ( rw , iocb , inode ,
iomap . iomap_target - > bt_bdev ,
iov , offset , nr_segs ,
xfs_get_blocks_direct ,
xfs_end_io_direct ) ;
} else {
ret = blockdev_direct_IO_no_locking ( rw , iocb , inode ,
iomap . iomap_target - > bt_bdev ,
iov , offset , nr_segs ,
xfs_get_blocks_direct ,
xfs_end_io_direct ) ;
}
2005-09-05 08:22:52 +10:00
2006-12-10 02:21:05 -08:00
if ( unlikely ( ret ! = - EIOCBQUEUED & & iocb - > private ) )
2005-09-05 08:22:52 +10:00
xfs_destroy_ioend ( iocb - > private ) ;
return ret ;
2005-04-16 15:20:36 -07:00
}
2006-03-14 13:26:27 +11:00
STATIC int
2006-03-14 13:54:26 +11:00
xfs_vm_prepare_write (
2006-03-14 13:26:27 +11:00
struct file * file ,
struct page * page ,
unsigned int from ,
unsigned int to )
{
2006-03-29 10:44:40 +10:00
return block_prepare_write ( page , from , to , xfs_get_blocks ) ;
2006-03-14 13:26:27 +11:00
}
2005-04-16 15:20:36 -07:00
STATIC sector_t
2006-03-14 13:54:26 +11:00
xfs_vm_bmap (
2005-04-16 15:20:36 -07:00
struct address_space * mapping ,
sector_t block )
{
struct inode * inode = ( struct inode * ) mapping - > host ;
2006-06-09 17:00:52 +10:00
bhv_vnode_t * vp = vn_from_inode ( inode ) ;
2005-04-16 15:20:36 -07:00
2006-03-14 13:54:26 +11:00
vn_trace_entry ( vp , __FUNCTION__ , ( inst_t * ) __return_address ) ;
2006-06-09 17:00:52 +10:00
bhv_vop_rwlock ( vp , VRWLOCK_READ ) ;
bhv_vop_flush_pages ( vp , ( xfs_off_t ) 0 , - 1 , 0 , FI_REMAPF ) ;
bhv_vop_rwunlock ( vp , VRWLOCK_READ ) ;
2006-03-29 10:44:40 +10:00
return generic_block_bmap ( mapping , block , xfs_get_blocks ) ;
2005-04-16 15:20:36 -07:00
}
STATIC int
2006-03-14 13:54:26 +11:00
xfs_vm_readpage (
2005-04-16 15:20:36 -07:00
struct file * unused ,
struct page * page )
{
2006-03-29 10:44:40 +10:00
return mpage_readpage ( page , xfs_get_blocks ) ;
2005-04-16 15:20:36 -07:00
}
STATIC int
2006-03-14 13:54:26 +11:00
xfs_vm_readpages (
2005-04-16 15:20:36 -07:00
struct file * unused ,
struct address_space * mapping ,
struct list_head * pages ,
unsigned nr_pages )
{
2006-03-29 10:44:40 +10:00
return mpage_readpages ( mapping , pages , nr_pages , xfs_get_blocks ) ;
2005-04-16 15:20:36 -07:00
}
2006-03-26 01:37:18 -08:00
STATIC void
2006-03-17 17:26:25 +11:00
xfs_vm_invalidatepage (
2005-09-02 16:40:17 +10:00
struct page * page ,
unsigned long offset )
{
xfs_page_trace ( XFS_INVALIDPAGE_ENTER ,
page - > mapping - > host , page , offset ) ;
2006-03-26 01:37:18 -08:00
block_invalidatepage ( page , offset ) ;
2005-09-02 16:40:17 +10:00
}
2006-06-28 04:26:44 -07:00
const struct address_space_operations xfs_address_space_operations = {
2006-03-14 13:54:26 +11:00
. readpage = xfs_vm_readpage ,
. readpages = xfs_vm_readpages ,
. writepage = xfs_vm_writepage ,
2006-06-09 15:27:16 +10:00
. writepages = xfs_vm_writepages ,
2005-04-16 15:20:36 -07:00
. sync_page = block_sync_page ,
2006-03-17 17:26:25 +11:00
. releasepage = xfs_vm_releasepage ,
. invalidatepage = xfs_vm_invalidatepage ,
2006-03-14 13:54:26 +11:00
. prepare_write = xfs_vm_prepare_write ,
2005-04-16 15:20:36 -07:00
. commit_write = generic_commit_write ,
2006-03-14 13:54:26 +11:00
. bmap = xfs_vm_bmap ,
. direct_IO = xfs_vm_direct_IO ,
2006-02-01 03:05:41 -08:00
. migratepage = buffer_migrate_page ,
2005-04-16 15:20:36 -07:00
} ;