/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_trans.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_vnodeops.h"
#include "xfs_da_btree.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"

#include <linux/dcache.h>
#include <linux/falloc.h>

static const struct vm_operations_struct xfs_file_vm_ops;

/*
 * Locking primitives for read and write IO paths to ensure we consistently use
 * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
 */
static inline void
xfs_rw_ilock(
	struct xfs_inode	*ip,
	int			type)
{
	if (type & XFS_IOLOCK_EXCL)
		mutex_lock(&VFS_I(ip)->i_mutex);
	xfs_ilock(ip, type);
}

static inline void
xfs_rw_iunlock(
	struct xfs_inode	*ip,
	int			type)
{
	xfs_iunlock(ip, type);
	if (type & XFS_IOLOCK_EXCL)
		mutex_unlock(&VFS_I(ip)->i_mutex);
}

static inline void
xfs_rw_ilock_demote(
	struct xfs_inode	*ip,
	int			type)
{
	xfs_ilock_demote(ip, type);
	if (type & XFS_IOLOCK_EXCL)
		mutex_unlock(&VFS_I(ip)->i_mutex);
}
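
/*
 * Illustrative sketch (editorial note, not part of the original source): the
 * typical pattern the IO paths below use with these helpers.  A writer that
 * must invalidate cached pages takes the iolock exclusive, does the flush,
 * then demotes to shared so reads and other direct IO can proceed in
 * parallel:
 *
 *	xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 *	... flush/invalidate the page cache ...
 *	xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 *	... issue the IO under the shared lock ...
 *	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 */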

/*
 *	xfs_iozero
 *
 *	xfs_iozero clears the specified range of the buffer supplied,
 *	and marks all the affected blocks as valid and modified.  If
 *	an affected block is not allocated, it will be allocated.  If
 *	an affected block is not completely overwritten, and is not
 *	valid before the operation, it will be read from disk before
 *	being partially zeroed.
 */
STATIC int
xfs_iozero(
	struct xfs_inode	*ip,	/* inode			*/
	loff_t			pos,	/* offset in file		*/
	size_t			count)	/* size of data to zero		*/
{
	struct page		*page;
	struct address_space	*mapping;
	int			status;

	mapping = VFS_I(ip)->i_mapping;
	do {
		unsigned offset, bytes;
		void *fsdata;

		offset = (pos & (PAGE_CACHE_SIZE - 1)); /* Within page */
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		status = pagecache_write_begin(NULL, mapping, pos, bytes,
					AOP_FLAG_UNINTERRUPTIBLE,
					&page, &fsdata);
		if (status)
			break;

		zero_user(page, offset, bytes);

		status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
					page, fsdata);
		WARN_ON(status <= 0); /* can't return less than zero! */
		pos += bytes;
		count -= bytes;
		status = 0;
	} while (count);

	return (-status);
}
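
/*
 * Worked example (editorial note, assuming PAGE_CACHE_SIZE == 4096):
 * xfs_iozero(ip, 5000, 9000) zeroes in page-sized chunks.  The first chunk
 * starts at offset 904 within its page and covers 3192 bytes (up to the
 * page boundary at 8192), the second covers a full 4096 bytes, and the
 * third covers the remaining 1712 bytes.
 */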

STATIC int
xfs_file_fsync(
	struct file		*file,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error = 0;
	int			log_flushed = 0;

	trace_xfs_file_fsync(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	xfs_ioend_wait(ip);

	if (mp->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If we have an RT and/or log subvolume we need to make sure
		 * to flush the write cache of the device used for file data
		 * first.  This is to ensure newly written file data makes
		 * it to disk before logging the new inode size in case of
		 * an extending write.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
		else if (mp->m_logdev_targp != mp->m_ddev_targp)
			xfs_blkdev_issue_flush(mp->m_ddev_targp);
	}

	/*
	 * We always need to make sure that the required inode state is safe on
	 * disk.  The inode might be clean but we still might need to force the
	 * log because of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional changes to the
	 * inode core that have to go to disk and this requires us to issue
	 * a synchronous transaction to capture these changes correctly.
	 *
	 * This code relies on the assumption that if the i_update_core field
	 * of the inode is clear and the inode is unpinned then it is clean
	 * and no action is required.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/*
	 * First check if the VFS inode is marked dirty.  All the dirtying
	 * of non-transactional updates now goes through mark_inode_dirty*,
	 * which allows us to distinguish between pure timestamp updates
	 * and i_size updates which need to be caught for fdatasync.
	 * After that also check for the dirty state in the XFS inode, which
	 * might get cleared when the inode gets written out via the AIL
	 * or xfs_iflush_cluster.
	 */
	if (((inode->i_state & I_DIRTY_DATASYNC) ||
	    ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
	    ip->i_update_core) {
		/*
		 * Kick off a transaction to log the inode core to get the
		 * updates.  The sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
		error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			return -error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed ourselves out
		 * of the way during trans_reserve which would flush the inode.
		 * But there's no guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).  Plus the buffer could be pinned
		 * anyway if it's part of an inode in another recent
		 * transaction.  So we play it safe and fire off the
		 * transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	} else {
		/*
		 * Timestamps/size haven't changed since last inode flush or
		 * inode transaction commit.  That means either nothing got
		 * written or a transaction committed which caught the updates.
		 * If the latter happened and the transaction hasn't hit the
		 * disk yet, the inode will still be pinned.  If it is,
		 * force the log.
		 */
		if (xfs_ipincount(ip)) {
			error = _xfs_log_force_lsn(mp,
					ip->i_itemp->ili_last_lsn,
					XFS_LOG_SYNC, &log_flushed);
		}
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
	}

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
	    mp->m_logdev_targp == mp->m_ddev_targp &&
	    !XFS_IS_REALTIME_INODE(ip) &&
	    !log_flushed)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return -error;
}
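
/*
 * Illustrative userspace sketch (editorial note, not part of the original
 * source): the datasync argument above corresponds to fdatasync(2), which
 * lets this function skip the inode-core transaction for pure timestamp
 * updates.  The file name is hypothetical:
 *
 *	int fd = open("file", O_WRONLY);
 *	write(fd, buf, len);
 *	fdatasync(fd);	// data and i_size safe; timestamps may not be
 *	fsync(fd);	// all inode metadata safe on disk
 */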

STATIC ssize_t
xfs_file_aio_read(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = 0;
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;
	unsigned long		seg;

	XFS_STATS_INC(xs_read_calls);

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	/* START copy & waste from filemap.c */
	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		size += iv->iov_len;
		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
			return XFS_ERROR(-EINVAL);
	}
	/* END copy & waste from filemap.c */

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		if ((iocb->ki_pos & target->bt_smask) ||
		    (size & target->bt_smask)) {
			if (iocb->ki_pos == ip->i_size)
				return 0;
			return -XFS_ERROR(EINVAL);
		}
	}

	n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		if (inode->i_mapping->nrpages) {
			ret = -xfs_flushinval_pages(ip,
					(iocb->ki_pos & PAGE_CACHE_MASK),
					-1, FI_REMAPF_LOCKED);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	} else
		xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);

	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);

	ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}

STATIC ssize_t
xfs_file_splice_read(
	struct file		*infilp,
	loff_t			*ppos,
	struct pipe_inode_info	*pipe,
	size_t			count,
	unsigned int		flags)
{
	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
	int			ioflags = 0;
	ssize_t			ret;

	XFS_STATS_INC(xs_read_calls);

	if (infilp->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);

	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);

	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}

STATIC void
xfs_aio_write_isize_update(
	struct inode	*inode,
	loff_t		*ppos,
	ssize_t		bytes_written)
{
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fsize_t		isize = i_size_read(inode);

	if (bytes_written > 0)
		XFS_STATS_ADD(xs_write_bytes, bytes_written);

	if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
					*ppos > isize))
		*ppos = isize;

	if (*ppos > ip->i_size) {
		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
		if (*ppos > ip->i_size)
			ip->i_size = *ppos;
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	}
}

/*
 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
 * part of the I/O may have been written to disk before the error occurred.  In
 * this case the on-disk file size may have been adjusted beyond the in-memory
 * file size and now needs to be truncated back.
 */
STATIC void
xfs_aio_write_newsize_update(
	struct xfs_inode	*ip)
{
	if (ip->i_new_size) {
		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
		ip->i_new_size = 0;
		if (ip->i_d.di_size > ip->i_size)
			ip->i_d.di_size = ip->i_size;
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	}
}

/*
 * xfs_file_splice_write() does not use xfs_rw_ilock() because
 * generic_file_splice_write() takes the i_mutex itself.  This, in theory,
 * could cause lock inversions between the aio_write path and the splice path
 * if someone is doing concurrent splice(2) based writes and write(2) based
 * writes to the same inode.  The only real way to fix this is to re-implement
 * the generic code here with correct locking orders.
 */
STATIC ssize_t
xfs_file_splice_write(
	struct pipe_inode_info	*pipe,
	struct file		*outfilp,
	loff_t			*ppos,
	size_t			count,
	unsigned int		flags)
{
	struct inode		*inode = outfilp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fsize_t		new_size;
	int			ioflags = 0;
	ssize_t			ret;

	XFS_STATS_INC(xs_write_calls);

	if (outfilp->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	new_size = *ppos + count;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (new_size > ip->i_size)
		ip->i_new_size = new_size;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);

	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);

	xfs_aio_write_isize_update(inode, ppos, ret);
	xfs_aio_write_newsize_update(ip);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return ret;
}

/*
 * This routine is called to handle zeroing any space in the last
 * block of the file that is beyond the EOF.  We do this since the
 * size is being increased without writing anything to that block
 * and we don't want anyone to read the garbage on the disk.
 */
STATIC int				/* error (positive) */
xfs_zero_last_block(
	xfs_inode_t	*ip,
	xfs_fsize_t	offset,
	xfs_fsize_t	isize)
{
	xfs_fileoff_t	last_fsb;
	xfs_mount_t	*mp = ip->i_mount;
	int		nimaps;
	int		zero_offset;
	int		zero_len;
	int		error = 0;
	xfs_bmbt_irec_t	imap;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
	if (zero_offset == 0) {
		/*
		 * There are no extra bytes in the last block on disk to
		 * zero, so return.
		 */
		return 0;
	}

	last_fsb = XFS_B_TO_FSBT(mp, isize);
	nimaps = 1;
	error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
			  &nimaps, NULL);
	if (error) {
		return error;
	}
	ASSERT(nimaps > 0);

	/*
	 * If the block underlying isize is just a hole, then there
	 * is nothing to zero.
	 */
	if (imap.br_startblock == HOLESTARTBLOCK) {
		return 0;
	}

	/*
	 * Zero the part of the last block beyond the EOF, and write it
	 * out sync.  We need to drop the ilock while we do this so we
	 * don't deadlock when the buffer cache calls back to us.
	 */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	zero_len = mp->m_sb.sb_blocksize - zero_offset;
	if (isize + zero_len > offset)
		zero_len = offset - isize;
	error = xfs_iozero(ip, isize, zero_len);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	ASSERT(error >= 0);
	return error;
}

/*
 * Zero any on disk space between the current EOF and the new,
 * larger EOF.  This handles the normal case of zeroing the remainder
 * of the last block in the file and the unusual case of zeroing blocks
 * out beyond the size of the file.  This second case only happens
 * with fixed size extents and when the system crashes before the inode
 * size was updated but after blocks were allocated.  If fill is set,
 * then any holes in the range are filled and zeroed.  If not, the holes
 * are left alone as holes.
 */
int					/* error (positive) */
xfs_zero_eof(
	xfs_inode_t	*ip,
	xfs_off_t	offset,		/* starting I/O offset */
	xfs_fsize_t	isize)		/* current inode size */
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	start_zero_fsb;
	xfs_fileoff_t	end_zero_fsb;
	xfs_fileoff_t	zero_count_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_fileoff_t	zero_off;
	xfs_fsize_t	zero_len;
	int		nimaps;
	int		error = 0;
	xfs_bmbt_irec_t	imap;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
	ASSERT(offset > isize);

	/*
	 * First handle zeroing the block on which isize resides.
	 * We only zero a part of that block so it is handled specially.
	 */
	error = xfs_zero_last_block(ip, offset, isize);
	if (error) {
		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
		return error;
	}

	/*
	 * Calculate the range between the new size and the old
	 * where blocks needing to be zeroed may exist.  To get the
	 * block where the last byte in the file currently resides,
	 * we need to subtract one from the size and truncate back
	 * to a block boundary.  We subtract 1 in case the size is
	 * exactly on a block boundary.
	 */
	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
	if (last_fsb == end_zero_fsb) {
		/*
		 * The size was only incremented on its last block.
		 * We took care of that above, so just return.
		 */
		return 0;
	}

	ASSERT(start_zero_fsb <= end_zero_fsb);
	while (start_zero_fsb <= end_zero_fsb) {
		nimaps = 1;
		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
		error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
				  0, NULL, 0, &imap, &nimaps, NULL);
		if (error) {
			ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
			return error;
		}
		ASSERT(nimaps > 0);

		if (imap.br_state == XFS_EXT_UNWRITTEN ||
		    imap.br_startblock == HOLESTARTBLOCK) {
			/*
			 * This loop handles initializing pages that were
			 * partially initialized by the code below this
			 * loop.  It basically zeroes the part of the page
			 * that sits on a hole and sets the page as P_HOLE
			 * and calls remapf if it is a mapped file.
			 */
			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
			continue;
		}

		/*
		 * There are blocks we need to zero.
		 * Drop the inode lock while we're doing the I/O.
		 * We'll still have the iolock to protect us.
		 */
		xfs_iunlock(ip, XFS_ILOCK_EXCL);

		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);

		if ((zero_off + zero_len) > offset)
			zero_len = offset - zero_off;

		error = xfs_iozero(ip, zero_off, zero_len);
		if (error) {
			goto out_lock;
		}

		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));

		xfs_ilock(ip, XFS_ILOCK_EXCL);
	}

	return 0;

out_lock:
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	ASSERT(error >= 0);
	return error;
}
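
/*
 * Worked example (editorial note, assuming a 4096-byte block size):
 * extending a file from isize = 6000 to offset = 20000 first zeroes bytes
 * 6000-8191 of the last allocated block via xfs_zero_last_block(), then
 * walks filesystem blocks 2 through 4 and zeroes bytes 8192 up to 19999
 * (capped at offset) in the ones that are allocated and written; holes
 * and unwritten extents are skipped.
 */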

/*
 * Common pre-write limit and setup checks.
 *
 * Returns with iolock held according to @iolock.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fsize_t		new_size;
	int			error = 0;

	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error) {
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
		*iolock = 0;
		return error;
	}

	new_size = *pos + *count;
	if (new_size > ip->i_size)
		ip->i_new_size = new_size;

	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		file_update_time(file);

	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.
	 */
	if (*pos > ip->i_size)
		error = -xfs_zero_eof(ip, *pos, ip->i_size);

	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	return file_remove_suid(file);
}

/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the
 * tricky-to-follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED,
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block.  In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block.  This is currently implemented
 * by hitting it with a big hammer (i.e. xfs_ioend_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos,
	size_t			ocount,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	size_t			count = ocount;
	int			unaligned_io = 0;
	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	*iolock = 0;
	if ((pos & target->bt_smask) || (count & target->bt_smask))
		return -XFS_ERROR(EINVAL);

	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
		unaligned_io = 1;

	if (unaligned_io || mapping->nrpages || pos > ip->i_size)
		*iolock = XFS_IOLOCK_EXCL;
	else
		*iolock = XFS_IOLOCK_SHARED;
	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);

	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
	if (ret)
		return ret;

	if (mapping->nrpages) {
		WARN_ON(*iolock != XFS_IOLOCK_EXCL);
		ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
							FI_REMAPF_LOCKED);
		if (ret)
			return ret;
	}

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain;
	 * otherwise demote the lock if we had to flush cached pages.
	 */
	if (unaligned_io)
		xfs_ioend_wait(ip);
	else if (*iolock == XFS_IOLOCK_EXCL) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		*iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
	ret = generic_file_direct_write(iocb, iovp,
			&nr_segs, pos, &iocb->ki_pos, count, ocount);

	/* No fallback to buffered IO on errors for XFS. */
	ASSERT(ret < 0 || ret == count);
	return ret;
}
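
/*
 * Illustrative userspace sketch (editorial note, not part of the original
 * source): a direct IO write that stays on the IOLOCK_SHARED fast path above
 * needs both the buffer and the file offset/length aligned; 4096 is assumed
 * here as a safe alignment for most configurations, and the file name is
 * hypothetical:
 *
 *	void *buf;
 *	posix_memalign(&buf, 4096, 4096);
 *	int fd = open("file", O_WRONLY | O_DIRECT);  // needs _GNU_SOURCE
 *	pwrite(fd, buf, 4096, 0);	// sector- and block-aligned
 */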

STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos,
	size_t			ocount,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	size_t			count = ocount;

	*iolock = XFS_IOLOCK_EXCL;
	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);

	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
	if (ret)
		return ret;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

write_retry:
	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
	ret = generic_file_buffered_write(iocb, iovp, nr_segs,
			pos, &iocb->ki_pos, count, ret);
	/*
	 * If we just got an ENOSPC, flush the inode now that we aren't
	 * holding any page locks and retry *once*.
	 */
	if (ret == -ENOSPC && !enospc) {
		ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
		if (ret)
			return ret;
		enospc = 1;
		goto write_retry;
	}
	current->backing_dev_info = NULL;
	return ret;
}

STATIC ssize_t
xfs_file_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			iolock;
	size_t			ocount = 0;

	XFS_STATS_INC(xs_write_calls);

	BUG_ON(iocb->ki_pos != pos);

	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
	if (ret)
		return ret;

	if (ocount == 0)
		return 0;

	xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (unlikely(file->f_flags & O_DIRECT))
		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
						ocount, &iolock);
	else
		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
						ocount, &iolock);

	xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);

	if (ret <= 0)
		goto out_unlock;

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
		loff_t end = pos + ret - 1;
		int error, error2;

		xfs_rw_iunlock(ip, iolock);
		error = filemap_write_and_wait_range(mapping, pos, end);
		xfs_rw_ilock(ip, iolock);

		error2 = -xfs_file_fsync(file,
					 (file->f_flags & __O_SYNC) ? 0 : 1);
		if (error)
			ret = error;
		else if (error2)
			ret = error2;
	}

out_unlock:
	xfs_aio_write_newsize_update(ip);
	xfs_rw_iunlock(ip, iolock);
	return ret;
}

STATIC long
xfs_file_fallocate(
	struct file	*file,
	int		mode,
	loff_t		offset,
	loff_t		len)
{
	struct inode	*inode = file->f_path.dentry->d_inode;
	long		error;
	loff_t		new_size = 0;
	xfs_flock64_t	bf;
	xfs_inode_t	*ip = XFS_I(inode);
	int		cmd = XFS_IOC_RESVSP;
	int		attr_flags = XFS_ATTR_NOLOCK;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	bf.l_whence = 0;
	bf.l_start = offset;
	bf.l_len = len;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	if (mode & FALLOC_FL_PUNCH_HOLE)
		cmd = XFS_IOC_UNRESVSP;

	/* check the new inode size is valid before allocating */
	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
	    offset + len > i_size_read(inode)) {
		new_size = offset + len;
		error = inode_newsize_ok(inode, new_size);
		if (error)
			goto out_unlock;
	}

	if (file->f_flags & O_DSYNC)
		attr_flags |= XFS_ATTR_SYNC;

	error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
	}

out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}
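
/*
 * Illustrative userspace sketch (editorial note, not part of the original
 * source): the two modes handled above, as issued via fallocate(2) on a
 * hypothetical file:
 *
 *	int fd = open("file", O_RDWR);
 *	// preallocate 1 MiB, extending i_size (the XFS_IOC_RESVSP path)
 *	fallocate(fd, 0, 0, 1 << 20);
 *	// punch a 4 KiB hole, keeping i_size (the XFS_IOC_UNRESVSP path)
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 4096, 4096);
 */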

STATIC int
xfs_file_open(
	struct inode	*inode,
	struct file	*file)
{
	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
		return -EIO;
	return 0;
}

STATIC int
xfs_dir_open(
	struct inode	*inode,
	struct file	*file)
{
	struct xfs_inode *ip = XFS_I(inode);
	int		mode;
	int		error;

	error = xfs_file_open(inode, file);
	if (error)
		return error;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.
	 */
	mode = xfs_ilock_map_shared(ip);
	if (ip->i_d.di_nextents > 0)
		xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
	xfs_iunlock(ip, mode);
	return 0;
}

STATIC int
xfs_file_release(
	struct inode	*inode,
	struct file	*filp)
{
	return -xfs_release(XFS_I(inode));
}

STATIC int
xfs_file_readdir(
	struct file	*filp,
	void		*dirent,
	filldir_t	filldir)
{
	struct inode	*inode = filp->f_path.dentry->d_inode;
	xfs_inode_t	*ip = XFS_I(inode);
	int		error;
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass the total size of the buffer
	 * we read into down to the filesystem.  With the filldir concept
	 * it's not needed for correct information, but the XFS dir2 leaf
	 * code wants an estimate of the buffer size to calculate its
	 * readahead window and size the buffers used for mapping to
	 * physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some
	 * point we can change the ->readdir prototype to include the
	 * buffer size.  For now we use the current glibc buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);

	error = xfs_readdir(ip, dirent, bufsize,
				(xfs_off_t *)&filp->f_pos, filldir);
	if (error)
		return -error;
	return 0;
}

STATIC int
xfs_file_mmap(
	struct file	*filp,
	struct vm_area_struct *vma)
{
	vma->vm_ops = &xfs_file_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR;

	file_accessed(filp);
	return 0;
}

/*
 * An mmap()ed file has taken a write protection fault and is being made
 * writable.  We can set the page state up correctly for a writable
 * page, which means we can do correct delalloc accounting (ENOSPC
 * checking!) and unwritten extent mapping.
 */
STATIC int
xfs_vm_page_mkwrite(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
}

const struct file_operations xfs_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= xfs_file_aio_read,
	.aio_write	= xfs_file_aio_write,
	.splice_read	= xfs_file_splice_read,
	.splice_write	= xfs_file_splice_write,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.mmap		= xfs_file_mmap,
	.open		= xfs_file_open,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
	.fallocate	= xfs_file_fallocate,
};

const struct file_operations xfs_dir_file_operations = {
	.open		= xfs_dir_open,
	.read		= generic_read_dir,
	.readdir	= xfs_file_readdir,
	.llseek		= generic_file_llseek,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.fsync		= xfs_file_fsync,
};

static const struct vm_operations_struct xfs_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= xfs_vm_page_mkwrite,
};