2006-03-31 14:30:42 +04:00
/*
 * High-level sync()-related operations
*/
# include <linux/kernel.h>
# include <linux/file.h>
# include <linux/fs.h>
# include <linux/module.h>
2006-10-18 21:55:46 +04:00
# include <linux/sched.h>
2006-03-31 14:30:42 +04:00
# include <linux/writeback.h>
# include <linux/syscalls.h>
# include <linux/linkage.h>
# include <linux/pagemap.h>
2006-08-29 22:05:54 +04:00
# include <linux/quotaops.h>
# include <linux/buffer_head.h>
2006-03-31 14:30:42 +04:00
/*
 * Mask of the flag bits that sys_sync_file_range() accepts.  Any bit set
 * outside this mask makes that call fail with -EINVAL.
 */
# define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
SYNC_FILE_RANGE_WAIT_AFTER )
2006-08-29 22:05:54 +04:00
/*
 * Sync everything.  pdflush is woken first because it writes back all
 * queues in parallel; the individual passes below then run in a fixed
 * order, first without waiting and then with the caller's @wait value.
 *
 * @wait: handed to the second sync_filesystems() pass and the final
 *        sync_inodes() pass.  When it is zero a completion message is
 *        printed (this is how emergency_sync() invokes us).
 */
static void do_sync(unsigned long wait)
{
	wakeup_pdflush(0);

	sync_inodes(0);		/* All mappings, inodes and their blockdevs */
	DQUOT_SYNC(NULL);
	sync_supers();		/* Write the superblocks */

	sync_filesystems(0);	/* Start syncing the filesystems */
	sync_filesystems(wait);	/* Waitingly sync the filesystems */

	sync_inodes(wait);	/* Mappings, inodes and blockdevs, again. */

	if (!wait) {
		printk(" Emergency Sync complete \n ");
	}

	if (unlikely(laptop_mode)) {
		laptop_sync_completion();
	}
}
/*
 * sync() system call entry point.  Runs do_sync() with wait set to 1 and
 * unconditionally returns 0.
 */
asmlinkage long sys_sync ( void )
{
do_sync ( 1 ) ;
return 0 ;
}
/*
 * Hand do_sync() to pdflush_operation() with an argument of 0, i.e. the
 * variant of do_sync() that prints a completion message when it is done.
 * The return value of pdflush_operation() is not checked.
 */
void emergency_sync ( void )
{
pdflush_operation ( do_sync , 0 ) ;
}
/*
* Generic function to fsync a file .
*
* filp may be NULL if called via the msync of a vma .
*/
int file_fsync ( struct file * filp , struct dentry * dentry , int datasync )
{
struct inode * inode = dentry - > d_inode ;
struct super_block * sb ;
int ret , err ;
/* sync the inode to buffers */
ret = write_inode_now ( inode , 0 ) ;
/* sync the superblock to buffers */
sb = inode - > i_sb ;
lock_super ( sb ) ;
if ( sb - > s_op - > write_super )
sb - > s_op - > write_super ( sb ) ;
unlock_super ( sb ) ;
/* .. finally sync the buffers to disk */
err = sync_blockdev ( sb - > s_bdev ) ;
if ( ! ret )
ret = err ;
return ret ;
}
long do_fsync ( struct file * file , int datasync )
{
int ret ;
int err ;
struct address_space * mapping = file - > f_mapping ;
if ( ! file - > f_op | | ! file - > f_op - > fsync ) {
/* Why? We can still call filemap_fdatawrite */
ret = - EINVAL ;
goto out ;
}
ret = filemap_fdatawrite ( mapping ) ;
/*
* We need to protect against concurrent writers , which could cause
* livelocks in fsync_buffers_list ( ) .
*/
mutex_lock ( & mapping - > host - > i_mutex ) ;
2006-12-08 13:36:35 +03:00
err = file - > f_op - > fsync ( file , file - > f_path . dentry , datasync ) ;
2006-08-29 22:05:54 +04:00
if ( ! ret )
ret = err ;
mutex_unlock ( & mapping - > host - > i_mutex ) ;
err = filemap_fdatawait ( mapping ) ;
if ( ! ret )
ret = err ;
out :
return ret ;
}
/*
 * Common body of the fsync-style system calls: look up the file for @fd,
 * run do_fsync() on it and drop the reference again.
 *
 * Returns -EBADF if @fd does not resolve to a file, otherwise whatever
 * do_fsync() returned.
 */
static long __do_fsync(unsigned int fd, int datasync)
{
	struct file *filp = fget(fd);
	int status;

	if (!filp) {
		return -EBADF;
	}

	status = do_fsync(filp, datasync);
	fput(filp);

	return status;
}
/*
 * fsync() system call entry point: __do_fsync() on @fd with datasync == 0.
 */
asmlinkage long sys_fsync ( unsigned int fd )
{
return __do_fsync ( fd , 0 ) ;
}
/*
 * fdatasync() system call entry point: __do_fsync() on @fd with
 * datasync == 1.
 */
asmlinkage long sys_fdatasync ( unsigned int fd )
{
return __do_fsync ( fd , 1 ) ;
}
2006-03-31 14:30:42 +04:00
/*
 * sys_sync_file_range() permits finely controlled syncing over a segment of
 * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
 * zero then sys_sync_file_range() will operate from offset out to EOF.
 *
 * The flag bits are:
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
 * before performing the write.
 *
 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
 * range which are not presently under writeback.
 *
 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
 * after performing the write.
 *
 * Useful combinations of the flag bits are:
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
 * in the range which were dirty on entry to sys_sync_file_range() are placed
 * under writeout.  This is a start-write-for-data-integrity operation.
 *
 * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
 * are not presently under writeout.  This is an asynchronous flush-to-disk
 * operation.  Not suitable for data integrity operations.
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
 * completion of writeout of all pages in the range.  This will be used after
 * an earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to
 * wait for that operation to complete and to return the result.
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
 * a traditional sync() operation.  This is a write-for-data-integrity
 * operation which will ensure that all pages in the range which were dirty
 * on entry to sys_sync_file_range() are committed to disk.
 *
 *
 * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
 * I/O errors or ENOSPC conditions and will return those to the caller, after
 * clearing the EIO and ENOSPC flags in the address_space.
 *
 * It should be noted that none of these operations write out the file's
 * metadata.  So unless the application is strictly performing overwrites of
 * already-instantiated disk blocks, there are no guarantees here that the
 * data will be available after a crash.
 *
 * Return value:
 *   -EINVAL  unknown bits in @flags, a negative @offset, or a range whose
 *            end is negative or lies before @offset;
 *   -EBADF   @fd does not refer to an open file;
 *   -ESPIPE  the file is not a regular file, block device, directory or
 *            symlink;
 *   0        (only where pgoff_t is 4 bytes) the range starts beyond what
 *            the pagecache can address;
 *   otherwise the result of do_sync_file_range().
 */
asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
					unsigned int flags)
{
	struct file *filp;
	loff_t endbyte;			/* inclusive */
	umode_t mode;
	int fput_needed;
	int status;

	if (flags & ~VALID_FLAGS) {
		return -EINVAL;
	}

	/* Exclusive end for now; turned into an inclusive end further down. */
	endbyte = offset + nbytes;

	if ((s64)offset < 0 || (s64)endbyte < 0 || endbyte < offset) {
		return -EINVAL;
	}

	if (sizeof(pgoff_t) == 4) {
		if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
			/*
			 * The range starts outside a 32 bit machine's
			 * pagecache addressing capabilities.  Let it "succeed"
			 */
			return 0;
		}
		if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
			/*
			 * Out to EOF
			 */
			nbytes = 0;
		}
	}

	/*
	 * "Out to EOF" is expressed as an inclusive end of LLONG_MAX rather
	 * than 0 or -1: an end of 0 cannot be told apart from a one-byte
	 * range at file offset zero, and -1 gives wrong results as soon as
	 * arithmetic or shifts are applied to it.
	 */
	if (nbytes == 0) {
		endbyte = LLONG_MAX;
	} else {
		endbyte--;		/* inclusive */
	}

	filp = fget_light(fd, &fput_needed);
	if (!filp) {
		return -EBADF;
	}

	mode = filp->f_path.dentry->d_inode->i_mode;
	if (S_ISREG(mode) || S_ISBLK(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
		status = do_sync_file_range(filp, offset, endbyte, flags);
	} else {
		status = -ESPIPE;
	}

	fput_light(filp, fput_needed);
	return status;
}
/*
 * Perform the wait / write / wait steps selected by @flags on the byte
 * range [@offset, @endbyte] of @file's mapping.
 *
 * `endbyte' is inclusive.
 *
 * The steps run in the fixed order WAIT_BEFORE, WRITE, WAIT_AFTER; a
 * negative result from one of the first two stops processing and is
 * returned immediately.
 *
 * Returns -EINVAL if the file has no mapping, 0 if no flag bit was set,
 * otherwise the result of the last step that ran.
 */
int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte,
			unsigned int flags)
{
	struct address_space *mapping = file->f_mapping;
	int status = 0;

	if (!mapping) {
		return -EINVAL;
	}

	if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
		status = wait_on_page_writeback_range(mapping,
					offset >> PAGE_CACHE_SHIFT,
					endbyte >> PAGE_CACHE_SHIFT);
		if (status < 0) {
			return status;
		}
	}

	if (flags & SYNC_FILE_RANGE_WRITE) {
		status = __filemap_fdatawrite_range(mapping, offset, endbyte,
						WB_SYNC_NONE);
		if (status < 0) {
			return status;
		}
	}

	if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
		status = wait_on_page_writeback_range(mapping,
					offset >> PAGE_CACHE_SHIFT,
					endbyte >> PAGE_CACHE_SHIFT);
	}

	return status;
}
EXPORT_SYMBOL_GPL(do_sync_file_range);