2005-04-17 02:20:36 +04:00
/*
* Copyright 2000 by Hans Reiser , licensing governed by reiserfs / README
*/
2006-08-29 22:06:18 +04:00
# include <linux/config.h>
2005-04-17 02:20:36 +04:00
# include <linux/time.h>
# include <linux/reiserfs_fs.h>
# include <linux/reiserfs_acl.h>
# include <linux/reiserfs_xattr.h>
# include <linux/smp_lock.h>
# include <asm/uaccess.h>
# include <linux/pagemap.h>
# include <linux/swap.h>
# include <linux/writeback.h>
# include <linux/blkdev.h>
# include <linux/buffer_head.h>
# include <linux/quotaops.h>
/*
** We pack the tails of files on file close, not at the time they are written.
** This implies an unnecessary copy of the tail and an unnecessary indirect item
** insertion/balancing, for files that are written in one write.
** It avoids unnecessary tail packings (balances) for files that are written in
** multiple writes and are small enough to have tails.
**
** file_release is called by the VFS layer when the file is closed.  If
** this is the last open file descriptor, and the file is
** small enough to have a tail, and the tail is currently in an
** unformatted node, the tail is converted back into a direct item.
**
** We use reiserfs_truncate_file to pack the tail, since it already has
** all the conditions coded.
*/
2005-07-13 07:21:28 +04:00
static int reiserfs_file_release ( struct inode * inode , struct file * filp )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct reiserfs_transaction_handle th ;
int err ;
int jbegin_failure = 0 ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( ! S_ISREG ( inode - > i_mode ) )
BUG ( ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* fast out for when nothing needs to be done */
if ( ( atomic_read ( & inode - > i_count ) > 1 | |
! ( REISERFS_I ( inode ) - > i_flags & i_pack_on_close_mask ) | |
! tail_has_to_be_packed ( inode ) ) & &
REISERFS_I ( inode ) - > i_prealloc_count < = 0 ) {
return 0 ;
}
2005-04-17 02:20:36 +04:00
2006-01-10 02:59:24 +03:00
mutex_lock ( & inode - > i_mutex ) ;
2006-08-05 23:15:08 +04:00
reiserfs_write_lock ( inode - > i_sb ) ;
2005-07-13 07:21:28 +04:00
/* freeing preallocation only involves relogging blocks that
* are already in the current transaction . preallocation gets
* freed at the end of each transaction , so it is impossible for
* us to log any additional blocks ( including quota blocks )
*/
err = journal_begin ( & th , inode - > i_sb , 1 ) ;
2005-04-17 02:20:36 +04:00
if ( err ) {
2005-07-13 07:21:28 +04:00
/* uh oh, we can't allow the inode to go away while there
* is still preallocation blocks pending . Try to join the
* aborted transaction
*/
jbegin_failure = err ;
err = journal_join_abort ( & th , inode - > i_sb , 1 ) ;
if ( err ) {
/* hmpf, our choices here aren't good. We can pin the inode
* which will disallow unmount from every happening , we can
* do nothing , which will corrupt random memory on unmount ,
* or we can forcibly remove the file from the preallocation
* list , which will leak blocks on disk . Lets pin the inode
* and let the admin know what is going on .
*/
igrab ( inode ) ;
reiserfs_warning ( inode - > i_sb ,
" pinning inode %lu because the "
" preallocation can't be freed " ) ;
goto out ;
}
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
reiserfs_update_inode_transaction ( inode ) ;
2005-04-17 02:20:36 +04:00
# ifdef REISERFS_PREALLOCATE
2005-07-13 07:21:28 +04:00
reiserfs_discard_prealloc ( & th , inode ) ;
2005-04-17 02:20:36 +04:00
# endif
2005-07-13 07:21:28 +04:00
err = journal_end ( & th , inode - > i_sb , 1 ) ;
/* copy back the error code from journal_begin */
if ( ! err )
err = jbegin_failure ;
if ( ! err & & atomic_read ( & inode - > i_count ) < = 1 & &
( REISERFS_I ( inode ) - > i_flags & i_pack_on_close_mask ) & &
tail_has_to_be_packed ( inode ) ) {
/* if regular file is released by last holder and it has been
appended ( we append by unformatted node only ) or its direct
item ( s ) had to be converted , then it may have to be
indirect2direct converted */
err = reiserfs_truncate_file ( inode , 0 ) ;
}
out :
2006-01-10 02:59:24 +03:00
mutex_unlock ( & inode - > i_mutex ) ;
2005-07-13 07:21:28 +04:00
reiserfs_write_unlock ( inode - > i_sb ) ;
return err ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/*
 * void-returning truncate entry point: forwards to
 * reiserfs_truncate_file() with its second argument set to 1 and
 * discards whatever that call returns.
 */
static void reiserfs_vfs_truncate_file(struct inode *inode)
{
	(void)reiserfs_truncate_file(inode, 1);
}
/* Sync a reiserfs file. */
/*
* FIXME : sync_mapping_buffers ( ) never has anything to sync . Can
* be removed . . .
*/
2005-07-13 07:21:28 +04:00
static int reiserfs_sync_file ( struct file * p_s_filp ,
struct dentry * p_s_dentry , int datasync )
{
struct inode * p_s_inode = p_s_dentry - > d_inode ;
int n_err ;
int barrier_done ;
if ( ! S_ISREG ( p_s_inode - > i_mode ) )
BUG ( ) ;
n_err = sync_mapping_buffers ( p_s_inode - > i_mapping ) ;
reiserfs_write_lock ( p_s_inode - > i_sb ) ;
barrier_done = reiserfs_commit_for_inode ( p_s_inode ) ;
reiserfs_write_unlock ( p_s_inode - > i_sb ) ;
2006-09-29 12:59:54 +04:00
if ( barrier_done ! = 1 & & reiserfs_barrier_flush ( p_s_inode - > i_sb ) )
2005-07-13 07:21:28 +04:00
blkdev_issue_flush ( p_s_inode - > i_sb - > s_bdev , NULL ) ;
if ( barrier_done < 0 )
return barrier_done ;
return ( n_err < 0 ) ? - EIO : 0 ;
2005-04-17 02:20:36 +04:00
}
/* I really do not want to play with memory shortage right now, so
   to simplify the code, we are not going to write more than this many pages
   at a time. This should still considerably improve performance compared to
   the 4k-at-a-time case. With 4k pages this is 32 pages. */
/* Number of pages covering 128 KiB.  The whole expansion is parenthesized
   so that an adjacent operator at the use site (e.g. "x / MACRO") cannot
   re-associate with the division inside it. */
#define REISERFS_WRITE_PAGES_AT_A_TIME ((128 * 1024) / PAGE_CACHE_SIZE)
/* Allocates blocks for a file to fulfil write request.
Maps all unmapped but prepared pages from the list .
Updates metadata with newly allocated blocknumbers as needed */
2005-07-13 07:21:28 +04:00
/*
 * Allocate disk blocks for a write request and wire them into the file.
 *
 * Steps, in order:
 *   1. allocate blocks_to_allocate block numbers (retrying once after
 *      restarting the transaction when the allocator reports
 *      NO_DISK_SPACE);
 *   2. if the write starts past the existing items, insert/paste zeroed
 *      block pointers to represent the hole;
 *   3. fill zero (hole) pointers in existing indirect items with the new
 *      block numbers;
 *   4. paste/insert the remaining block numbers as an indirect item;
 *   5. map every still-unmapped buffer of the prepared pages inside the
 *      written range to the new blocks and mark it new.
 *
 * @th:                 transaction handle; a transaction is begun here.
 * @inode:              inode being written.
 * @pos:                file position of the write.
 * @num_pages:          number of entries in @prepared_pages.
 * @write_bytes:        number of bytes being written.
 * @prepared_pages:     pages touched by the write.
 * @blocks_to_allocate: number of blocks to allocate (must be non-zero).
 *
 * Returns 0 on success; the transaction is then left open and the caller
 * is responsible for closing it and for logging the inode.  On failure a
 * negative errno (-ENOMEM, -EIO, -EDQUOT, -ENOSPC or an error from the
 * journal/tree calls) is returned; if th->t_trans_id is set the
 * transaction is ended here.  The reiserfs write lock taken by this
 * function is always dropped before returning.
 */
static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th,
					       struct inode *inode,	/* Inode we work with */
					       loff_t pos,	/* Writing position */
					       int num_pages,	/* number of pages write going to touch */
					       int write_bytes,	/* amount of bytes to write */
					       struct page **prepared_pages,	/* array of prepared pages */
					       int blocks_to_allocate	/* Amount of blocks we need to allocate to fit the data into file */
    )
{
	struct cpu_key key;	/* cpu key of item that we are going to deal with */
	struct item_head *ih;	/* item head that we are going to deal with */
	struct buffer_head *bh;	/* buffer head that contains the items we deal with */
	__le32 *item;		/* pointer to item we are going to deal with */
	INITIALIZE_PATH(path);	/* path to item that we are going to deal with */
	b_blocknr_t *allocated_blocks;	/* where allocated blocknumbers are stored */
	reiserfs_blocknr_hint_t hint;	/* hint structure for block allocator */
	int res;		/* result of the calls we make; holds negative
				   errno values, hence signed (and printed
				   with %d below) */
	int curr_block;		/* current block, tracks unmapped blocks */
	int i;			/* loop counter */
	int itempos;		/* position in item */
	unsigned int from = (pos & (PAGE_CACHE_SIZE - 1));	/* writing position in first page */
	unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;	/* last modified byte offset in last page */
	__u64 hole_size;	/* amount of blocks for a file hole, if one is needed */
	int modifying_this_item = 0;	/* set once the current buffer has been
					   prepared for the journal */
	int will_prealloc = 0;

	RFALSE(!blocks_to_allocate,
	       " green-9004: tried to allocate zero blocks? ");

	/* only preallocate if this is a small write */
	if (REISERFS_I(inode)->i_prealloc_count ||
	    (!(write_bytes & (inode->i_sb->s_blocksize - 1)) &&
	     blocks_to_allocate <
	     REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
		will_prealloc =
		    REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;

	allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
				   sizeof(b_blocknr_t), GFP_NOFS);
	if (!allocated_blocks)
		return -ENOMEM;

	/* First we compose a key to point at the writing position, we want to do
	   that outside of any locking region. */
	make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ );

	/* If we came here, it means we absolutely need to open a transaction,
	   since we need to allocate some blocks */
	reiserfs_write_lock(inode->i_sb);	/* Journaling stuff and we need that. */
	res = journal_begin(th, inode->i_sb,
			    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
			    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));	/* Wish I know if this number enough */
	if (res)
		goto error_exit;
	reiserfs_update_inode_transaction(inode);

	/* Look for the in-tree position of our write, need path for block allocator */
	res = search_for_position_by_key(inode->i_sb, &key, &path);
	if (res == IO_ERROR) {
		res = -EIO;
		goto error_exit;
	}

	/* Allocate blocks */
	/* First fill in "hint" structure for block allocator */
	hint.th = th;		/* transaction handle. */
	hint.path = &path;	/* Path, so that block allocator can determine packing locality or whatever it needs to determine. */
	hint.inode = inode;	/* Inode is needed by block allocator too. */
	hint.search_start = 0;	/* We have no hint on where to search free blocks for block allocator. */
	hint.key = key.on_disk_key;	/* on disk key of file. */
	hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);	/* Number of disk blocks this file occupies already. */
	hint.formatted_node = 0;	/* We are allocating blocks for unformatted node. */
	hint.preallocate = will_prealloc;

	/* Call block allocator to allocate blocks */
	res = reiserfs_allocate_blocknrs(&hint, allocated_blocks,
					 blocks_to_allocate,
					 blocks_to_allocate);
	if (res != CARRY_ON) {
		if (res == NO_DISK_SPACE) {
			/* We flush the transaction in case of no space. This way some
			   blocks might become free */
			SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
			res = restart_transaction(th, inode, &path);
			if (res)
				goto error_exit;

			/* We might have scheduled, so search again */
			res = search_for_position_by_key(inode->i_sb, &key,
							 &path);
			if (res == IO_ERROR) {
				res = -EIO;
				goto error_exit;
			}

			/* update changed info for hint structure. */
			res = reiserfs_allocate_blocknrs(&hint,
							 allocated_blocks,
							 blocks_to_allocate,
							 blocks_to_allocate);
			if (res != CARRY_ON) {
				res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
				pathrelse(&path);
				goto error_exit;
			}
		} else {
			res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
			pathrelse(&path);
			goto error_exit;
		}
	}
#ifdef __BIG_ENDIAN
	/* Too bad, I have not found any way to convert a given region from
	   cpu format to little endian format */
	for (i = 0; i < blocks_to_allocate; i++)
		allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]);
#endif

	/* Blocks allocating well might have scheduled and tree might have changed,
	   let's search the tree again */
	/* find where in the tree our write should go */
	res = search_for_position_by_key(inode->i_sb, &key, &path);
	if (res == IO_ERROR) {
		res = -EIO;
		goto error_exit_free_blocks;
	}

	bh = get_last_bh(&path);	/* Get a bufferhead for last element in path. */
	ih = get_ih(&path);	/* Get a pointer to last item head in path. */
	item = get_item(&path);	/* Get a pointer to last item in path */

	/* Let's see what we have found */
	if (res != POSITION_FOUND) {
		/* position not found, this means that we might need to
		   append file with holes first */

		/* Since we are writing past the file's end, we need to find out if
		   there is a hole that needs to be inserted before our writing
		   position, and how many blocks it is going to cover (we need to
		   populate pointers to file blocks representing the hole with zeros) */
		{
			int item_offset = 1;
			/*
			 * if ih is stat data, its offset is 0 and we don't want to
			 * add 1 to pos in the hole_size calculation
			 */
			if (is_statdata_le_ih(ih))
				item_offset = 0;
			hole_size = (pos + item_offset -
				     (le_key_k_offset
				      (get_inode_item_key_version(inode),
				       &(ih->ih_key)) +
				      op_bytes_number(ih,
						      inode->i_sb->
						      s_blocksize)))
			    >> inode->i_sb->s_blocksize_bits;
		}

		if (hole_size > 0) {
			int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE);	/* How much data to insert first time. */
			/* area filled with zeroes, to supply as list of zero blocknumbers
			   We allocate it outside of loop just in case loop would spin for
			   several iterations. */
			char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC);	/* We cannot insert more than MAX_ITEM_LEN bytes anyway. */
			if (!zeros) {
				res = -ENOMEM;
				goto error_exit_free_blocks;
			}
			memset(zeros, 0, to_paste * UNFM_P_SIZE);
			do {
				to_paste =
				    min_t(__u64, hole_size,
					  MAX_ITEM_LEN(inode->i_sb->
						       s_blocksize) /
					  UNFM_P_SIZE);
				if (is_indirect_le_ih(ih)) {
					/* Ok, there is existing indirect item already. Need to append it */
					/* Calculate position past inserted item */
					make_cpu_key(&key, inode,
						     le_key_k_offset
						     (get_inode_item_key_version
						      (inode),
						      &(ih->ih_key)) +
						     op_bytes_number(ih,
								     inode->
								     i_sb->
								     s_blocksize),
						     TYPE_INDIRECT, 3);
					res =
					    reiserfs_paste_into_item(th, &path,
								     &key,
								     inode,
								     (char *)
								     zeros,
								     UNFM_P_SIZE
								     *
								     to_paste);
					if (res) {
						kfree(zeros);
						goto error_exit_free_blocks;
					}
				} else if (is_statdata_le_ih(ih)) {
					/* No existing item, create it */
					/* item head for new item */
					struct item_head ins_ih;

					/* create a key for our new item */
					make_cpu_key(&key, inode, 1,
						     TYPE_INDIRECT, 3);

					/* Create new item head for our new item */
					make_le_item_head(&ins_ih, &key,
							  key.version, 1,
							  TYPE_INDIRECT,
							  to_paste *
							  UNFM_P_SIZE,
							  0 /* free space */ );

					/* Find where such item should live in the tree */
					res =
					    search_item(inode->i_sb, &key,
							&path);
					if (res != ITEM_NOT_FOUND) {
						/* item should not exist, otherwise we have error */
						if (res != -ENOSPC) {
							reiserfs_warning(inode->
									 i_sb,
									 " green-9008: search_by_key (%K) returned %d ",
									 &key,
									 res);
						}
						res = -EIO;
						kfree(zeros);
						goto error_exit_free_blocks;
					}
					res =
					    reiserfs_insert_item(th, &path,
								 &key, &ins_ih,
								 inode,
								 (char *)zeros);
				} else {
					reiserfs_panic(inode->i_sb,
						       " green-9011: Unexpected key type %K \n ",
						       &key);
				}
				if (res) {
					kfree(zeros);
					goto error_exit_free_blocks;
				}
				/* Now we want to check if transaction is too full, and if it is
				   we restart it. This will also free the path. */
				if (journal_transaction_should_end
				    (th, th->t_blocks_allocated)) {
					res =
					    restart_transaction(th, inode,
								&path);
					if (res) {
						pathrelse(&path);
						kfree(zeros);
						goto error_exit;
					}
				}

				/* Well, need to recalculate path and stuff */
				set_cpu_key_k_offset(&key,
						     cpu_key_k_offset(&key) +
						     (to_paste << inode->
						      i_blkbits));
				res =
				    search_for_position_by_key(inode->i_sb,
							       &key, &path);
				if (res == IO_ERROR) {
					res = -EIO;
					kfree(zeros);
					goto error_exit_free_blocks;
				}
				bh = get_last_bh(&path);
				ih = get_ih(&path);
				item = get_item(&path);
				hole_size -= to_paste;
			} while (hole_size);
			kfree(zeros);
		}
	}

	/* Go through existing indirect items first
	   replace all zeroes with blocknumbers from list
	   Note that if no corresponding item was found, by previous search,
	   it means there are no existing in-tree representation for file area
	   we are going to overwrite, so there is nothing to scan through for holes. */
	for (curr_block = 0, itempos = path.pos_in_item;
	     curr_block < blocks_to_allocate && res == POSITION_FOUND;) {
	      retry:
		if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) {
			/* We run out of data in this indirect item, let's look for another
			   one. */
			/* First if we are already modifying current item, log it */
			if (modifying_this_item) {
				journal_mark_dirty(th, inode->i_sb, bh);
				modifying_this_item = 0;
			}
			/* Then set the key to look for a new indirect item (offset of old
			   item is added to old item length */
			set_cpu_key_k_offset(&key,
					     le_key_k_offset
					     (get_inode_item_key_version(inode),
					      &(ih->ih_key)) +
					     op_bytes_number(ih,
							     inode->i_sb->
							     s_blocksize));
			/* Search for position of new key in the tree. */
			res =
			    search_for_position_by_key(inode->i_sb, &key,
						       &path);
			if (res == IO_ERROR) {
				res = -EIO;
				goto error_exit_free_blocks;
			}
			bh = get_last_bh(&path);
			ih = get_ih(&path);
			item = get_item(&path);
			itempos = path.pos_in_item;
			continue;	/* loop to check all kinds of conditions and so on. */
		}
		/* Ok, we have correct position in item now, so let's see if it is
		   representing file hole (blocknumber is zero) and fill it if needed */
		if (!item[itempos]) {
			/* Ok, a hole. Now we need to check if we already prepared this
			   block to be journaled */
			while (!modifying_this_item) {	/* loop until succeed */
				/* Well, this item is not journaled yet, so we must prepare
				   it for journal first, before we can change it */
				struct item_head tmp_ih;	/* copy of the found item head, used to
								   detect if fs changed under us while
								   we were preparing for journal */
				int fs_gen;	/* fs generation, to find if someone
						   changes fs under our feet */

				copy_item_head(&tmp_ih, ih);	/* Remember itemhead */
				fs_gen = get_generation(inode->i_sb);	/* remember fs generation */
				reiserfs_prepare_for_journal(inode->i_sb, bh, 1);	/* Prepare a buffer within which indirect item is stored for changing. */
				if (fs_changed(fs_gen, inode->i_sb)
				    && item_moved(&tmp_ih, &path)) {
					/* Sigh, fs was changed under us, we need to look for new
					   location of item we are working with */

					/* unmark prepared area as journaled and search for its
					   new position */
					reiserfs_restore_prepared_buffer(inode->
									 i_sb,
									 bh);
					res =
					    search_for_position_by_key(inode->
								       i_sb,
								       &key,
								       &path);
					if (res == IO_ERROR) {
						res = -EIO;
						goto error_exit_free_blocks;
					}
					bh = get_last_bh(&path);
					ih = get_ih(&path);
					item = get_item(&path);
					itempos = path.pos_in_item;
					goto retry;
				}
				modifying_this_item = 1;
			}
			item[itempos] = allocated_blocks[curr_block];	/* Assign new block */
			curr_block++;
		}
		itempos++;
	}

	if (modifying_this_item) {	/* We need to log last-accessed block, if it
					   was modified, but not logged yet. */
		journal_mark_dirty(th, inode->i_sb, bh);
	}

	if (curr_block < blocks_to_allocate) {
		/* Oh, well need to append to indirect item, or to create indirect item
		   if there weren't any */
		if (is_indirect_le_ih(ih)) {
			/* Existing indirect item - append. First calculate key for append
			   position. We do not need to recalculate path as it should
			   already point to correct place. */
			make_cpu_key(&key, inode,
				     le_key_k_offset(get_inode_item_key_version
						     (inode),
						     &(ih->ih_key)) +
				     op_bytes_number(ih,
						     inode->i_sb->s_blocksize),
				     TYPE_INDIRECT, 3);
			res =
			    reiserfs_paste_into_item(th, &path, &key, inode,
						     (char *)(allocated_blocks +
							      curr_block),
						     UNFM_P_SIZE *
						     (blocks_to_allocate -
						      curr_block));
			if (res) {
				goto error_exit_free_blocks;
			}
		} else if (is_statdata_le_ih(ih)) {
			/* Last found item was statdata. That means we need to create indirect item. */
			struct item_head ins_ih;	/* itemhead for new item */

			/* create a key for our new item */
			make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3);	/* Position one,
									   because that's
									   where first
									   indirect item
									   begins */
			/* Create new item head for our new item */
			make_le_item_head(&ins_ih, &key, key.version, 1,
					  TYPE_INDIRECT,
					  (blocks_to_allocate -
					   curr_block) * UNFM_P_SIZE,
					  0 /* free space */ );
			/* Find where such item should live in the tree */
			res = search_item(inode->i_sb, &key, &path);
			if (res != ITEM_NOT_FOUND) {
				/* Well, if we have found such item already, or some error
				   occurred, we need to warn user and return error */
				if (res != -ENOSPC) {
					reiserfs_warning(inode->i_sb,
							 " green-9009: search_by_key (%K) "
							 " returned %d ", &key,
							 res);
				}
				res = -EIO;
				goto error_exit_free_blocks;
			}
			/* Insert item into the tree with the data as its body */
			res =
			    reiserfs_insert_item(th, &path, &key, &ins_ih,
						 inode,
						 (char *)(allocated_blocks +
							  curr_block));
			/*
			 * A failed insert must not be reported as success:
			 * handle it exactly like a failed paste above, so the
			 * buffers below are never mapped to blocks that did
			 * not make it into the tree.
			 */
			if (res)
				goto error_exit_free_blocks;
		} else {
			reiserfs_panic(inode->i_sb,
				       " green-9010: unexpected item type for key %K \n ",
				       &key);
		}
	}

	/* the caller is responsible for closing the transaction
	   unless we return an error, they are also responsible for logging
	   the inode. */
	pathrelse(&path);
	/*
	 * cleanup preallocation from previous writes
	 * if this is a partial block write
	 */
	if (write_bytes & (inode->i_sb->s_blocksize - 1))
		reiserfs_discard_prealloc(th, inode);
	reiserfs_write_unlock(inode->i_sb);

	/* go through all the pages/buffers and map the buffers to newly allocated
	   blocks (so that system knows where to write these pages later). */
	curr_block = 0;
	for (i = 0; i < num_pages; i++) {
		struct page *page = prepared_pages[i];	/* current page */
		struct buffer_head *head = page_buffers(page);	/* first buffer for a page */
		int block_start, block_end;	/* in-page offsets for buffers. */

		if (!page_buffers(page))
			reiserfs_panic(inode->i_sb,
				       " green-9005: No buffers for prepared page??? ");

		/* For each buffer in page */
		for (bh = head, block_start = 0; bh != head || !block_start;
		     block_start = block_end, bh = bh->b_this_page) {
			if (!bh)
				reiserfs_panic(inode->i_sb,
					       " green-9006: Allocated but absent buffer for a page? ");
			block_end = block_start + inode->i_sb->s_blocksize;
			if (i == 0 && block_end <= from)
				/* if this buffer is before requested data to map, skip it */
				continue;
			if (i == num_pages - 1 && block_start >= to)
				/* If this buffer is after requested data to map, abort
				   processing of current page */
				break;

			if (!buffer_mapped(bh)) {	/* Ok, unmapped buffer, need to map it */
				map_bh(bh, inode->i_sb,
				       le32_to_cpu(allocated_blocks
						   [curr_block]));
				curr_block++;
				set_buffer_new(bh);
			}
		}
	}

	RFALSE(curr_block > blocks_to_allocate,
	       " green-9007: Used too many blocks? weird ");

	kfree(allocated_blocks);
	return 0;

	/* Need to deal with transaction here. */
error_exit_free_blocks:
	pathrelse(&path);
	/* free blocks */
	for (i = 0; i < blocks_to_allocate; i++)
		reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]),
				    1);

error_exit:
	if (th->t_trans_id) {
		int err;
		/* update any changes we made to blk count */
		mark_inode_dirty(inode);
		err =
		    journal_end(th, inode->i_sb,
				JOURNAL_PER_BALANCE_CNT * 3 + 1 +
				2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
		if (err)
			res = err;
	}
	reiserfs_write_unlock(inode->i_sb);
	kfree(allocated_blocks);

	return res;
}
/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
2005-07-13 07:21:28 +04:00
/*
 * Give back every page in @prepared_pages: for each of the @num_pages
 * entries, try to free the page's buffers, unlock the page and drop the
 * page-cache reference.
 */
static void reiserfs_unprepare_pages(struct page **prepared_pages,	/* list of locked pages */
				     size_t num_pages /* amount of pages */ )
{
	size_t i;		/* same type as num_pages, so the bound check
				   is not a signed/unsigned comparison */

	for (i = 0; i < num_pages; i++) {
		struct page *page = prepared_pages[i];

		try_to_free_buffers(page);
		unlock_page(page);
		page_cache_release(page);
	}
}
/* This function will copy data from userspace to specified pages within
supplied byte range */
2005-07-13 07:21:28 +04:00
/*
 * Copy @write_bytes bytes from the user buffer @buf into the prepared
 * pages, starting at the in-page offset derived from @pos in the first
 * page and at offset 0 in every following page.
 *
 * For each page the user range is first touched with
 * fault_in_pages_readable() (its result is not checked), then the page is
 * kmap()ed, filled with __copy_from_user(), dcache-flushed and unmapped.
 *
 * Returns 0 when every chunk was copied completely, or -EFAULT as soon as
 * one __copy_from_user() call reports uncopied bytes (the remaining pages
 * are then left untouched).
 */
static int reiserfs_copy_from_user_to_file_region(loff_t pos,	/* In-file position */
						  int num_pages,	/* Number of pages affected */
						  int write_bytes,	/* Amount of bytes to write */
						  struct page **prepared_pages,	/* pointer to array to prepared pages */
						  const char __user * buf	/* Pointer to user-supplied data */
    )
{
	int page_off = (pos & (PAGE_CACHE_SIZE - 1));	/* offset inside current page */
	int idx = 0;

	while (idx < num_pages) {
		/* bytes that go into this page */
		size_t chunk = min_t(size_t, PAGE_CACHE_SIZE - page_off,
				     write_bytes);
		struct page *page = prepared_pages[idx];
		long uncopied;

		fault_in_pages_readable(buf, chunk);

		kmap(page);
		uncopied = __copy_from_user(page_address(page) + page_off,
					    buf, chunk);
		/* Flush processor's dcache for this page */
		flush_dcache_page(page);
		kunmap(page);

		if (uncopied)
			return -EFAULT;	/* fault while copying: give up */

		buf += chunk;
		write_bytes -= chunk;
		page_off = 0;	/* later pages are filled from their start */
		idx++;
	}

	return 0;
}
/* taken fs/buffer.c:__block_commit_write */
int reiserfs_commit_page ( struct inode * inode , struct page * page ,
2005-07-13 07:21:28 +04:00
unsigned from , unsigned to )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
unsigned block_start , block_end ;
int partial = 0 ;
unsigned blocksize ;
struct buffer_head * bh , * head ;
unsigned long i_size_index = inode - > i_size > > PAGE_CACHE_SHIFT ;
int new ;
int logit = reiserfs_file_data_log ( inode ) ;
struct super_block * s = inode - > i_sb ;
int bh_per_page = PAGE_CACHE_SIZE / s - > s_blocksize ;
struct reiserfs_transaction_handle th ;
int ret = 0 ;
th . t_trans_id = 0 ;
blocksize = 1 < < inode - > i_blkbits ;
if ( logit ) {
reiserfs_write_lock ( s ) ;
ret = journal_begin ( & th , s , bh_per_page + 1 ) ;
if ( ret )
goto drop_write_lock ;
reiserfs_update_inode_transaction ( inode ) ;
}
for ( bh = head = page_buffers ( page ) , block_start = 0 ;
bh ! = head | | ! block_start ;
block_start = block_end , bh = bh - > b_this_page ) {
new = buffer_new ( bh ) ;
clear_buffer_new ( bh ) ;
block_end = block_start + blocksize ;
if ( block_end < = from | | block_start > = to ) {
if ( ! buffer_uptodate ( bh ) )
partial = 1 ;
} else {
set_buffer_uptodate ( bh ) ;
if ( logit ) {
reiserfs_prepare_for_journal ( s , bh , 1 ) ;
journal_mark_dirty ( & th , s , bh ) ;
} else if ( ! buffer_dirty ( bh ) ) {
mark_buffer_dirty ( bh ) ;
/* do data=ordered on any page past the end
* of file and any buffer marked BH_New .
*/
if ( reiserfs_data_ordered ( inode - > i_sb ) & &
( new | | page - > index > = i_size_index ) ) {
reiserfs_add_ordered_list ( inode , bh ) ;
}
}
}
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
if ( logit ) {
ret = journal_end ( & th , s , bh_per_page + 1 ) ;
drop_write_lock :
reiserfs_write_unlock ( s ) ;
}
/*
* If this is a partial write which happened to make all buffers
* uptodate then we can optimize away a bogus readpage ( ) for
* the next read ( ) . Here we ' discover ' whether the page went
* uptodate as a result of this ( potentially partial ) write .
*/
if ( ! partial )
SetPageUptodate ( page ) ;
return ret ;
2005-04-17 02:20:36 +04:00
}
/* Submit pages for write. This was separated from actual file copying
because we might want to allocate block numbers in - between .
This function assumes that caller will adjust file size to correct value . */
2005-07-13 07:21:28 +04:00
/*
 * Commit the prepared pages of a write and finish the bookkeeping.
 *
 * Each page is passed to reiserfs_commit_page() with the byte range that
 * the write covers in it; all pages are processed even after an error.
 * If the write extends the file, i_size is updated, the pack-on-close
 * flag is cleared or set according to the tail policy checks below, and
 * the inode is marked dirty.  An open transaction in @th is ended, and
 * finally every page is unlocked, marked accessed and released.
 *
 * Returns 0, or the most recent non-zero status from
 * reiserfs_commit_page()/journal_end().
 */
static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos,	/* Writing position offset */
						 size_t num_pages,	/* Number of pages to write */
						 size_t write_bytes,	/* number of bytes to write */
						 struct page **prepared_pages	/* list of pages */
    )
{
	int total_bytes = write_bytes;	/* size of the whole write */
	int page_off = (pos & (PAGE_CACHE_SIZE - 1));	/* offset in current page */
	int size_updated = 0;	/* inode already marked dirty for i_size */
	int result = 0;
	int rc;
	int idx;

	for (idx = 0; idx < num_pages; idx++) {
		/* bytes of this write that land in the current page */
		int chunk = min_t(int, PAGE_CACHE_SIZE - page_off,
				  write_bytes);

		rc = reiserfs_commit_page(inode, prepared_pages[idx],
					  page_off, page_off + chunk);
		/* keep going on error; only remember the status to report */
		if (rc)
			result = rc;

		write_bytes -= chunk;
		page_off = 0;
	}

	/* now that we've gotten all the ordered buffers marked dirty,
	 * we can safely update i_size and close any running transaction
	 */
	if (pos + total_bytes > inode->i_size) {
		inode->i_size = pos + total_bytes;	/* Set new size */

		/* If the file has grown so much that tail packing is no
		 * longer possible, reset "need to pack" flag */
		if ((have_large_tails(inode->i_sb) &&
		     inode->i_size > i_block_size(inode) * 4) ||
		    (have_small_tails(inode->i_sb) &&
		     inode->i_size > i_block_size(inode)))
			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
		else if ((have_large_tails(inode->i_sb) &&
			  inode->i_size < i_block_size(inode) * 4) ||
			 (have_small_tails(inode->i_sb) &&
			  inode->i_size < i_block_size(inode)))
			REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;

		reiserfs_write_lock(inode->i_sb);
		/* without a running transaction, attach the inode to one first */
		if (!th->t_trans_id)
			reiserfs_update_inode_transaction(inode);
		/* this sets the proper flags for O_SYNC to trigger a commit */
		mark_inode_dirty(inode);
		reiserfs_write_unlock(inode->i_sb);

		size_updated = 1;
	}

	if (th->t_trans_id) {
		reiserfs_write_lock(inode->i_sb);
		if (!size_updated)
			mark_inode_dirty(inode);
		rc = journal_end(th, th->t_super, th->t_blocks_allocated);
		if (rc)
			result = rc;
		reiserfs_write_unlock(inode->i_sb);
	}
	th->t_trans_id = 0;

	/*
	 * we have to unlock the pages after updating i_size, otherwise
	 * we race with writepage
	 */
	idx = 0;
	while (idx < num_pages) {
		struct page *page = prepared_pages[idx];

		unlock_page(page);
		mark_page_accessed(page);
		page_cache_release(page);
		idx++;
	}

	return result;
}
/* Look if passed writing region is going to touch file's tail
( if it is present ) . And if it is , convert the tail to unformatted node */
2005-07-13 07:21:28 +04:00
static int reiserfs_check_for_tail_and_convert ( struct inode * inode , /* inode to deal with */
loff_t pos , /* Writing position */
int write_bytes /* amount of bytes to write */
)
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
INITIALIZE_PATH ( path ) ; // needed for search_for_position
struct cpu_key key ; // Key that would represent last touched writing byte.
struct item_head * ih ; // item header of found block;
int res ; // Return value of various functions we call.
int cont_expand_offset ; // We will put offset for generic_cont_expand here
// This can be int just because tails are created
// only for small files.
2005-04-17 02:20:36 +04:00
/* this embodies a dependency on a particular tail policy */
2005-07-13 07:21:28 +04:00
if ( inode - > i_size > = inode - > i_sb - > s_blocksize * 4 ) {
/* such a big files do not have tails, so we won't bother ourselves
to look for tails , simply return */
return 0 ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
reiserfs_write_lock ( inode - > i_sb ) ;
/* find the item containing the last byte to be written, or if
* writing past the end of the file then the last item of the
* file ( and then we check its type ) . */
make_cpu_key ( & key , inode , pos + write_bytes + 1 , TYPE_ANY ,
3 /*key length */ ) ;
res = search_for_position_by_key ( inode - > i_sb , & key , & path ) ;
if ( res = = IO_ERROR ) {
reiserfs_write_unlock ( inode - > i_sb ) ;
return - EIO ;
}
ih = get_ih ( & path ) ;
res = 0 ;
if ( is_direct_le_ih ( ih ) ) {
/* Ok, closest item is file tail (tails are stored in "direct"
* items ) , so we need to unpack it . */
/* To not overcomplicate matters, we just call generic_cont_expand
which will in turn call other stuff and finally will boil down to
reiserfs_get_block ( ) that would do necessary conversion . */
cont_expand_offset =
le_key_k_offset ( get_inode_item_key_version ( inode ) ,
& ( ih - > ih_key ) ) ;
pathrelse ( & path ) ;
res = generic_cont_expand ( inode , cont_expand_offset ) ;
} else
pathrelse ( & path ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
return res ;
2005-04-17 02:20:36 +04:00
}
/* Lock and prepare the page-cache pages covering a write region.
Grabs @num_pages pages of @inode, starting with the page that contains
@pos, and stores them in @prepared_pages ; pages that have no buffers yet
get empty buffers attached . Buffers inside the region are then mapped to
blocks the tree already has for them . A partially overwritten buffer at
the start of the first page or at the end of the last page is read in if
it is mapped , or zero - filled if it is not .
When the whole region starts in a page beyond i_size no mapping is done :
the untouched parts of the first and last page are zeroed and the initial
block estimate is returned as is .
Returns the number of blocks that still have to be allocated to cover the
new data , or a negative errno : - EFAULT when @num_pages is zero , - ENOMEM
when a page cannot be grabbed , - EIO when a requested read did not bring
the buffer uptodate . On the - ENOMEM and - EIO paths the pages obtained so
far are handed to reiserfs_unprepare_pages ( ) . */
2005-07-13 07:21:28 +04:00
static int reiserfs_prepare_file_region_for_write ( struct inode * inode
/* Inode of the file */ ,
loff_t pos , /* position in the file */
size_t num_pages , /* number of pages to
prepare */
size_t write_bytes , /* Amount of bytes to be
overwritten from
@ pos */
struct page * * prepared_pages /* pointer to array
where to store
prepared pages */
)
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
int res = 0 ; // Return values of different functions we call.
unsigned long index = pos > > PAGE_CACHE_SHIFT ; // Offset in file in pages.
int from = ( pos & ( PAGE_CACHE_SIZE - 1 ) ) ; // Writing offset in first page
int to = ( ( pos + write_bytes - 1 ) & ( PAGE_CACHE_SIZE - 1 ) ) + 1 ;
/* offset just past the last modified byte in the
last page ( 1 .. PAGE_CACHE_SIZE ) */
struct address_space * mapping = inode - > i_mapping ; // Pages are mapped here.
int i ; // Simple counter
int blocks = 0 ; /* Return value (blocks that should be allocated) */
struct buffer_head * bh , * head ; // Current bufferhead and first bufferhead
// of a page.
unsigned block_start , block_end ; // Starting and ending offsets of current
// buffer in the page.
struct buffer_head * wait [ 2 ] , * * wait_bh = wait ; // Buffers we issued reads for
// because the page was not up
// to date. Two slots suffice:
// at most two buffers can be
// partially overwritten, one at
// the beginning of the write area
// and one at the end.
// Everything in the middle gets overwritten totally.
struct cpu_key key ; // cpu key of item that we are going to deal with
struct item_head * ih = NULL ; // pointer to item head that we are going to deal with
struct buffer_head * itembuf = NULL ; // Buffer head that contains items that we are going to deal with
INITIALIZE_PATH ( path ) ; // path to item, that we are going to deal with.
__le32 * item = NULL ; // pointer to item we are going to deal with
int item_pos = - 1 ; /* Position in indirect item */
if ( num_pages < 1 ) {
reiserfs_warning ( inode - > i_sb ,
" green-9001: reiserfs_prepare_file_region_for_write "
" called with zero number of pages to process " ) ;
return - EFAULT ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/* We have 2 loops for pages. In the first loop we grab and lock the pages, so
that nobody would touch these until we release the pages . Then
we start to deal with mapping buffers to blocks . */
for ( i = 0 ; i < num_pages ; i + + ) {
prepared_pages [ i ] = grab_cache_page ( mapping , index + i ) ; // locks the page
if ( ! prepared_pages [ i ] ) {
res = - ENOMEM ;
goto failed_page_grabbing ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
if ( ! page_has_buffers ( prepared_pages [ i ] ) )
create_empty_buffers ( prepared_pages [ i ] ,
inode - > i_sb - > s_blocksize , 0 ) ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* Let's count the amount of blocks for the case where all the blocks
overwritten are new ( we will subtract already allocated blocks later ) */
if ( num_pages > 2 )
/* These are fully overwritten pages, so all the blocks in
these pages are counted as needing allocation */
blocks =
( num_pages - 2 ) < < ( PAGE_CACHE_SHIFT - inode - > i_blkbits ) ;
/* count blocks needed for first page (possibly partially written) */
blocks + = ( ( PAGE_CACHE_SIZE - from ) > > inode - > i_blkbits ) + ! ! ( from & ( inode - > i_sb - > s_blocksize - 1 ) ) ; /* roundup */
/* Now we account for the last page. If last page == first page (we
overwrite only one page ) , we subtract all the blocks past the
last writing position in a page out of the already calculated number
of blocks */
blocks + = ( ( num_pages > 1 ) < < ( PAGE_CACHE_SHIFT - inode - > i_blkbits ) ) -
( ( PAGE_CACHE_SIZE - to ) > > inode - > i_blkbits ) ;
/* Note how we do not round up here since partial blocks still
should be allocated */
/* Now if all the write area lies past the file end, no point in
mapping blocks , since there are none , so we just zero out the remaining
parts of the first and last pages in the write area ( if needed ) */
if ( ( pos & ~ ( ( loff_t ) PAGE_CACHE_SIZE - 1 ) ) > inode - > i_size ) {
if ( from ! = 0 ) { /* First page needs to be partially zeroed */
char * kaddr = kmap_atomic ( prepared_pages [ 0 ] , KM_USER0 ) ;
memset ( kaddr , 0 , from ) ;
kunmap_atomic ( kaddr , KM_USER0 ) ;
}
if ( to ! = PAGE_CACHE_SIZE ) { /* Last page needs to be partially zeroed */
char * kaddr =
kmap_atomic ( prepared_pages [ num_pages - 1 ] ,
KM_USER0 ) ;
memset ( kaddr + to , 0 , PAGE_CACHE_SIZE - to ) ;
kunmap_atomic ( kaddr , KM_USER0 ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/* Since all blocks are new - use already calculated value */
return blocks ;
}
/* Well, since we write somewhere into the middle of a file, there is
a possibility we are writing over some already allocated blocks , so
let ' s map these blocks and subtract the number of such blocks from the
blocks we need to allocate ( calculated above ) */
/* Mask write position to start on blocksize, we do it out of the
loop for performance reasons . Note that pos stays rounded down for
the rest of the function . */
pos & = ~ ( ( loff_t ) inode - > i_sb - > s_blocksize - 1 ) ;
/* Set cpu key to the starting position in a file (on left block boundary) */
make_cpu_key ( & key , inode ,
1 + ( ( pos ) & ~ ( ( loff_t ) inode - > i_sb - > s_blocksize - 1 ) ) ,
TYPE_ANY , 3 /*key length */ ) ;
reiserfs_write_lock ( inode - > i_sb ) ; // We need that for at least search_by_key()
for ( i = 0 ; i < num_pages ; i + + ) {
head = page_buffers ( prepared_pages [ i ] ) ;
/* For each buffer in the page; the buffers form a ring, so the walk
stops when it is back at head with a non-zero block_start */
for ( bh = head , block_start = 0 ; bh ! = head | | ! block_start ;
block_start = block_end , bh = bh - > b_this_page ) {
if ( ! bh )
reiserfs_panic ( inode - > i_sb ,
" green-9002: Allocated but absent buffer for a page? " ) ;
/* Find where this buffer ends */
block_end = block_start + inode - > i_sb - > s_blocksize ;
if ( i = = 0 & & block_end < = from )
/* if this buffer is before requested data to map, skip it */
continue ;
if ( i = = num_pages - 1 & & block_start > = to ) {
/* If this buffer is after requested data to map, abort
processing of current page */
break ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
if ( buffer_mapped ( bh ) & & bh - > b_blocknr ! = 0 ) {
/* This is an optimisation for the case where the buffer is mapped
and has a block number assigned . If a significant amount
of such buffers are present , we may avoid some amount
of search_by_key calls .
Probably it would be possible to move parts of this code
out of BKL , but I am afraid that would overcomplicate the code
without any noticeable benefit .
*/
item_pos + + ;
/* Update the key */
set_cpu_key_k_offset ( & key ,
cpu_key_k_offset ( & key ) +
inode - > i_sb - > s_blocksize ) ;
blocks - - ; // Decrease the amount of blocks that need to be
// allocated
continue ; // Go to the next buffer
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( ! itembuf | | /* if first iteration */
item_pos > = ih_item_len ( ih ) / UNFM_P_SIZE ) { /* or if we progressed past the
current unformatted_item */
/* Try to find next item */
res =
search_for_position_by_key ( inode - > i_sb ,
& key , & path ) ;
/* Abort if no more items */
if ( res ! = POSITION_FOUND ) {
/* make sure later loops don't use this item */
itembuf = NULL ;
item = NULL ;
break ;
}
/* Update information about current indirect item */
itembuf = get_last_bh ( & path ) ;
ih = get_ih ( & path ) ;
item = get_item ( & path ) ;
item_pos = path . pos_in_item ;
RFALSE ( ! is_indirect_le_ih ( ih ) ,
" green-9003: indirect item expected " ) ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* See if there is some block associated with the file
at that position , map the buffer to this block */
if ( get_block_num ( item , item_pos ) ) {
map_bh ( bh , inode - > i_sb ,
get_block_num ( item , item_pos ) ) ;
blocks - - ; // Decrease the amount of blocks that need to be
// allocated
}
item_pos + + ;
/* Update the key */
set_cpu_key_k_offset ( & key ,
cpu_key_k_offset ( & key ) +
inode - > i_sb - > s_blocksize ) ;
2005-04-17 02:20:36 +04:00
}
}
2005-07-13 07:21:28 +04:00
pathrelse ( & path ) ; // Free the path
reiserfs_write_unlock ( inode - > i_sb ) ;
2005-04-17 02:20:36 +04:00
/* Now zero out unmapped buffers for the first and last pages of the
write area , or issue read requests if the buffer is mapped . */
/* First page, see if it is not uptodate */
2005-07-13 07:21:28 +04:00
if ( ! PageUptodate ( prepared_pages [ 0 ] ) ) {
head = page_buffers ( prepared_pages [ 0 ] ) ;
/* For each buffer in page */
for ( bh = head , block_start = 0 ; bh ! = head | | ! block_start ;
block_start = block_end , bh = bh - > b_this_page ) {
if ( ! bh )
reiserfs_panic ( inode - > i_sb ,
" green-9002: Allocated but absent buffer for a page? " ) ;
/* Find where this buffer ends */
block_end = block_start + inode - > i_sb - > s_blocksize ;
if ( block_end < = from )
/* if this buffer is before requested data to map, skip it */
continue ;
if ( block_start < from ) { /* Aha, our partial buffer */
if ( buffer_mapped ( bh ) ) { /* If it is mapped, we need to
issue a READ request for it so
as not to lose data */
ll_rw_block ( READ , 1 , & bh ) ;
* wait_bh + + = bh ;
} else { /* Not mapped, zero the part in front of the write */
char * kaddr =
kmap_atomic ( prepared_pages [ 0 ] ,
KM_USER0 ) ;
memset ( kaddr + block_start , 0 ,
from - block_start ) ;
kunmap_atomic ( kaddr , KM_USER0 ) ;
set_buffer_uptodate ( bh ) ;
}
}
2005-04-17 02:20:36 +04:00
}
}
/* Last page, see if it is not uptodate, or if the last page is past the end of the file.
NOTE ( review ) : pos was rounded down to a block boundary above , so
pos + write_bytes here is not the original end offset of the write -
confirm that this is intended . */
2005-07-13 07:21:28 +04:00
if ( ! PageUptodate ( prepared_pages [ num_pages - 1 ] ) | |
( ( pos + write_bytes ) > > PAGE_CACHE_SHIFT ) >
( inode - > i_size > > PAGE_CACHE_SHIFT ) ) {
head = page_buffers ( prepared_pages [ num_pages - 1 ] ) ;
/* for each buffer in page */
for ( bh = head , block_start = 0 ; bh ! = head | | ! block_start ;
block_start = block_end , bh = bh - > b_this_page ) {
if ( ! bh )
reiserfs_panic ( inode - > i_sb ,
" green-9002: Allocated but absent buffer for a page? " ) ;
/* Find where this buffer ends */
block_end = block_start + inode - > i_sb - > s_blocksize ;
if ( block_start > = to )
/* this buffer and all following ones are after the requested data, stop */
break ;
if ( block_end > to ) { /* Aha, our partial buffer */
if ( buffer_mapped ( bh ) ) { /* If it is mapped, we need to
issue a READ request for it so
as not to lose data */
ll_rw_block ( READ , 1 , & bh ) ;
* wait_bh + + = bh ;
} else { /* Not mapped, zero the part behind the write */
char * kaddr =
kmap_atomic ( prepared_pages
[ num_pages - 1 ] ,
KM_USER0 ) ;
memset ( kaddr + to , 0 , block_end - to ) ;
kunmap_atomic ( kaddr , KM_USER0 ) ;
set_buffer_uptodate ( bh ) ;
}
}
2005-04-17 02:20:36 +04:00
}
}
2005-07-13 07:21:28 +04:00
/* Wait for the read requests we made to complete, if any; a buffer that
is still not uptodate afterwards is reported as - EIO */
while ( wait_bh > wait ) {
wait_on_buffer ( * - - wait_bh ) ;
if ( ! buffer_uptodate ( * wait_bh ) ) {
res = - EIO ;
goto failed_read ;
}
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
return blocks ;
/* Only the first i pages were obtained, so only those are handed back */
failed_page_grabbing :
num_pages = i ;
failed_read :
reiserfs_unprepare_pages ( prepared_pages , num_pages ) ;
return res ;
2005-04-17 02:20:36 +04:00
}
/* Write @count bytes at position @ppos in a file indicated by @file
from the buffer @ buf .
generic_file_write ( ) is only appropriate for filesystems that are not seeking to optimize performance and want
something simple that works . It is not for serious use by general purpose filesystems , excepting the one that it was
written for ( ext2 / 3 ) . This is for several reasons :
* It has no understanding of any filesystem specific optimizations .
* It enters the filesystem repeatedly for each page that is written .
* It depends on reiserfs_get_block ( ) function which if implemented by reiserfs performs costly search_by_key
* operation for each page it is supplied with . By contrast reiserfs_file_write ( ) feeds as much as possible at a time
* to reiserfs which allows for fewer tree traversals .
* Each indirect pointer insertion takes a lot of cpu , because it involves memory moves inside of blocks .
* Asking the block allocation code for blocks one at a time is slightly less efficient .
All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
use it , but we were in a hurry to make code freeze , and so it couldn ' t be revised then . This new code should make
things right finally .
Future Features : providing search_by_key with hints .
*/
2005-07-13 07:21:28 +04:00
static ssize_t reiserfs_file_write ( struct file * file , /* the file we are going to write into */
const char __user * buf , /* pointer to user supplied data
( in userspace ) */
size_t count , /* amount of bytes to write */
loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to
* new current position before returning . */
)
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
size_t already_written = 0 ; // Number of bytes already written to the file.
loff_t pos ; // Current position in the file.
ssize_t res ; // return value of various functions that we call.
int err = 0 ;
struct inode * inode = file - > f_dentry - > d_inode ; // Inode of the file that we are writing to.
/* To simplify coding at this time, we store
locked pages in array for now */
struct page * prepared_pages [ REISERFS_WRITE_PAGES_AT_A_TIME ] ;
struct reiserfs_transaction_handle th ;
th . t_trans_id = 0 ;
2006-02-01 14:06:51 +03:00
/* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
* lying around ( most of the disk , in fact ) . Despite the filesystem
* now being a v3 .6 format , the old items still can ' t support large
* file sizes . Catch this case here , as the rest of the VFS layer is
* oblivious to the different limitations between old and new items .
* reiserfs_setattr catches this for truncates . This chunk is lifted
* from generic_write_checks . */
if ( get_inode_item_key_version ( inode ) = = KEY_FORMAT_3_5 & &
* ppos + count > MAX_NON_LFS ) {
if ( * ppos > = MAX_NON_LFS ) {
send_sig ( SIGXFSZ , current , 0 ) ;
return - EFBIG ;
}
if ( count > MAX_NON_LFS - ( unsigned long ) * ppos )
count = MAX_NON_LFS - ( unsigned long ) * ppos ;
}
2005-07-13 07:21:28 +04:00
if ( file - > f_flags & O_DIRECT ) { // Direct IO needs treatment
ssize_t result , after_file_end = 0 ;
if ( ( * ppos + count > = inode - > i_size )
| | ( file - > f_flags & O_APPEND ) ) {
/* If we are appending a file, we need to put this savelink in here.
If we will crash while doing direct io , finish_unfinished will
cut the garbage from the file end . */
reiserfs_write_lock ( inode - > i_sb ) ;
err =
journal_begin ( & th , inode - > i_sb ,
JOURNAL_PER_BALANCE_CNT ) ;
if ( err ) {
reiserfs_write_unlock ( inode - > i_sb ) ;
return err ;
}
reiserfs_update_inode_transaction ( inode ) ;
add_save_link ( & th , inode , 1 /* Truncate */ ) ;
after_file_end = 1 ;
err =
journal_end ( & th , inode - > i_sb ,
JOURNAL_PER_BALANCE_CNT ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( err )
return err ;
}
result = generic_file_write ( file , buf , count , ppos ) ;
if ( after_file_end ) { /* Now update i_size and remove the savelink */
struct reiserfs_transaction_handle th ;
reiserfs_write_lock ( inode - > i_sb ) ;
err = journal_begin ( & th , inode - > i_sb , 1 ) ;
if ( err ) {
reiserfs_write_unlock ( inode - > i_sb ) ;
return err ;
}
reiserfs_update_inode_transaction ( inode ) ;
2005-09-13 12:25:17 +04:00
mark_inode_dirty ( inode ) ;
2005-07-13 07:21:28 +04:00
err = journal_end ( & th , inode - > i_sb , 1 ) ;
if ( err ) {
reiserfs_write_unlock ( inode - > i_sb ) ;
return err ;
}
err = remove_save_link ( inode , 1 /* truncate */ ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( err )
return err ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
return result ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( unlikely ( ( ssize_t ) count < 0 ) )
return - EINVAL ;
if ( unlikely ( ! access_ok ( VERIFY_READ , buf , count ) ) )
return - EFAULT ;
2006-01-10 02:59:24 +03:00
mutex_lock ( & inode - > i_mutex ) ; // locks the entire file for just us
2005-07-13 07:21:28 +04:00
pos = * ppos ;
/* Check if we can write to specified region of file, file
is not overly big and this kind of stuff . Adjust pos and
count , if needed */
res = generic_write_checks ( file , & pos , & count , 0 ) ;
if ( res )
goto out ;
if ( count = = 0 )
goto out ;
res = remove_suid ( file - > f_dentry ) ;
if ( res )
goto out ;
2006-01-10 07:52:01 +03:00
file_update_time ( file ) ;
2005-07-13 07:21:28 +04:00
// Ok, we are done with all the checks.
// Now we should start real work
/* If we are going to write past the file's packed tail or if we are going
to overwrite part of the tail , we need that tail to be converted into
unformatted node */
res = reiserfs_check_for_tail_and_convert ( inode , pos , count ) ;
if ( res )
goto out ;
while ( count > 0 ) {
/* This is the main loop in which we running until some error occures
or until we write all of the data . */
size_t num_pages ; /* amount of pages we are going to write this iteration */
size_t write_bytes ; /* amount of bytes to write during this iteration */
size_t blocks_to_allocate ; /* how much blocks we need to allocate for this iteration */
/* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */
num_pages = ! ! ( ( pos + count ) & ( PAGE_CACHE_SIZE - 1 ) ) + /* round up partial
pages */
( ( count +
( pos & ( PAGE_CACHE_SIZE - 1 ) ) ) > > PAGE_CACHE_SHIFT ) ;
/* convert size to amount of
pages */
reiserfs_write_lock ( inode - > i_sb ) ;
if ( num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
| | num_pages > reiserfs_can_fit_pages ( inode - > i_sb ) ) {
/* If we were asked to write more data than we want to or if there
is not that much space , then we shorten amount of data to write
for this iteration . */
num_pages =
min_t ( size_t , REISERFS_WRITE_PAGES_AT_A_TIME ,
reiserfs_can_fit_pages ( inode - > i_sb ) ) ;
/* Also we should not forget to set size in bytes accordingly */
write_bytes = ( num_pages < < PAGE_CACHE_SHIFT ) -
( pos & ( PAGE_CACHE_SIZE - 1 ) ) ;
/* If position is not on the
start of the page , we need
to substract the offset
within page */
} else
write_bytes = count ;
/* reserve the blocks to be allocated later, so that later on
we still have the space to write the blocks to */
reiserfs_claim_blocks_to_be_allocated ( inode - > i_sb ,
num_pages < <
( PAGE_CACHE_SHIFT -
inode - > i_blkbits ) ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( ! num_pages ) { /* If we do not have enough space even for a single page... */
if ( pos >
inode - > i_size + inode - > i_sb - > s_blocksize -
( pos & ( inode - > i_sb - > s_blocksize - 1 ) ) ) {
res = - ENOSPC ;
break ; // In case we are writing past the end of the last file block, break.
}
// Otherwise we are possibly overwriting the file, so
// let's set write size to be equal or less than blocksize.
// This way we get it correctly for file holes.
// But overwriting files on absolutelly full volumes would not
// be very efficient. Well, people are not supposed to fill
// 100% of disk space anyway.
write_bytes =
min_t ( size_t , count ,
inode - > i_sb - > s_blocksize -
( pos & ( inode - > i_sb - > s_blocksize - 1 ) ) ) ;
num_pages = 1 ;
// No blocks were claimed before, so do it now.
reiserfs_claim_blocks_to_be_allocated ( inode - > i_sb ,
1 < <
( PAGE_CACHE_SHIFT
-
inode - >
i_blkbits ) ) ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* Prepare for writing into the region, read in all the
partially overwritten pages , if needed . And lock the pages ,
so that nobody else can access these until we are done .
We get number of actual blocks needed as a result . */
2006-03-02 13:54:39 +03:00
res = reiserfs_prepare_file_region_for_write ( inode , pos ,
num_pages ,
write_bytes ,
prepared_pages ) ;
if ( res < 0 ) {
2005-07-13 07:21:28 +04:00
reiserfs_release_claimed_blocks ( inode - > i_sb ,
num_pages < <
( PAGE_CACHE_SHIFT -
inode - > i_blkbits ) ) ;
break ;
}
2005-04-17 02:20:36 +04:00
2006-03-02 13:54:39 +03:00
blocks_to_allocate = res ;
2005-07-13 07:21:28 +04:00
/* First we correct our estimate of how many blocks we need */
reiserfs_release_claimed_blocks ( inode - > i_sb ,
( num_pages < <
( PAGE_CACHE_SHIFT -
inode - > i_sb - >
s_blocksize_bits ) ) -
blocks_to_allocate ) ;
if ( blocks_to_allocate > 0 ) { /*We only allocate blocks if we need to */
/* Fill in all the possible holes and append the file if needed */
res =
reiserfs_allocate_blocks_for_region ( & th , inode , pos ,
num_pages ,
write_bytes ,
prepared_pages ,
blocks_to_allocate ) ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* well, we have allocated the blocks, so it is time to free
the reservation we made earlier . */
reiserfs_release_claimed_blocks ( inode - > i_sb ,
blocks_to_allocate ) ;
if ( res ) {
reiserfs_unprepare_pages ( prepared_pages , num_pages ) ;
break ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* NOTE that allocating blocks and filling blocks can be done in reverse order
and probably we would do that just to get rid of garbage in files after a
crash */
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* Copy data from user-supplied buffer to file's pages */
res =
reiserfs_copy_from_user_to_file_region ( pos , num_pages ,
write_bytes ,
prepared_pages , buf ) ;
if ( res ) {
reiserfs_unprepare_pages ( prepared_pages , num_pages ) ;
break ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* Send the pages to disk and unlock them. */
res =
reiserfs_submit_file_region_for_write ( & th , inode , pos ,
num_pages ,
write_bytes ,
prepared_pages ) ;
if ( res )
break ;
already_written + = write_bytes ;
buf + = write_bytes ;
* ppos = pos + = write_bytes ;
count - = write_bytes ;
2006-03-25 14:07:16 +03:00
balance_dirty_pages_ratelimited_nr ( inode - > i_mapping , num_pages ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/* this is only true on error */
if ( th . t_trans_id ) {
reiserfs_write_lock ( inode - > i_sb ) ;
err = journal_end ( & th , th . t_super , th . t_blocks_allocated ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( err ) {
res = err ;
goto out ;
}
2005-04-17 02:20:36 +04:00
}
2006-03-25 14:07:00 +03:00
if ( likely ( res > = 0 ) & &
( unlikely ( ( file - > f_flags & O_SYNC ) | | IS_SYNC ( inode ) ) ) )
res = generic_osync_inode ( inode , file - > f_mapping ,
OSYNC_METADATA | OSYNC_DATA ) ;
2005-04-17 02:20:36 +04:00
2006-01-10 02:59:24 +03:00
mutex_unlock ( & inode - > i_mutex ) ;
2005-07-13 07:21:28 +04:00
reiserfs_async_progress_wait ( inode - > i_sb ) ;
return ( already_written ! = 0 ) ? already_written : res ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
out :
2006-01-10 02:59:24 +03:00
mutex_unlock ( & inode - > i_mutex ) ; // unlock the file on exit.
2005-07-13 07:21:28 +04:00
return res ;
2005-04-17 02:20:36 +04:00
}
2006-03-28 13:56:42 +04:00
const struct file_operations reiserfs_file_operations = {
2005-07-13 07:21:28 +04:00
. read = generic_file_read ,
. write = reiserfs_file_write ,
. ioctl = reiserfs_ioctl ,
2006-08-29 22:06:18 +04:00
# ifdef CONFIG_COMPAT
. compat_ioctl = reiserfs_compat_ioctl ,
# endif
2005-07-13 07:21:28 +04:00
. mmap = generic_file_mmap ,
. release = reiserfs_file_release ,
. fsync = reiserfs_sync_file ,
. sendfile = generic_file_sendfile ,
. aio_read = generic_file_aio_read ,
2006-06-26 11:24:57 +04:00
. aio_write = generic_file_aio_write ,
2006-03-30 17:15:30 +04:00
. splice_read = generic_file_splice_read ,
. splice_write = generic_file_splice_write ,
2005-04-17 02:20:36 +04:00
} ;
2005-07-13 07:21:28 +04:00
/*
 * Inode operations table for reiserfs files: attribute/size handling,
 * permission checking and the extended attribute entry points.
 */
struct inode_operations reiserfs_file_inode_operations = {
	/* size and attribute changes */
	.setattr = reiserfs_setattr,
	.truncate = reiserfs_vfs_truncate_file,

	/* access control */
	.permission = reiserfs_permission,

	/* extended attributes */
	.getxattr = reiserfs_getxattr,
	.setxattr = reiserfs_setxattr,
	.listxattr = reiserfs_listxattr,
	.removexattr = reiserfs_removexattr,
};