2005-04-17 02:20:36 +04:00
/*
* Copyright 2000 by Hans Reiser , licensing governed by reiserfs / README
*/
# include <linux/time.h>
# include <linux/fs.h>
# include <linux/reiserfs_fs.h>
# include <linux/reiserfs_acl.h>
# include <linux/reiserfs_xattr.h>
2007-07-17 15:04:28 +04:00
# include <linux/exportfs.h>
2005-04-17 02:20:36 +04:00
# include <linux/smp_lock.h>
# include <linux/pagemap.h>
# include <linux/highmem.h>
# include <asm/uaccess.h>
# include <asm/unaligned.h>
# include <linux/buffer_head.h>
# include <linux/mpage.h>
# include <linux/writeback.h>
# include <linux/quotaops.h>
2007-10-16 12:25:14 +04:00
# include <linux/swap.h>
2005-04-17 02:20:36 +04:00
2007-10-16 12:25:14 +04:00
/*
 * Forward declarations: convert_tail_for_hole() further down calls both of
 * these.  Their definitions are not visible in this part of the file.
 */
int reiserfs_commit_write ( struct file * f , struct page * page ,
unsigned from , unsigned to ) ;
int reiserfs_prepare_write ( struct file * f , struct page * page ,
unsigned from , unsigned to ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
void reiserfs_delete_inode ( struct inode * inode )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
/* We need blocks for transaction + (user+group) quota update (possibly delete) */
int jbegin_count =
JOURNAL_PER_BALANCE_CNT * 2 +
2 * REISERFS_QUOTA_INIT_BLOCKS ( inode - > i_sb ) ;
struct reiserfs_transaction_handle th ;
2005-12-14 22:38:05 +03:00
int err ;
2005-04-17 02:20:36 +04:00
2005-09-10 00:01:31 +04:00
truncate_inode_pages ( & inode - > i_data , 0 ) ;
2005-07-13 07:21:28 +04:00
reiserfs_write_lock ( inode - > i_sb ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
if ( ! ( inode - > i_state & I_NEW ) & & INODE_PKEY ( inode ) - > k_objectid ! = 0 ) { /* also handles bad_inode case */
reiserfs_delete_xattrs ( inode ) ;
2005-04-17 02:20:36 +04:00
2006-08-05 23:14:01 +04:00
if ( journal_begin ( & th , inode - > i_sb , jbegin_count ) )
2005-07-13 07:21:28 +04:00
goto out ;
reiserfs_update_inode_transaction ( inode ) ;
2005-04-17 02:20:36 +04:00
2008-07-08 22:37:06 +04:00
reiserfs_discard_prealloc ( & th , inode ) ;
2005-12-14 22:38:05 +03:00
err = reiserfs_delete_object ( & th , inode ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* Do quota update inside a transaction for journaled quotas. We must do that
* after delete_object so that quota updates go into the same transaction as
* stat data deletion */
2005-12-14 22:38:05 +03:00
if ( ! err )
2009-01-26 19:14:18 +03:00
vfs_dq_free_inode ( inode ) ;
2005-07-13 07:21:28 +04:00
2006-08-05 23:14:01 +04:00
if ( journal_end ( & th , inode - > i_sb , jbegin_count ) )
2005-07-13 07:21:28 +04:00
goto out ;
2005-04-17 02:20:36 +04:00
2005-12-14 22:38:05 +03:00
/* check return value from reiserfs_delete_object after
* ending the transaction
*/
if ( err )
goto out ;
2005-07-13 07:21:28 +04:00
/* all items of file are deleted, so we can remove "save" link */
remove_save_link ( inode , 0 /* not truncate */ ) ; /* we can't do anything
* about an error here */
} else {
/* no object items are in the tree */
;
}
out :
clear_inode ( inode ) ; /* note this must go after the journal_end to prevent deadlock */
inode - > i_blocks = 0 ;
reiserfs_write_unlock ( inode - > i_sb ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/*
 * Fill in every field of a cpu_key from explicit values: the key format
 * version, the (dirid, objectid) pair, the offset, the item type and the
 * key length.
 */
static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
			  __u32 objectid, loff_t offset, int type, int length)
{
	/* the version is stored before offset and type are set */
	key->version = version;

	key->on_disk_key.k_dir_id = dirid;
	key->on_disk_key.k_objectid = objectid;

	set_cpu_key_k_offset(key, offset);
	set_cpu_key_k_type(key, type);

	key->key_length = length;
}
/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
offset and type of key */
2005-07-13 07:21:28 +04:00
void make_cpu_key ( struct cpu_key * key , struct inode * inode , loff_t offset ,
int type , int length )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
_make_cpu_key ( key , get_inode_item_key_version ( inode ) ,
le32_to_cpu ( INODE_PKEY ( inode ) - > k_dir_id ) ,
le32_to_cpu ( INODE_PKEY ( inode ) - > k_objectid ) , offset , type ,
length ) ;
2005-04-17 02:20:36 +04:00
}
//
// when key is 0, do not set version and short key
//
2005-07-13 07:21:28 +04:00
inline void make_le_item_head ( struct item_head * ih , const struct cpu_key * key ,
int version ,
loff_t offset , int type , int length ,
int entry_count /*or ih_free_space */ )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
if ( key ) {
ih - > ih_key . k_dir_id = cpu_to_le32 ( key - > on_disk_key . k_dir_id ) ;
ih - > ih_key . k_objectid =
cpu_to_le32 ( key - > on_disk_key . k_objectid ) ;
}
put_ih_version ( ih , version ) ;
set_le_ih_k_offset ( ih , offset ) ;
set_le_ih_k_type ( ih , type ) ;
put_ih_item_len ( ih , length ) ;
/* set_ih_free_space (ih, 0); */
// for directory items it is entry count, for directs and stat
// datas - 0xffff, for indirects - 0
put_ih_entry_count ( ih , entry_count ) ;
2005-04-17 02:20:36 +04:00
}
//
// FIXME: we might cache recently accessed indirect item
// Ugh. Not too eager for that....
// I cut the code until such time as I see a convincing argument (benchmark).
// I don't want a bloated inode struct..., and I don't like code complexity....
/* cutting the code is fine, since it really isn't in use yet and is easy
** to add back in.  But, Vladimir has a really good idea here.  Think
** about what happens for reading a file.  For each page,
** the VFS layer calls reiserfs_readpage, which searches the tree to find
** an indirect item.  This indirect item has X number of pointers, where
** X is a big number if we've done the block allocation right.  But,
** we only use one or two of these pointers during each call to readpage,
** needlessly searching the tree again later on.
**
** The size of the cache could be dynamic based on the size of the file.
**
** I'd also like to see us cache the location of the stat data item, since
** we are needlessly searching for it again and again.
**
** -chris
*/
/* If this page has a file tail in it, and
* * it was read in by get_block_create_0 , the page data is valid ,
* * but tail is still sitting in a direct item , and we can ' t write to
* * it . So , look through this page , and check all the mapped buffers
* * to make sure they have valid block numbers . Any that don ' t need
* * to be unmapped , so that block_prepare_write will correctly call
* * reiserfs_get_block to convert the tail into an unformatted node
*/
2005-07-13 07:21:28 +04:00
static inline void fix_tail_page_for_writing ( struct page * page )
{
struct buffer_head * head , * next , * bh ;
if ( page & & page_has_buffers ( page ) ) {
head = page_buffers ( page ) ;
bh = head ;
do {
next = bh - > b_this_page ;
if ( buffer_mapped ( bh ) & & bh - > b_blocknr = = 0 ) {
reiserfs_unmap_buffer ( bh ) ;
}
bh = next ;
} while ( bh ! = head ) ;
}
2005-04-17 02:20:36 +04:00
}
/* reiserfs_get_block does not need to allocate a block only if it has been
done already or non - hole position has been found in the indirect item */
2005-07-13 07:21:28 +04:00
static inline int allocation_needed ( int retval , b_blocknr_t allocated ,
struct item_head * ih ,
__le32 * item , int pos_in_item )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
if ( allocated )
return 0 ;
if ( retval = = POSITION_FOUND & & is_indirect_le_ih ( ih ) & &
get_block_num ( item , pos_in_item ) )
return 0 ;
return 1 ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
static inline int indirect_item_found ( int retval , struct item_head * ih )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
return ( retval = = POSITION_FOUND ) & & is_indirect_le_ih ( ih ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/* Map bh to the given block on the super block of the inode. */
static inline void set_block_dev_mapped(struct buffer_head *bh,
					b_blocknr_t block, struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	map_bh(bh, sb, block);
}
//
// files which were created in the earlier version can not be longer,
// than 2 gb
//
2007-10-19 10:39:25 +04:00
static int file_capable ( struct inode * inode , sector_t block )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
if ( get_inode_item_key_version ( inode ) ! = KEY_FORMAT_3_5 | | // it is new file.
block < ( 1 < < ( 31 - inode - > i_sb - > s_blocksize_bits ) ) ) // old file, but 'block' is inside of 2gb
return 1 ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2007-10-17 10:26:03 +04:00
static int restart_transaction ( struct reiserfs_transaction_handle * th ,
struct inode * inode , struct treepath * path )
2005-07-13 07:21:28 +04:00
{
struct super_block * s = th - > t_super ;
int len = th - > t_blocks_allocated ;
int err ;
BUG_ON ( ! th - > t_trans_id ) ;
BUG_ON ( ! th - > t_refcount ) ;
[PATCH] fix reiserfs bad path release panic
One of our test team hit a reiserfs_panic while running fsstress tests on
2.6.19-rc1. The message looks like :
REISERFS: panic(device Null superblock):
reiserfs[5676]: assertion !(p->path_length != 1 ) failed at
fs/reiserfs/stree.c:397:reiserfs_check_path: path not properly relsed.
The backtrace looked :
kernel BUG in reiserfs_panic at fs/reiserfs/prints.c:361!
.reiserfs_check_path+0x58/0x74
.reiserfs_get_block+0x1444/0x1508
.__block_prepare_write+0x1c8/0x558
.block_prepare_write+0x34/0x64
.reiserfs_prepare_write+0x118/0x1d0
.generic_file_buffered_write+0x314/0x82c
.__generic_file_aio_write_nolock+0x350/0x3e0
.__generic_file_write_nolock+0x78/0xb0
.generic_file_write+0x60/0xf0
.reiserfs_file_write+0x198/0x2038
.vfs_write+0xd0/0x1b4
.sys_write+0x4c/0x8c
syscall_exit+0x0/0x4
Upon debugging I found that the restart_transaction was not releasing
the path if the th->refcount was > 1.
/*static*/
int restart_transaction(struct reiserfs_transaction_handle *th,
struct inode *inode, struct path *path)
{
[...]
/* we cannot restart while nested */
if (th->t_refcount > 1) { <<- Path is not released in this case!
return 0;
}
pathrelse(path); <<- Path released here.
[...]
This could happen in such a situation :
In reiserfs/inode.c: reiserfs_get_block() ::
if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
/* restart the transaction to give the journal a chance to free
** some blocks. releases the path, so we have to go back to
** research if we succeed on the second try
*/
SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
-->> retval = restart_transaction(th, inode, &path); <<--
We are supposed to release the path, no matter we succeed or fail. But
if the th->refcount is > 1, the path is still valid. And,
if (retval)
goto failure;
repeat =
_allocate_block(th, block, inode,
&allocated_block_nr, NULL, create);
If the above allocate_block fails with NO_DISK_SPACE or QUOTA_EXCEEDED,
we would have path which is not released.
if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
goto research;
}
if (repeat == QUOTA_EXCEEDED)
retval = -EDQUOT;
else
retval = -ENOSPC;
goto failure;
[...]
failure:
[...]
reiserfs_check_path(&path); << Panics here !
Attached here is a patch which could fix the issue.
fix reiserfs/inode.c : restart_transaction() to release the path in all
cases.
The restart_transaction() doesn't release the path when the the journal
handle has a refcount > 1. This would trigger a reiserfs_panic() if we
encounter an -ENOSPC / -EDQUOT in reiserfs_get_block().
Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Cc: "Vladimir V. Saveliev" <vs@namesys.com>
Cc: <reiserfs-dev@namesys.com>
Cc: Jeff Mahoney <jeffm@suse.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 07:36:10 +03:00
pathrelse ( path ) ;
2005-07-13 07:21:28 +04:00
/* we cannot restart while nested */
if ( th - > t_refcount > 1 ) {
return 0 ;
}
reiserfs_update_sd ( th , inode ) ;
err = journal_end ( th , s , len ) ;
if ( ! err ) {
err = journal_begin ( th , s , JOURNAL_PER_BALANCE_CNT * 6 ) ;
if ( ! err )
reiserfs_update_inode_transaction ( inode ) ;
}
return err ;
2005-04-17 02:20:36 +04:00
}
// it is called by get_block when create == 0. Returns block number
// for 'block'-th logical block of file. When it hits direct item it
// returns 0 (being called from bmap) or read direct item into piece
// of page (bh_result)
// Please improve the english/clarity in the comment above, as it is
// hard to understand.
2007-10-19 10:39:25 +04:00
static int _get_block_create_0 ( struct inode * inode , sector_t block ,
2005-07-13 07:21:28 +04:00
struct buffer_head * bh_result , int args )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
INITIALIZE_PATH ( path ) ;
struct cpu_key key ;
struct buffer_head * bh ;
struct item_head * ih , tmp_ih ;
int fs_gen ;
2007-10-19 10:39:25 +04:00
b_blocknr_t blocknr ;
2005-07-13 07:21:28 +04:00
char * p = NULL ;
int chars ;
int ret ;
int result ;
int done = 0 ;
unsigned long offset ;
// prepare the key to look for the 'block'-th block of file
make_cpu_key ( & key , inode ,
( loff_t ) block * inode - > i_sb - > s_blocksize + 1 , TYPE_ANY ,
3 ) ;
research :
result = search_for_position_by_key ( inode - > i_sb , & key , & path ) ;
if ( result ! = POSITION_FOUND ) {
pathrelse ( & path ) ;
if ( p )
kunmap ( bh_result - > b_page ) ;
if ( result = = IO_ERROR )
return - EIO ;
// We do not return -ENOENT if there is a hole but page is uptodate, because it means
// That there is some MMAPED data associated with it that is yet to be written to disk.
if ( ( args & GET_BLOCK_NO_HOLE )
& & ! PageUptodate ( bh_result - > b_page ) ) {
return - ENOENT ;
}
return 0 ;
}
//
bh = get_last_bh ( & path ) ;
ih = get_ih ( & path ) ;
if ( is_indirect_le_ih ( ih ) ) {
__le32 * ind_item = ( __le32 * ) B_I_PITEM ( bh , ih ) ;
/* FIXME: here we could cache indirect item or part of it in
the inode to avoid search_by_key in case of subsequent
access to file */
blocknr = get_block_num ( ind_item , path . pos_in_item ) ;
ret = 0 ;
if ( blocknr ) {
map_bh ( bh_result , inode - > i_sb , blocknr ) ;
if ( path . pos_in_item = =
( ( ih_item_len ( ih ) / UNFM_P_SIZE ) - 1 ) ) {
set_buffer_boundary ( bh_result ) ;
}
} else
// We do not return -ENOENT if there is a hole but page is uptodate, because it means
// That there is some MMAPED data associated with it that is yet to be written to disk.
if ( ( args & GET_BLOCK_NO_HOLE )
& & ! PageUptodate ( bh_result - > b_page ) ) {
ret = - ENOENT ;
}
pathrelse ( & path ) ;
if ( p )
kunmap ( bh_result - > b_page ) ;
return ret ;
}
// requested data are in direct item(s)
if ( ! ( args & GET_BLOCK_READ_DIRECT ) ) {
// we are called by bmap. FIXME: we can not map block of file
// when it is stored in direct item(s)
pathrelse ( & path ) ;
if ( p )
kunmap ( bh_result - > b_page ) ;
return - ENOENT ;
}
/* if we've got a direct item, and the buffer or page was uptodate,
* * we don ' t want to pull data off disk again . skip to the
* * end , where we map the buffer and return
*/
if ( buffer_uptodate ( bh_result ) ) {
goto finished ;
} else
/*
* * grab_tail_page can trigger calls to reiserfs_get_block on up to date
* * pages without any buffers . If the page is up to date , we don ' t want
* * read old data off disk . Set the up to date bit on the buffer instead
* * and jump to the end
*/
if ( ! bh_result - > b_page | | PageUptodate ( bh_result - > b_page ) ) {
2005-04-17 02:20:36 +04:00
set_buffer_uptodate ( bh_result ) ;
2005-07-13 07:21:28 +04:00
goto finished ;
}
// read file tail into part of page
offset = ( cpu_key_k_offset ( & key ) - 1 ) & ( PAGE_CACHE_SIZE - 1 ) ;
fs_gen = get_generation ( inode - > i_sb ) ;
copy_item_head ( & tmp_ih , ih ) ;
/* we only want to kmap if we are reading the tail into the page.
* * this is not the common case , so we don ' t kmap until we are
* * sure we need to . But , this means the item might move if
* * kmap schedules
*/
if ( ! p ) {
p = ( char * ) kmap ( bh_result - > b_page ) ;
if ( fs_changed ( fs_gen , inode - > i_sb )
& & item_moved ( & tmp_ih , & path ) ) {
goto research ;
}
}
p + = offset ;
memset ( p , 0 , inode - > i_sb - > s_blocksize ) ;
do {
if ( ! is_direct_le_ih ( ih ) ) {
BUG ( ) ;
}
/* make sure we don't read more bytes than actually exist in
* * the file . This can happen in odd cases where i_size isn ' t
2009-03-30 22:02:44 +04:00
* * correct , and when direct item padding results in a few
2005-07-13 07:21:28 +04:00
* * extra bytes at the end of the direct item
*/
if ( ( le_ih_k_offset ( ih ) + path . pos_in_item ) > inode - > i_size )
break ;
if ( ( le_ih_k_offset ( ih ) - 1 + ih_item_len ( ih ) ) > inode - > i_size ) {
chars =
inode - > i_size - ( le_ih_k_offset ( ih ) - 1 ) -
path . pos_in_item ;
done = 1 ;
} else {
chars = ih_item_len ( ih ) - path . pos_in_item ;
}
memcpy ( p , B_I_PITEM ( bh , ih ) + path . pos_in_item , chars ) ;
if ( done )
break ;
p + = chars ;
if ( PATH_LAST_POSITION ( & path ) ! = ( B_NR_ITEMS ( bh ) - 1 ) )
// we done, if read direct item is not the last item of
// node FIXME: we could try to check right delimiting key
// to see whether direct item continues in the right
// neighbor or rely on i_size
break ;
// update key to look for the next piece
set_cpu_key_k_offset ( & key , cpu_key_k_offset ( & key ) + chars ) ;
result = search_for_position_by_key ( inode - > i_sb , & key , & path ) ;
if ( result ! = POSITION_FOUND )
// i/o error most likely
break ;
bh = get_last_bh ( & path ) ;
ih = get_ih ( & path ) ;
} while ( 1 ) ;
flush_dcache_page ( bh_result - > b_page ) ;
kunmap ( bh_result - > b_page ) ;
finished :
pathrelse ( & path ) ;
if ( result = = IO_ERROR )
return - EIO ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* this buffer has valid data, but isn't valid for io. mapping it to
* block # 0 tells the rest of reiserfs it just has a tail in it
*/
map_bh ( bh_result , inode - > i_sb , 0 ) ;
set_buffer_uptodate ( bh_result ) ;
return 0 ;
}
2005-04-17 02:20:36 +04:00
// this is called to create file map. So, _get_block_create_0 will not
// read direct item
2005-07-13 07:21:28 +04:00
static int reiserfs_bmap ( struct inode * inode , sector_t block ,
struct buffer_head * bh_result , int create )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
if ( ! file_capable ( inode , block ) )
return - EFBIG ;
reiserfs_write_lock ( inode - > i_sb ) ;
/* do not read the direct item */
_get_block_create_0 ( inode , block , bh_result , 0 ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
return 0 ;
2005-04-17 02:20:36 +04:00
}
/* special version of get_block that is only used by grab_tail_page right
* * now . It is sent to block_prepare_write , and when you try to get a
* * block past the end of the file ( or a block from a hole ) it returns
* * - ENOENT instead of a valid buffer . block_prepare_write expects to
* * be able to do i / o on the buffers returned , unless an error value
* * is also returned .
2009-03-30 22:02:44 +04:00
* *
2005-04-17 02:20:36 +04:00
* * So , this allows block_prepare_write to be used for reading a single block
* * in a page . Where it does not produce a valid page for holes , or past the
* * end of the file . This turns out to be exactly what we need for reading
* * tails for conversion .
* *
* * The point of the wrapper is forcing a certain value for create , even
2009-03-30 22:02:44 +04:00
* * though the VFS layer is calling this function with create = = 1. If you
* * don ' t want to send create = = GET_BLOCK_NO_HOLE to reiserfs_get_block ,
2005-04-17 02:20:36 +04:00
* * don ' t use this function .
*/
2005-07-13 07:21:28 +04:00
static int reiserfs_get_block_create_0 ( struct inode * inode , sector_t block ,
struct buffer_head * bh_result ,
int create )
{
return reiserfs_get_block ( inode , block , bh_result , GET_BLOCK_NO_HOLE ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * Special helper for reiserfs_get_block in case we are executing a
 * direct_IO request.
 *
 * Calls reiserfs_get_block() with GET_BLOCK_NO_DANGLE added to 'create'
 * and a buffer head that has no page.  Direct io onto tail pages is
 * refused with -EINVAL.  If the inode is flagged as possibly needing
 * tail packing on close, it is committed and the flag is cleared.
 *
 * Returns 0 on success or a negative error.
 */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	bh_result->b_page = NULL;

	/*
	 * We set the b_size before reiserfs_get_block call since it is
	 * referenced in convert_tail_for_hole() that may be called from
	 * reiserfs_get_block()
	 */
	bh_result->b_size = (1 << inode->i_blkbits);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		return ret;

	/* don't allow direct io onto tail pages */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/*
		 * make sure future calls to the direct io funcs for this
		 * offset in the file fail by unmapping the buffer
		 */
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}

	/*
	 * Possible unpacked tail.  Flush the data before pages have
	 * disappeared.
	 */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int commit_err;

		lock_kernel();
		commit_err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
		unlock_kernel();

		/* a commit failure overrides whatever ret held so far */
		if (commit_err < 0)
			ret = commit_err;
	}

	return ret;
}
2005-04-17 02:20:36 +04:00
/*
* * helper function for when reiserfs_get_block is called for a hole
* * but the file tail is still in a direct item
* * bh_result is the buffer head for the hole
* * tail_offset is the offset of the start of the tail in the file
* *
* * This calls prepare_write , which will start a new transaction
* * you should not be in a transaction , or have any paths held when you
* * call this .
*/
2005-07-13 07:21:28 +04:00
static int convert_tail_for_hole ( struct inode * inode ,
struct buffer_head * bh_result ,
loff_t tail_offset )
{
unsigned long index ;
unsigned long tail_end ;
unsigned long tail_start ;
struct page * tail_page ;
struct page * hole_page = bh_result - > b_page ;
int retval = 0 ;
if ( ( tail_offset & ( bh_result - > b_size - 1 ) ) ! = 1 )
return - EIO ;
/* always try to read until the end of the block */
tail_start = tail_offset & ( PAGE_CACHE_SIZE - 1 ) ;
tail_end = ( tail_start | ( bh_result - > b_size - 1 ) ) + 1 ;
index = tail_offset > > PAGE_CACHE_SHIFT ;
/* hole_page can be zero in case of direct_io, we are sure
that we cannot get here if we write with O_DIRECT into
tail page */
if ( ! hole_page | | index ! = hole_page - > index ) {
tail_page = grab_cache_page ( inode - > i_mapping , index ) ;
retval = - ENOMEM ;
if ( ! tail_page ) {
goto out ;
}
} else {
tail_page = hole_page ;
}
/* we don't have to make sure the conversion did not happen while
* * we were locking the page because anyone that could convert
2006-01-10 02:59:24 +03:00
* * must first take i_mutex .
2005-07-13 07:21:28 +04:00
* *
* * We must fix the tail page for writing because it might have buffers
* * that are mapped , but have a block number of 0. This indicates tail
* * data that has been read directly into the page , and block_prepare_write
* * won ' t trigger a get_block in this case .
*/
fix_tail_page_for_writing ( tail_page ) ;
retval = reiserfs_prepare_write ( NULL , tail_page , tail_start , tail_end ) ;
if ( retval )
goto unlock ;
/* tail conversion might change the data in the page */
flush_dcache_page ( tail_page ) ;
retval = reiserfs_commit_write ( NULL , tail_page , tail_start , tail_end ) ;
unlock :
if ( tail_page ! = hole_page ) {
unlock_page ( tail_page ) ;
page_cache_release ( tail_page ) ;
}
out :
return retval ;
2005-04-17 02:20:36 +04:00
}
static inline int _allocate_block ( struct reiserfs_transaction_handle * th ,
2007-10-19 10:39:25 +04:00
sector_t block ,
2005-07-13 07:21:28 +04:00
struct inode * inode ,
b_blocknr_t * allocated_block_nr ,
2006-12-08 13:36:32 +03:00
struct treepath * path , int flags )
2005-07-13 07:21:28 +04:00
{
BUG_ON ( ! th - > t_trans_id ) ;
2005-04-17 02:20:36 +04:00
# ifdef REISERFS_PREALLOCATE
2006-01-10 02:59:24 +03:00
if ( ! ( flags & GET_BLOCK_NO_IMUX ) ) {
2005-07-13 07:21:28 +04:00
return reiserfs_new_unf_blocknrs2 ( th , inode , allocated_block_nr ,
path , block ) ;
}
2005-04-17 02:20:36 +04:00
# endif
2005-07-13 07:21:28 +04:00
return reiserfs_new_unf_blocknrs ( th , inode , allocated_block_nr , path ,
block ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
int reiserfs_get_block ( struct inode * inode , sector_t block ,
struct buffer_head * bh_result , int create )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
int repeat , retval = 0 ;
b_blocknr_t allocated_block_nr = 0 ; // b_blocknr_t is (unsigned) 32 bit int
INITIALIZE_PATH ( path ) ;
int pos_in_item ;
struct cpu_key key ;
struct buffer_head * bh , * unbh = NULL ;
struct item_head * ih , tmp_ih ;
__le32 * item ;
int done ;
int fs_gen ;
struct reiserfs_transaction_handle * th = NULL ;
2009-03-30 22:02:44 +04:00
/* space reserved in transaction batch:
2005-07-13 07:21:28 +04:00
. 3 balancings in direct - > indirect conversion
. 1 block involved into reiserfs_update_sd ( )
XXX in practically impossible worst case direct2indirect ( )
can incur ( much ) more than 3 balancings .
quota update for user , group */
int jbegin_count =
JOURNAL_PER_BALANCE_CNT * 3 + 1 +
2 * REISERFS_QUOTA_TRANS_BLOCKS ( inode - > i_sb ) ;
int version ;
int dangle = 1 ;
loff_t new_offset =
( ( ( loff_t ) block ) < < inode - > i_sb - > s_blocksize_bits ) + 1 ;
/* bad.... */
reiserfs_write_lock ( inode - > i_sb ) ;
version = get_inode_item_key_version ( inode ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( ! file_capable ( inode , block ) ) {
reiserfs_write_unlock ( inode - > i_sb ) ;
return - EFBIG ;
}
/* if !create, we aren't changing the FS, so we don't need to
* * log anything , so we don ' t need to start a transaction
*/
if ( ! ( create & GET_BLOCK_CREATE ) ) {
int ret ;
/* find number of block-th logical block of the file */
ret = _get_block_create_0 ( inode , block , bh_result ,
create | GET_BLOCK_READ_DIRECT ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
return ret ;
}
/*
* if we ' re already in a transaction , make sure to close
* any new transactions we start in this func
*/
if ( ( create & GET_BLOCK_NO_DANGLE ) | |
reiserfs_transaction_running ( inode - > i_sb ) )
dangle = 0 ;
/* If file is of such a size, that it might have a tail and tails are enabled
* * we should mark it as possibly needing tail packing on close
*/
if ( ( have_large_tails ( inode - > i_sb )
& & inode - > i_size < i_block_size ( inode ) * 4 )
| | ( have_small_tails ( inode - > i_sb )
& & inode - > i_size < i_block_size ( inode ) ) )
REISERFS_I ( inode ) - > i_flags | = i_pack_on_close_mask ;
/* set the key of the first byte in the 'block'-th block of file */
make_cpu_key ( & key , inode , new_offset , TYPE_ANY , 3 /*key length */ ) ;
if ( ( new_offset + inode - > i_sb - > s_blocksize - 1 ) > inode - > i_size ) {
start_trans :
th = reiserfs_persistent_transaction ( inode - > i_sb , jbegin_count ) ;
if ( ! th ) {
retval = - ENOMEM ;
2005-04-17 02:20:36 +04:00
goto failure ;
}
2005-07-13 07:21:28 +04:00
reiserfs_update_inode_transaction ( inode ) ;
}
research :
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
retval = search_for_position_by_key ( inode - > i_sb , & key , & path ) ;
2005-04-17 02:20:36 +04:00
if ( retval = = IO_ERROR ) {
2005-07-13 07:21:28 +04:00
retval = - EIO ;
goto failure ;
}
bh = get_last_bh ( & path ) ;
ih = get_ih ( & path ) ;
item = get_item ( & path ) ;
2005-04-17 02:20:36 +04:00
pos_in_item = path . pos_in_item ;
2005-07-13 07:21:28 +04:00
fs_gen = get_generation ( inode - > i_sb ) ;
copy_item_head ( & tmp_ih , ih ) ;
if ( allocation_needed
( retval , allocated_block_nr , ih , item , pos_in_item ) ) {
/* we have to allocate block for the unformatted node */
if ( ! th ) {
pathrelse ( & path ) ;
goto start_trans ;
}
repeat =
_allocate_block ( th , block , inode , & allocated_block_nr ,
& path , create ) ;
if ( repeat = = NO_DISK_SPACE | | repeat = = QUOTA_EXCEEDED ) {
/* restart the transaction to give the journal a chance to free
* * some blocks . releases the path , so we have to go back to
* * research if we succeed on the second try
*/
SB_JOURNAL ( inode - > i_sb ) - > j_next_async_flush = 1 ;
retval = restart_transaction ( th , inode , & path ) ;
if ( retval )
goto failure ;
repeat =
_allocate_block ( th , block , inode ,
& allocated_block_nr , NULL , create ) ;
if ( repeat ! = NO_DISK_SPACE & & repeat ! = QUOTA_EXCEEDED ) {
goto research ;
}
if ( repeat = = QUOTA_EXCEEDED )
retval = - EDQUOT ;
else
retval = - ENOSPC ;
goto failure ;
}
if ( fs_changed ( fs_gen , inode - > i_sb )
& & item_moved ( & tmp_ih , & path ) ) {
goto research ;
}
}
if ( indirect_item_found ( retval , ih ) ) {
b_blocknr_t unfm_ptr ;
/* 'block'-th block is in the file already (there is
corresponding cell in some indirect item ) . But it may be
zero unformatted node pointer ( hole ) */
unfm_ptr = get_block_num ( item , pos_in_item ) ;
if ( unfm_ptr = = 0 ) {
/* use allocated block to plug the hole */
reiserfs_prepare_for_journal ( inode - > i_sb , bh , 1 ) ;
if ( fs_changed ( fs_gen , inode - > i_sb )
& & item_moved ( & tmp_ih , & path ) ) {
reiserfs_restore_prepared_buffer ( inode - > i_sb ,
bh ) ;
goto research ;
}
set_buffer_new ( bh_result ) ;
if ( buffer_dirty ( bh_result )
& & reiserfs_data_ordered ( inode - > i_sb ) )
reiserfs_add_ordered_list ( inode , bh_result ) ;
put_block_num ( item , pos_in_item , allocated_block_nr ) ;
unfm_ptr = allocated_block_nr ;
journal_mark_dirty ( th , inode - > i_sb , bh ) ;
reiserfs_update_sd ( th , inode ) ;
}
set_block_dev_mapped ( bh_result , unfm_ptr , inode ) ;
pathrelse ( & path ) ;
retval = 0 ;
if ( ! dangle & & th )
retval = reiserfs_end_persistent_transaction ( th ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
/* the item was found, so new blocks were not added to the file
2009-03-30 22:02:44 +04:00
* * there is no need to make sure the inode is updated with this
2005-07-13 07:21:28 +04:00
* * transaction
*/
return retval ;
}
if ( ! th ) {
pathrelse ( & path ) ;
goto start_trans ;
}
/* desired position is not found or is in the direct item. We have
to append file with holes up to ' block ' - th block converting
direct items to indirect one if necessary */
done = 0 ;
do {
if ( is_statdata_le_ih ( ih ) ) {
__le32 unp = 0 ;
struct cpu_key tmp_key ;
/* indirect item has to be inserted */
make_le_item_head ( & tmp_ih , & key , version , 1 ,
TYPE_INDIRECT , UNFM_P_SIZE ,
0 /* free_space */ ) ;
if ( cpu_key_k_offset ( & key ) = = 1 ) {
/* we are going to add 'block'-th block to the file. Use
allocated block for that */
unp = cpu_to_le32 ( allocated_block_nr ) ;
set_block_dev_mapped ( bh_result ,
allocated_block_nr , inode ) ;
set_buffer_new ( bh_result ) ;
done = 1 ;
}
tmp_key = key ; // ;)
set_cpu_key_k_offset ( & tmp_key , 1 ) ;
PATH_LAST_POSITION ( & path ) + + ;
retval =
reiserfs_insert_item ( th , & path , & tmp_key , & tmp_ih ,
inode , ( char * ) & unp ) ;
if ( retval ) {
reiserfs_free_block ( th , inode ,
allocated_block_nr , 1 ) ;
goto failure ; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
}
//mark_tail_converted (inode);
} else if ( is_direct_le_ih ( ih ) ) {
/* direct item has to be converted */
loff_t tail_offset ;
tail_offset =
( ( le_ih_k_offset ( ih ) -
1 ) & ~ ( inode - > i_sb - > s_blocksize - 1 ) ) + 1 ;
if ( tail_offset = = cpu_key_k_offset ( & key ) ) {
/* direct item we just found fits into block we have
to map . Convert it into unformatted node : use
bh_result for the conversion */
set_block_dev_mapped ( bh_result ,
allocated_block_nr , inode ) ;
unbh = bh_result ;
done = 1 ;
} else {
/* we have to padd file tail stored in direct item(s)
up to block size and convert it to unformatted
node . FIXME : this should also get into page cache */
pathrelse ( & path ) ;
/*
* ugly , but we can only end the transaction if
* we aren ' t nested
*/
BUG_ON ( ! th - > t_refcount ) ;
if ( th - > t_refcount = = 1 ) {
retval =
reiserfs_end_persistent_transaction
( th ) ;
th = NULL ;
if ( retval )
goto failure ;
}
retval =
convert_tail_for_hole ( inode , bh_result ,
tail_offset ) ;
if ( retval ) {
if ( retval ! = - ENOSPC )
2009-03-30 22:02:28 +04:00
reiserfs_error ( inode - > i_sb ,
" clm-6004 " ,
" convert tail failed "
" inode %lu, error %d " ,
inode - > i_ino ,
retval ) ;
2005-07-13 07:21:28 +04:00
if ( allocated_block_nr ) {
/* the bitmap, the super, and the stat data == 3 */
if ( ! th )
th = reiserfs_persistent_transaction ( inode - > i_sb , 3 ) ;
if ( th )
reiserfs_free_block ( th ,
inode ,
allocated_block_nr ,
1 ) ;
}
goto failure ;
}
goto research ;
}
retval =
direct2indirect ( th , inode , & path , unbh ,
tail_offset ) ;
if ( retval ) {
reiserfs_unmap_buffer ( unbh ) ;
reiserfs_free_block ( th , inode ,
allocated_block_nr , 1 ) ;
goto failure ;
}
/* it is important the set_buffer_uptodate is done after
* * the direct2indirect . The buffer might contain valid
* * data newer than the data on disk ( read by readpage , changed ,
* * and then sent here by writepage ) . direct2indirect needs
* * to know if unbh was already up to date , so it can decide
* * if the data in unbh needs to be replaced with data from
* * the disk
*/
set_buffer_uptodate ( unbh ) ;
/* unbh->b_page == NULL in case of DIRECT_IO request, this means
buffer will disappear shortly, so it should not be added to
the inode's tail list */
if ( unbh - > b_page ) {
/* we've converted the tail, so we must
* * flush unbh before the transaction commits
*/
reiserfs_add_tail_list ( inode , unbh ) ;
/* mark it dirty now to prevent commit_write from adding
* * this buffer to the inode ' s dirty buffer list
*/
/*
* AKPM : changed __mark_buffer_dirty to mark_buffer_dirty ( ) .
* It ' s still atomic , but it sets the page dirty too ,
* which makes it eligible for writeback at any time by the
* VM ( which was also the case with __mark_buffer_dirty ( ) )
*/
mark_buffer_dirty ( unbh ) ;
}
} else {
/* append indirect item with holes if needed, when appending
pointer to ' block ' - th block use block , which is already
allocated */
struct cpu_key tmp_key ;
unp_t unf_single = 0 ; // We use this in case we need to allocate only
// one block which is a fastpath
unp_t * un ;
__u64 max_to_insert =
MAX_ITEM_LEN ( inode - > i_sb - > s_blocksize ) /
UNFM_P_SIZE ;
__u64 blocks_needed ;
RFALSE ( pos_in_item ! = ih_item_len ( ih ) / UNFM_P_SIZE ,
" vs-804: invalid position for append " ) ;
/* indirect item has to be appended, set up key of that position */
make_cpu_key ( & tmp_key , inode ,
le_key_k_offset ( version ,
& ( ih - > ih_key ) ) +
op_bytes_number ( ih ,
inode - > i_sb - > s_blocksize ) ,
//pos_in_item * inode->i_sb->s_blocksize,
TYPE_INDIRECT , 3 ) ; // key type is unimportant
2006-03-02 13:54:39 +03:00
RFALSE ( cpu_key_k_offset ( & tmp_key ) > cpu_key_k_offset ( & key ) ,
" green-805: invalid offset " ) ;
2005-07-13 07:21:28 +04:00
blocks_needed =
1 +
( ( cpu_key_k_offset ( & key ) -
cpu_key_k_offset ( & tmp_key ) ) > > inode - > i_sb - >
s_blocksize_bits ) ;
if ( blocks_needed = = 1 ) {
un = & unf_single ;
} else {
2006-12-07 07:39:01 +03:00
un = kzalloc ( min ( blocks_needed , max_to_insert ) * UNFM_P_SIZE , GFP_ATOMIC ) ; // We need to avoid scheduling.
2005-07-13 07:21:28 +04:00
if ( ! un ) {
un = & unf_single ;
blocks_needed = 1 ;
max_to_insert = 0 ;
2006-12-07 07:39:01 +03:00
}
2005-07-13 07:21:28 +04:00
}
if ( blocks_needed < = max_to_insert ) {
/* we are going to add target block to the file. Use allocated
block for that */
un [ blocks_needed - 1 ] =
cpu_to_le32 ( allocated_block_nr ) ;
set_block_dev_mapped ( bh_result ,
allocated_block_nr , inode ) ;
set_buffer_new ( bh_result ) ;
done = 1 ;
} else {
/* paste hole to the indirect item */
/* If kmalloc failed, max_to_insert becomes zero and it means we
only have space for one block */
blocks_needed =
max_to_insert ? max_to_insert : 1 ;
}
retval =
reiserfs_paste_into_item ( th , & path , & tmp_key , inode ,
( char * ) un ,
UNFM_P_SIZE *
blocks_needed ) ;
if ( blocks_needed ! = 1 )
kfree ( un ) ;
if ( retval ) {
reiserfs_free_block ( th , inode ,
allocated_block_nr , 1 ) ;
goto failure ;
}
if ( ! done ) {
/* We need to mark new file size in case this function will be
interrupted / aborted later on . And we may do this only for
holes . */
inode - > i_size + =
inode - > i_sb - > s_blocksize * blocks_needed ;
}
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( done = = 1 )
break ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* this loop could log more blocks than we had originally asked
* * for . So , we have to allow the transaction to end if it is
2009-03-30 22:02:44 +04:00
* * too big or too full . Update the inode so things are
2005-07-13 07:21:28 +04:00
* * consistent if we crash before the function returns
* *
* * release the path so that anybody waiting on the path before
* * ending their transaction will be able to continue .
*/
if ( journal_transaction_should_end ( th , th - > t_blocks_allocated ) ) {
retval = restart_transaction ( th , inode , & path ) ;
if ( retval )
goto failure ;
}
2009-03-30 22:02:44 +04:00
/* inserting indirect pointers for a hole can take a
2005-07-13 07:21:28 +04:00
* * long time . reschedule if needed
*/
cond_resched ( ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
retval = search_for_position_by_key ( inode - > i_sb , & key , & path ) ;
if ( retval = = IO_ERROR ) {
retval = - EIO ;
goto failure ;
}
if ( retval = = POSITION_FOUND ) {
2009-03-30 22:02:21 +04:00
reiserfs_warning ( inode - > i_sb , " vs-825 " ,
2005-07-13 07:21:28 +04:00
" %K should not be found " , & key ) ;
retval = - EEXIST ;
if ( allocated_block_nr )
reiserfs_free_block ( th , inode ,
allocated_block_nr , 1 ) ;
pathrelse ( & path ) ;
goto failure ;
}
bh = get_last_bh ( & path ) ;
ih = get_ih ( & path ) ;
item = get_item ( & path ) ;
pos_in_item = path . pos_in_item ;
} while ( 1 ) ;
retval = 0 ;
failure :
if ( th & & ( ! dangle | | ( retval & & ! th - > t_trans_id ) ) ) {
int err ;
if ( th - > t_trans_id )
reiserfs_update_sd ( th , inode ) ;
err = reiserfs_end_persistent_transaction ( th ) ;
if ( err )
retval = err ;
}
reiserfs_write_unlock ( inode - > i_sb ) ;
reiserfs_check_path ( & path ) ;
return retval ;
2005-04-17 02:20:36 +04:00
}
static int
reiserfs_readpages ( struct file * file , struct address_space * mapping ,
2005-07-13 07:21:28 +04:00
struct list_head * pages , unsigned nr_pages )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
return mpage_readpages ( mapping , pages , nr_pages , reiserfs_get_block ) ;
2005-04-17 02:20:36 +04:00
}
/* Compute real number of used bytes by file
* Following three functions can go away when we ' ll have enough space in stat item
*/
static int real_space_diff ( struct inode * inode , int sd_size )
{
2005-07-13 07:21:28 +04:00
int bytes ;
loff_t blocksize = inode - > i_sb - > s_blocksize ;
if ( S_ISLNK ( inode - > i_mode ) | | S_ISDIR ( inode - > i_mode ) )
return sd_size ;
/* End of file is also in full block with indirect reference, so round
* * up to the next block .
* *
* * there is just no way to know if the tail is actually packed
* * on the file , so we have to assume it isn ' t . When we pack the
* * tail , we add 4 bytes to pretend there really is an unformatted
* * node pointer
*/
bytes =
( ( inode - > i_size +
( blocksize - 1 ) ) > > inode - > i_sb - > s_blocksize_bits ) * UNFM_P_SIZE +
sd_size ;
return bytes ;
2005-04-17 02:20:36 +04:00
}
/*
 * Convert an on-disk block count (@blocks, in 512-byte units) into the
 * number of bytes charged to the inode.  Symlinks and directories are
 * charged i_size plus the overhead from real_space_diff(); everything
 * else is charged @blocks * 512 plus that overhead.
 */
static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
					int sd_size)
{
	loff_t overhead = real_space_diff(inode, sd_size);

	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
		return inode->i_size + overhead;

	return overhead + ((loff_t)blocks << 9);
}
/* Compute number of blocks used by file in ReiserFS counting */
static inline ulong to_fake_used_blocks ( struct inode * inode , int sd_size )
{
2005-07-13 07:21:28 +04:00
loff_t bytes = inode_get_bytes ( inode ) ;
loff_t real_space = real_space_diff ( inode , sd_size ) ;
/* keeps fsck and non-quota versions of reiserfs happy */
if ( S_ISLNK ( inode - > i_mode ) | | S_ISDIR ( inode - > i_mode ) ) {
bytes + = ( loff_t ) 511 ;
}
/* files from before the quota patch might i_blocks such that
* * bytes < real_space . Deal with that here to prevent it from
* * going negative .
*/
if ( bytes < real_space )
return 0 ;
return ( bytes - real_space ) > > 9 ;
2005-04-17 02:20:36 +04:00
}
//
// BAD: new directories have stat data of new type and all other items
// of old type. Version stored in the inode says about body items, so
// in update_stat_data we can not rely on inode, but have to check
// item version directly
//
// called by read_locked_inode
2006-12-08 13:36:32 +03:00
static void init_inode ( struct inode * inode , struct treepath * path )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct buffer_head * bh ;
struct item_head * ih ;
__u32 rdev ;
//int version = ITEM_VERSION_1;
bh = PATH_PLAST_BUFFER ( path ) ;
ih = PATH_PITEM_HEAD ( path ) ;
copy_key ( INODE_PKEY ( inode ) , & ( ih - > ih_key ) ) ;
INIT_LIST_HEAD ( & ( REISERFS_I ( inode ) - > i_prealloc_list ) ) ;
REISERFS_I ( inode ) - > i_flags = 0 ;
REISERFS_I ( inode ) - > i_prealloc_block = 0 ;
REISERFS_I ( inode ) - > i_prealloc_count = 0 ;
REISERFS_I ( inode ) - > i_trans_id = 0 ;
REISERFS_I ( inode ) - > i_jl = NULL ;
2007-01-23 07:40:46 +03:00
mutex_init ( & ( REISERFS_I ( inode ) - > i_mmap ) ) ;
2006-09-29 13:00:00 +04:00
reiserfs_init_acl_access ( inode ) ;
reiserfs_init_acl_default ( inode ) ;
2006-09-29 12:59:58 +04:00
reiserfs_init_xattr_rwsem ( inode ) ;
2005-07-13 07:21:28 +04:00
if ( stat_data_v1 ( ih ) ) {
struct stat_data_v1 * sd =
( struct stat_data_v1 * ) B_I_PITEM ( bh , ih ) ;
unsigned long blocks ;
set_inode_item_key_version ( inode , KEY_FORMAT_3_5 ) ;
set_inode_sd_version ( inode , STAT_DATA_V1 ) ;
inode - > i_mode = sd_v1_mode ( sd ) ;
inode - > i_nlink = sd_v1_nlink ( sd ) ;
inode - > i_uid = sd_v1_uid ( sd ) ;
inode - > i_gid = sd_v1_gid ( sd ) ;
inode - > i_size = sd_v1_size ( sd ) ;
inode - > i_atime . tv_sec = sd_v1_atime ( sd ) ;
inode - > i_mtime . tv_sec = sd_v1_mtime ( sd ) ;
inode - > i_ctime . tv_sec = sd_v1_ctime ( sd ) ;
inode - > i_atime . tv_nsec = 0 ;
inode - > i_ctime . tv_nsec = 0 ;
inode - > i_mtime . tv_nsec = 0 ;
inode - > i_blocks = sd_v1_blocks ( sd ) ;
inode - > i_generation = le32_to_cpu ( INODE_PKEY ( inode ) - > k_dir_id ) ;
blocks = ( inode - > i_size + 511 ) > > 9 ;
blocks = _ROUND_UP ( blocks , inode - > i_sb - > s_blocksize > > 9 ) ;
if ( inode - > i_blocks > blocks ) {
// there was a bug in <=3.5.23 when i_blocks could take negative
// values. Starting from 3.5.17 this value could even be stored in
// stat data. For such files we set i_blocks based on file
// size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
// only updated if file's inode will ever change
inode - > i_blocks = blocks ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
rdev = sd_v1_rdev ( sd ) ;
REISERFS_I ( inode ) - > i_first_direct_byte =
sd_v1_first_direct_byte ( sd ) ;
/* an early bug in the quota code can give us an odd number for the
* * block count . This is incorrect , fix it here .
*/
if ( inode - > i_blocks & 1 ) {
inode - > i_blocks + + ;
}
inode_set_bytes ( inode ,
to_real_used_space ( inode , inode - > i_blocks ,
SD_V1_SIZE ) ) ;
/* nopack is initially zero for v1 objects. For v2 objects,
nopack is initialised from sd_attrs */
REISERFS_I ( inode ) - > i_flags & = ~ i_nopack_mask ;
} else {
// new stat data found, but object may have old items
// (directories and symlinks)
struct stat_data * sd = ( struct stat_data * ) B_I_PITEM ( bh , ih ) ;
inode - > i_mode = sd_v2_mode ( sd ) ;
inode - > i_nlink = sd_v2_nlink ( sd ) ;
inode - > i_uid = sd_v2_uid ( sd ) ;
inode - > i_size = sd_v2_size ( sd ) ;
inode - > i_gid = sd_v2_gid ( sd ) ;
inode - > i_mtime . tv_sec = sd_v2_mtime ( sd ) ;
inode - > i_atime . tv_sec = sd_v2_atime ( sd ) ;
inode - > i_ctime . tv_sec = sd_v2_ctime ( sd ) ;
inode - > i_ctime . tv_nsec = 0 ;
inode - > i_mtime . tv_nsec = 0 ;
inode - > i_atime . tv_nsec = 0 ;
inode - > i_blocks = sd_v2_blocks ( sd ) ;
rdev = sd_v2_rdev ( sd ) ;
if ( S_ISCHR ( inode - > i_mode ) | | S_ISBLK ( inode - > i_mode ) )
inode - > i_generation =
le32_to_cpu ( INODE_PKEY ( inode ) - > k_dir_id ) ;
else
inode - > i_generation = sd_v2_generation ( sd ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( S_ISDIR ( inode - > i_mode ) | | S_ISLNK ( inode - > i_mode ) )
set_inode_item_key_version ( inode , KEY_FORMAT_3_5 ) ;
else
set_inode_item_key_version ( inode , KEY_FORMAT_3_6 ) ;
REISERFS_I ( inode ) - > i_first_direct_byte = 0 ;
set_inode_sd_version ( inode , STAT_DATA_V2 ) ;
inode_set_bytes ( inode ,
to_real_used_space ( inode , inode - > i_blocks ,
SD_V2_SIZE ) ) ;
/* read persistent inode attributes from sd and initalise
generic inode flags from them */
REISERFS_I ( inode ) - > i_attrs = sd_v2_attrs ( sd ) ;
sd_attrs_to_i_attrs ( sd_v2_attrs ( sd ) , inode ) ;
}
pathrelse ( path ) ;
if ( S_ISREG ( inode - > i_mode ) ) {
inode - > i_op = & reiserfs_file_inode_operations ;
inode - > i_fop = & reiserfs_file_operations ;
inode - > i_mapping - > a_ops = & reiserfs_address_space_operations ;
} else if ( S_ISDIR ( inode - > i_mode ) ) {
inode - > i_op = & reiserfs_dir_inode_operations ;
inode - > i_fop = & reiserfs_dir_operations ;
} else if ( S_ISLNK ( inode - > i_mode ) ) {
inode - > i_op = & reiserfs_symlink_inode_operations ;
inode - > i_mapping - > a_ops = & reiserfs_address_space_operations ;
} else {
inode - > i_blocks = 0 ;
inode - > i_op = & reiserfs_special_inode_operations ;
init_special_inode ( inode , inode - > i_mode , new_decode_dev ( rdev ) ) ;
}
}
2005-04-17 02:20:36 +04:00
// update new stat data with inode fields
2005-07-13 07:21:28 +04:00
static void inode2sd ( void * sd , struct inode * inode , loff_t size )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct stat_data * sd_v2 = ( struct stat_data * ) sd ;
__u16 flags ;
set_sd_v2_mode ( sd_v2 , inode - > i_mode ) ;
set_sd_v2_nlink ( sd_v2 , inode - > i_nlink ) ;
set_sd_v2_uid ( sd_v2 , inode - > i_uid ) ;
set_sd_v2_size ( sd_v2 , size ) ;
set_sd_v2_gid ( sd_v2 , inode - > i_gid ) ;
set_sd_v2_mtime ( sd_v2 , inode - > i_mtime . tv_sec ) ;
set_sd_v2_atime ( sd_v2 , inode - > i_atime . tv_sec ) ;
set_sd_v2_ctime ( sd_v2 , inode - > i_ctime . tv_sec ) ;
set_sd_v2_blocks ( sd_v2 , to_fake_used_blocks ( inode , SD_V2_SIZE ) ) ;
if ( S_ISCHR ( inode - > i_mode ) | | S_ISBLK ( inode - > i_mode ) )
set_sd_v2_rdev ( sd_v2 , new_encode_dev ( inode - > i_rdev ) ) ;
else
set_sd_v2_generation ( sd_v2 , inode - > i_generation ) ;
flags = REISERFS_I ( inode ) - > i_attrs ;
i_attrs_to_sd_attrs ( inode , & flags ) ;
set_sd_v2_attrs ( sd_v2 , flags ) ;
2005-04-17 02:20:36 +04:00
}
// used to copy inode's fields to old stat data
2005-07-13 07:21:28 +04:00
static void inode2sd_v1 ( void * sd , struct inode * inode , loff_t size )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct stat_data_v1 * sd_v1 = ( struct stat_data_v1 * ) sd ;
set_sd_v1_mode ( sd_v1 , inode - > i_mode ) ;
set_sd_v1_uid ( sd_v1 , inode - > i_uid ) ;
set_sd_v1_gid ( sd_v1 , inode - > i_gid ) ;
set_sd_v1_nlink ( sd_v1 , inode - > i_nlink ) ;
set_sd_v1_size ( sd_v1 , size ) ;
set_sd_v1_atime ( sd_v1 , inode - > i_atime . tv_sec ) ;
set_sd_v1_ctime ( sd_v1 , inode - > i_ctime . tv_sec ) ;
set_sd_v1_mtime ( sd_v1 , inode - > i_mtime . tv_sec ) ;
if ( S_ISCHR ( inode - > i_mode ) | | S_ISBLK ( inode - > i_mode ) )
set_sd_v1_rdev ( sd_v1 , new_encode_dev ( inode - > i_rdev ) ) ;
else
set_sd_v1_blocks ( sd_v1 , to_fake_used_blocks ( inode , SD_V1_SIZE ) ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
// Sigh. i_first_direct_byte is back
set_sd_v1_first_direct_byte ( sd_v1 ,
REISERFS_I ( inode ) - > i_first_direct_byte ) ;
}
2005-04-17 02:20:36 +04:00
/* NOTE, you must prepare the buffer head before sending it here,
* * and then log it after the call
*/
2006-12-08 13:36:32 +03:00
static void update_stat_data ( struct treepath * path , struct inode * inode ,
2005-07-13 07:21:28 +04:00
loff_t size )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct buffer_head * bh ;
struct item_head * ih ;
bh = PATH_PLAST_BUFFER ( path ) ;
ih = PATH_PITEM_HEAD ( path ) ;
if ( ! is_statdata_le_ih ( ih ) )
2009-03-30 22:02:25 +04:00
reiserfs_panic ( inode - > i_sb , " vs-13065 " , " key %k, found item %h " ,
2005-07-13 07:21:28 +04:00
INODE_PKEY ( inode ) , ih ) ;
if ( stat_data_v1 ( ih ) ) {
// path points to old stat data
inode2sd_v1 ( B_I_PITEM ( bh , ih ) , inode , size ) ;
} else {
inode2sd ( B_I_PITEM ( bh , ih ) , inode , size ) ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
return ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/*
 * Find the inode's stat data item in the tree, overwrite it with the
 * in-core inode fields (using @size as the file size) and mark its
 * buffer dirty in transaction @th.
 *
 * @th must have a non-zero t_trans_id (enforced with BUG_ON).  Nothing is
 * returned: an I/O error or a missing stat data item is only logged.
 */
void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct super_block *sb = inode->i_sb;
	struct item_head saved_ih;
	struct item_head *ih;
	struct buffer_head *bh;
	struct cpu_key key;
	INITIALIZE_PATH(path);
	int generation;
	int found;

	BUG_ON(!th->t_trans_id);

	/* key type is unimportant */
	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);

	while (1) {
		/* look for the object's stat data */
		found = search_item(sb, &key, &path);

		if (found == IO_ERROR) {
			reiserfs_error(sb, "vs-13050",
				       "i/o failure occurred trying to "
				       "update %K stat data", &key);
			return;
		}

		if (found == ITEM_NOT_FOUND) {
			int pos = PATH_LAST_POSITION(&path);

			pathrelse(&path);
			/* an already unlinked object: stay silent */
			if (inode->i_nlink == 0)
				return;
			reiserfs_warning(sb, "vs-13060",
					 "stat data of object %k (nlink == %d) "
					 "not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/*
		 * Sigh, prepare_for_journal might schedule, and while it
		 * does the FS might change.  Remember the item head and
		 * the generation so that can be detected.
		 */
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		copy_item_head(&saved_ih, ih);
		generation = get_generation(sb);
		reiserfs_prepare_for_journal(sb, bh, 1);

		if (!fs_changed(generation, sb) || !item_moved(&saved_ih, &path))
			break;

		/* the stat data item moved while we slept: search again */
		reiserfs_restore_prepared_buffer(sb, bh);
	}

	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, th->t_super, bh);
	pathrelse(&path);
}
/* reiserfs_read_locked_inode is called to read the inode off disk, and it
* * does a make_bad_inode when things go wrong . But , we need to make sure
* * and clear the key in the private portion of the inode , otherwise a
* * corresponding iput might try to delete whatever object the inode last
* * represented .
*/
2005-07-13 07:21:28 +04:00
static void reiserfs_make_bad_inode ( struct inode * inode )
{
memset ( INODE_PKEY ( inode ) , 0 , KEY_SIZE ) ;
make_bad_inode ( inode ) ;
2005-04-17 02:20:36 +04:00
}
//
// initially this function was derived from minix or ext2's analog and
// evolved as the prototype did
//
2005-07-13 07:21:28 +04:00
int reiserfs_init_locked_inode ( struct inode * inode , void * p )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct reiserfs_iget_args * args = ( struct reiserfs_iget_args * ) p ;
inode - > i_ino = args - > objectid ;
INODE_PKEY ( inode ) - > k_dir_id = cpu_to_le32 ( args - > dirid ) ;
return 0 ;
2005-04-17 02:20:36 +04:00
}
/* looks for stat data in the tree, and fills up the fields of in-core
inode stat data fields */
2005-07-13 07:21:28 +04:00
void reiserfs_read_locked_inode ( struct inode * inode ,
struct reiserfs_iget_args * args )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
INITIALIZE_PATH ( path_to_sd ) ;
struct cpu_key key ;
unsigned long dirino ;
int retval ;
dirino = args - > dirid ;
/* set version 1, version 2 could be used too, because stat data
key is the same in both versions */
key . version = KEY_FORMAT_3_5 ;
key . on_disk_key . k_dir_id = dirino ;
key . on_disk_key . k_objectid = inode - > i_ino ;
key . on_disk_key . k_offset = 0 ;
key . on_disk_key . k_type = 0 ;
/* look for the object's stat data */
retval = search_item ( inode - > i_sb , & key , & path_to_sd ) ;
if ( retval = = IO_ERROR ) {
2009-03-30 22:02:28 +04:00
reiserfs_error ( inode - > i_sb , " vs-13070 " ,
" i/o failure occurred trying to find "
" stat data of %K " , & key ) ;
2005-07-13 07:21:28 +04:00
reiserfs_make_bad_inode ( inode ) ;
return ;
}
if ( retval ! = ITEM_FOUND ) {
/* a stale NFS handle can trigger this without it being an error */
pathrelse ( & path_to_sd ) ;
reiserfs_make_bad_inode ( inode ) ;
inode - > i_nlink = 0 ;
return ;
}
init_inode ( inode , & path_to_sd ) ;
/* It is possible that knfsd is trying to access inode of a file
that is being removed from the disk by some other thread . As we
update sd on unlink all that is required is to check for nlink
here . This bug was first found by Sizif when debugging
SquidNG / Butterfly , forgotten , and found again after Philippe
2009-03-30 22:02:44 +04:00
Gramoulle < philippe . gramoulle @ mmania . com > reproduced it .
2005-07-13 07:21:28 +04:00
More logical fix would require changes in fs / inode . c : iput ( ) to
remove inode from hash - table _after_ fs cleaned disk stuff up and
in iget ( ) to return NULL if I_FREEING inode is found in
hash - table . */
/* Currently there is one place where it's ok to meet inode with
nlink = = 0 : processing of open - unlinked and half - truncated files
during mount ( fs / reiserfs / super . c : finish_unfinished ( ) ) . */
if ( ( inode - > i_nlink = = 0 ) & &
! REISERFS_SB ( inode - > i_sb ) - > s_is_unlinked_ok ) {
2009-03-30 22:02:21 +04:00
reiserfs_warning ( inode - > i_sb , " vs-13075 " ,
2005-07-13 07:21:28 +04:00
" dead inode read from disk %K. "
" This is likely to be race with knfsd. Ignore " ,
& key ) ;
reiserfs_make_bad_inode ( inode ) ;
}
reiserfs_check_path ( & path_to_sd ) ; /* init inode should be relsing */
2005-04-17 02:20:36 +04:00
}
/**
* reiserfs_find_actor ( ) - " find actor " reiserfs supplies to iget5_locked ( ) .
*
* @ inode : inode from hash table to check
* @ opaque : " cookie " passed to iget5_locked ( ) . This is & reiserfs_iget_args .
*
* This function is called by iget5_locked ( ) to distinguish reiserfs inodes
* having the same inode numbers . Such inodes can only exist due to some
* error condition . One of them should be bad . Inodes with identical
* inode numbers ( objectids ) are distinguished by parent directory ids .
*
*/
2005-07-13 07:21:28 +04:00
int reiserfs_find_actor ( struct inode * inode , void * opaque )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct reiserfs_iget_args * args ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
args = opaque ;
/* args is already in CPU order */
return ( inode - > i_ino = = args - > objectid ) & &
( le32_to_cpu ( INODE_PKEY ( inode ) - > k_dir_id ) = = args - > dirid ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
struct inode * reiserfs_iget ( struct super_block * s , const struct cpu_key * key )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct inode * inode ;
struct reiserfs_iget_args args ;
args . objectid = key - > on_disk_key . k_objectid ;
args . dirid = key - > on_disk_key . k_dir_id ;
inode = iget5_locked ( s , key - > on_disk_key . k_objectid ,
reiserfs_find_actor , reiserfs_init_locked_inode ,
( void * ) ( & args ) ) ;
if ( ! inode )
return ERR_PTR ( - ENOMEM ) ;
if ( inode - > i_state & I_NEW ) {
reiserfs_read_locked_inode ( inode , & args ) ;
unlock_new_inode ( inode ) ;
}
if ( comp_short_keys ( INODE_PKEY ( inode ) , key ) | | is_bad_inode ( inode ) ) {
/* either due to i/o error or a stale NFS handle */
iput ( inode ) ;
inode = NULL ;
}
return inode ;
2005-04-17 02:20:36 +04:00
}
2007-10-22 03:42:13 +04:00
/*
 * Look up the inode for (@dir_id, @objectid) under the reiserfs write
 * lock and pass it to d_obtain_alias().  A non-zero @generation must
 * match the inode's i_generation; on a mismatch the inode is dropped and
 * NULL is passed to d_obtain_alias() instead.
 */
static struct dentry *reiserfs_get_dentry(struct super_block *sb,
					  u32 objectid, u32 dir_id,
					  u32 generation)
{
	struct cpu_key lookup_key;
	struct inode *inode;

	/* reiserfs_iget() only needs the short key */
	lookup_key.on_disk_key.k_objectid = objectid;
	lookup_key.on_disk_key.k_dir_id = dir_id;

	reiserfs_write_lock(sb);
	inode = reiserfs_iget(sb, &lookup_key);
	if (inode && !IS_ERR(inode)) {
		if (generation != 0 && generation != inode->i_generation) {
			iput(inode);
			inode = NULL;
		}
	}
	reiserfs_write_unlock(sb);

	return d_obtain_alias(inode);
}
2007-10-22 03:42:13 +04:00
struct dentry * reiserfs_fh_to_dentry ( struct super_block * sb , struct fid * fid ,
int fh_len , int fh_type )
2005-07-13 07:21:28 +04:00
{
/* fhtype happens to reflect the number of u32s encoded.
* due to a bug in earlier code , fhtype might indicate there
* are more u32s then actually fitted .
* so if fhtype seems to be more than len , reduce fhtype .
* Valid types are :
* 2 - objectid + dir_id - legacy support
* 3 - objectid + dir_id + generation
* 4 - objectid + dir_id + objectid and dirid of parent - legacy
* 5 - objectid + dir_id + generation + objectid and dirid of parent
* 6 - as above plus generation of directory
* 6 does not fit in NFSv2 handles
*/
2007-10-22 03:42:13 +04:00
if ( fh_type > fh_len ) {
if ( fh_type ! = 6 | | fh_len ! = 5 )
2009-03-30 22:02:21 +04:00
reiserfs_warning ( sb , " reiserfs-13077 " ,
2007-10-22 03:42:13 +04:00
" nfsd/reiserfs, fhtype=%d, len=%d - odd " ,
fh_type , fh_len ) ;
fh_type = 5 ;
2005-07-13 07:21:28 +04:00
}
2007-10-22 03:42:13 +04:00
return reiserfs_get_dentry ( sb , fid - > raw [ 0 ] , fid - > raw [ 1 ] ,
( fh_type = = 3 | | fh_type > = 5 ) ? fid - > raw [ 2 ] : 0 ) ;
}
2005-04-17 02:20:36 +04:00
2007-10-22 03:42:13 +04:00
struct dentry * reiserfs_fh_to_parent ( struct super_block * sb , struct fid * fid ,
int fh_len , int fh_type )
{
if ( fh_type < 4 )
return NULL ;
return reiserfs_get_dentry ( sb ,
( fh_type > = 5 ) ? fid - > raw [ 3 ] : fid - > raw [ 2 ] ,
( fh_type > = 5 ) ? fid - > raw [ 4 ] : fid - > raw [ 3 ] ,
( fh_type = = 6 ) ? fid - > raw [ 5 ] : 0 ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/*
 * Encode a file handle for @dentry into @data.
 *
 * On entry *@lenp is the capacity of @data in u32 words; on success it is
 * set to the number of words written, which is also the return value:
 *   3 - objectid, dir_id, generation
 *   5 - the above plus the parent's objectid and dir_id
 *   6 - the above plus the parent's generation
 * Returns 255 without writing anything when fewer than 3 words fit.
 * Parent information is added only when @need_parent is set and at least
 * 5 words fit.
 */
int reiserfs_encode_fh(struct dentry *dentry, __u32 *data, int *lenp,
		       int need_parent)
{
	struct inode *inode = dentry->d_inode;
	struct inode *parent;
	int maxlen = *lenp;
	int len;

	if (maxlen < 3)
		return 255;

	data[0] = inode->i_ino;
	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
	data[2] = inode->i_generation;
	*lenp = 3;

	/* no room for directory info? return what we've stored so far */
	if (!need_parent || maxlen < 5)
		return 3;

	/* d_parent is read under d_lock */
	spin_lock(&dentry->d_lock);
	parent = dentry->d_parent->d_inode;
	data[3] = parent->i_ino;
	data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id);
	len = 5;
	if (maxlen >= 6) {
		data[5] = parent->i_generation;
		len = 6;
	}
	*lenp = len;
	spin_unlock(&dentry->d_lock);

	return len;
}
2005-04-17 02:20:36 +04:00
/* looks for stat data, then copies fields to it, marks the buffer
containing stat data as dirty */
/* reiserfs inodes are never really dirty, since the dirty inode call
* * always logs them . This call allows the VFS inode marking routines
* * to properly mark inodes for datasync and such , but only actually
* * does something when called for a synchronous update .
*/
2005-07-13 07:21:28 +04:00
int reiserfs_write_inode ( struct inode * inode , int do_sync )
{
struct reiserfs_transaction_handle th ;
int jbegin_count = 1 ;
if ( inode - > i_sb - > s_flags & MS_RDONLY )
return - EROFS ;
/* memory pressure can sometimes initiate write_inode calls with sync == 1,
2009-03-30 22:02:44 +04:00
* * these cases are just when the system needs ram , not when the
2005-07-13 07:21:28 +04:00
* * inode needs to reach disk for safety , and they can safely be
* * ignored because the altered inode has already been logged .
*/
if ( do_sync & & ! ( current - > flags & PF_MEMALLOC ) ) {
reiserfs_write_lock ( inode - > i_sb ) ;
if ( ! journal_begin ( & th , inode - > i_sb , jbegin_count ) ) {
reiserfs_update_sd ( & th , inode ) ;
journal_end_sync ( & th , inode - > i_sb , jbegin_count ) ;
}
reiserfs_write_unlock ( inode - > i_sb ) ;
}
return 0 ;
2005-04-17 02:20:36 +04:00
}
/* stat data of new object is inserted already, this inserts the item
containing " . " and " .. " entries */
2005-07-13 07:21:28 +04:00
static int reiserfs_new_directory ( struct reiserfs_transaction_handle * th ,
struct inode * inode ,
2006-12-08 13:36:32 +03:00
struct item_head * ih , struct treepath * path ,
2005-07-13 07:21:28 +04:00
struct inode * dir )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct super_block * sb = th - > t_super ;
char empty_dir [ EMPTY_DIR_SIZE ] ;
char * body = empty_dir ;
struct cpu_key key ;
int retval ;
BUG_ON ( ! th - > t_trans_id ) ;
_make_cpu_key ( & key , KEY_FORMAT_3_5 , le32_to_cpu ( ih - > ih_key . k_dir_id ) ,
le32_to_cpu ( ih - > ih_key . k_objectid ) , DOT_OFFSET ,
TYPE_DIRENTRY , 3 /*key length */ ) ;
/* compose item head for new item. Directories consist of items of
old type ( ITEM_VERSION_1 ) . Do not set key ( second arg is 0 ) , it
is done by reiserfs_new_inode */
if ( old_format_only ( sb ) ) {
make_le_item_head ( ih , NULL , KEY_FORMAT_3_5 , DOT_OFFSET ,
TYPE_DIRENTRY , EMPTY_DIR_SIZE_V1 , 2 ) ;
make_empty_dir_item_v1 ( body , ih - > ih_key . k_dir_id ,
ih - > ih_key . k_objectid ,
INODE_PKEY ( dir ) - > k_dir_id ,
INODE_PKEY ( dir ) - > k_objectid ) ;
} else {
make_le_item_head ( ih , NULL , KEY_FORMAT_3_5 , DOT_OFFSET ,
TYPE_DIRENTRY , EMPTY_DIR_SIZE , 2 ) ;
make_empty_dir_item ( body , ih - > ih_key . k_dir_id ,
ih - > ih_key . k_objectid ,
INODE_PKEY ( dir ) - > k_dir_id ,
INODE_PKEY ( dir ) - > k_objectid ) ;
}
/* look for place in the tree for new item */
retval = search_item ( sb , & key , path ) ;
if ( retval = = IO_ERROR ) {
2009-03-30 22:02:28 +04:00
reiserfs_error ( sb , " vs-13080 " ,
" i/o failure occurred creating new directory " ) ;
2005-07-13 07:21:28 +04:00
return - EIO ;
}
if ( retval = = ITEM_FOUND ) {
pathrelse ( path ) ;
2009-03-30 22:02:21 +04:00
reiserfs_warning ( sb , " vs-13070 " ,
2005-07-13 07:21:28 +04:00
" object with this key exists (%k) " ,
& ( ih - > ih_key ) ) ;
return - EEXIST ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* insert item, that is empty directory item */
return reiserfs_insert_item ( th , path , & key , ih , inode , body ) ;
}
2005-04-17 02:20:36 +04:00
/* stat data of object has been inserted, this inserts the item
containing the body of symlink */
2005-07-13 07:21:28 +04:00
static int reiserfs_new_symlink ( struct reiserfs_transaction_handle * th , struct inode * inode , /* Inode of symlink */
struct item_head * ih ,
2006-12-08 13:36:32 +03:00
struct treepath * path , const char * symname ,
2005-07-13 07:21:28 +04:00
int item_len )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct super_block * sb = th - > t_super ;
struct cpu_key key ;
int retval ;
BUG_ON ( ! th - > t_trans_id ) ;
_make_cpu_key ( & key , KEY_FORMAT_3_5 ,
le32_to_cpu ( ih - > ih_key . k_dir_id ) ,
le32_to_cpu ( ih - > ih_key . k_objectid ) ,
1 , TYPE_DIRECT , 3 /*key length */ ) ;
make_le_item_head ( ih , NULL , KEY_FORMAT_3_5 , 1 , TYPE_DIRECT , item_len ,
0 /*free_space */ ) ;
/* look for place in the tree for new item */
retval = search_item ( sb , & key , path ) ;
if ( retval = = IO_ERROR ) {
2009-03-30 22:02:28 +04:00
reiserfs_error ( sb , " vs-13080 " ,
" i/o failure occurred creating new symlink " ) ;
2005-07-13 07:21:28 +04:00
return - EIO ;
}
if ( retval = = ITEM_FOUND ) {
pathrelse ( path ) ;
2009-03-30 22:02:21 +04:00
reiserfs_warning ( sb , " vs-13080 " ,
2005-07-13 07:21:28 +04:00
" object with this key exists (%k) " ,
& ( ih - > ih_key ) ) ;
return - EEXIST ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* insert item, that is body of symlink */
return reiserfs_insert_item ( th , path , & key , ih , inode , symname ) ;
}
2005-04-17 02:20:36 +04:00
/*
 * Insert the stat data of a new object into the tree and then, depending on
 * mode, call reiserfs_new_directory() (directory: inserts the item with "."
 * and "..") or reiserfs_new_symlink() (symlink: inserts the body); nothing
 * more is inserted for other modes.
 *
 * th       - transaction handle; t_trans_id must be non-zero
 * dir      - parent directory; supplies the super block, the packing
 *            locality and the inherited attributes
 * mode     - file mode, tested with S_ISDIR() / S_ISLNK()
 * symname  - symlink body, passed on to reiserfs_new_symlink()
 * i_size   - initial i_size of the new object
 * dentry   - passed on to reiserfs_inherit_default_acl()
 * inode    - the new in-core inode that is filled in here
 * security - security handle; written with reiserfs_security_write() when
 *            security->name is set
 *
 * NOTE! uid and gid must already be set in the inode.  If we return
 * non-zero due to an error, we have to drop the quota previously allocated
 * for the fresh inode.  This can only be done outside a transaction, so
 * if we return non-zero, we also end the transaction.
 *
 * Returns 0 on success.  Every error path ends at out_inserted_sd, which
 * sets i_nlink to 0, clears th->t_trans_id, unlocks the inode and drops it
 * with iput().
 */
2005-07-13 07:21:28 +04:00
int reiserfs_new_inode ( struct reiserfs_transaction_handle * th ,
struct inode * dir , int mode , const char * symname ,
2009-03-30 22:02:44 +04:00
/* 0 for regular, EMPTY_DIR_SIZE for dirs,
   strlen(symname) for symlinks) */
loff_t i_size , struct dentry * dentry ,
2009-03-30 22:02:41 +04:00
struct inode * inode ,
struct reiserfs_security_handle * security )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct super_block * sb ;
2008-12-30 10:03:58 +03:00
struct reiserfs_iget_args args ;
2005-07-13 07:21:28 +04:00
INITIALIZE_PATH ( path_to_key ) ;
struct cpu_key key ;
struct item_head ih ;
struct stat_data sd ;
int retval ;
int err ;
BUG_ON ( ! th - > t_trans_id ) ;
/* charge quota for the inode first; if that fails there is no quota to
 * give back, so the error path skips vfs_dq_free_inode() */
2009-01-26 19:14:18 +03:00
if ( vfs_dq_alloc_inode ( inode ) ) {
2005-07-13 07:21:28 +04:00
err = - EDQUOT ;
goto out_end_trans ;
}
/* the parent has no links left: refuse to create anything in it */
2006-10-04 13:15:30 +04:00
if ( ! dir - > i_nlink ) {
2005-07-13 07:21:28 +04:00
err = - EPERM ;
goto out_bad_inode ;
}
sb = dir - > i_sb ;
/* item head of new item */
ih . ih_key . k_dir_id = reiserfs_choose_packing ( dir ) ;
ih . ih_key . k_objectid = cpu_to_le32 ( reiserfs_get_unused_objectid ( th ) ) ;
/* an object id of 0 is treated as failure to obtain one */
if ( ! ih . ih_key . k_objectid ) {
err = - ENOMEM ;
goto out_bad_inode ;
}
2008-12-30 10:03:58 +03:00
args . objectid = inode - > i_ino = le32_to_cpu ( ih . ih_key . k_objectid ) ;
2009-01-02 16:16:51 +03:00
/* stat data item head: old format filesystems get the V1 size and the
 * 3.5 key format, everything else the current size and 3.6 format */
if ( old_format_only ( sb ) )
make_le_item_head ( & ih , NULL , KEY_FORMAT_3_5 , SD_OFFSET ,
TYPE_STAT_DATA , SD_V1_SIZE , MAX_US_INT ) ;
else
make_le_item_head ( & ih , NULL , KEY_FORMAT_3_6 , SD_OFFSET ,
TYPE_STAT_DATA , SD_SIZE , MAX_US_INT ) ;
2008-12-30 10:03:58 +03:00
/* copy the key into the in-core inode, then insert the inode locked;
 * a negative result is reported as -EINVAL */
memcpy ( INODE_PKEY ( inode ) , & ( ih . ih_key ) , KEY_SIZE ) ;
args . dirid = le32_to_cpu ( ih . ih_key . k_dir_id ) ;
if ( insert_inode_locked4 ( inode , args . objectid ,
reiserfs_find_actor , & args ) < 0 ) {
err = - EINVAL ;
goto out_bad_inode ;
}
2005-07-13 07:21:28 +04:00
if ( old_format_only ( sb ) )
2009-03-30 22:02:44 +04:00
/* not a perfect generation count, as object ids can be reused, but
 * this is as good as reiserfs can do right now.
 * note that the private part of inode isn't filled in yet, we have
 * to use the directory.
 */
inode - > i_generation = le32_to_cpu ( INODE_PKEY ( dir ) - > k_objectid ) ;
else
2005-04-17 02:20:36 +04:00
# if defined( USE_INODE_GENERATION_COUNTER )
2005-07-13 07:21:28 +04:00
inode - > i_generation =
le32_to_cpu ( REISERFS_SB ( sb ) - > s_rs - > s_inode_generation ) ;
2005-04-17 02:20:36 +04:00
# else
2005-07-13 07:21:28 +04:00
inode - > i_generation = + + event ;
2005-04-17 02:20:36 +04:00
# endif
2005-07-13 07:21:28 +04:00
/* fill stat data */
inode - > i_nlink = ( S_ISDIR ( mode ) ? 2 : 1 ) ;
/* uid and gid must already be set by the caller for quota init */
/* symlink cannot be immutable or append only, right? */
if ( S_ISLNK ( inode - > i_mode ) )
inode - > i_flags & = ~ ( S_IMMUTABLE | S_APPEND ) ;
inode - > i_mtime = inode - > i_atime = inode - > i_ctime = CURRENT_TIME_SEC ;
inode - > i_size = i_size ;
inode - > i_blocks = 0 ;
inode - > i_bytes = 0 ;
/* symlinks start with their first direct byte at 1; every other object
 * starts with no bytes in a direct item */
REISERFS_I ( inode ) - > i_first_direct_byte = S_ISLNK ( mode ) ? 1 :
U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
/* reset the reiserfs-private part of the inode */
INIT_LIST_HEAD ( & ( REISERFS_I ( inode ) - > i_prealloc_list ) ) ;
REISERFS_I ( inode ) - > i_flags = 0 ;
REISERFS_I ( inode ) - > i_prealloc_block = 0 ;
REISERFS_I ( inode ) - > i_prealloc_count = 0 ;
REISERFS_I ( inode ) - > i_trans_id = 0 ;
REISERFS_I ( inode ) - > i_jl = NULL ;
/* attributes are inherited from the parent, limited by the inherit mask */
REISERFS_I ( inode ) - > i_attrs =
REISERFS_I ( dir ) - > i_attrs & REISERFS_INHERIT_MASK ;
sd_attrs_to_i_attrs ( REISERFS_I ( inode ) - > i_attrs , inode ) ;
2007-01-23 07:40:46 +03:00
mutex_init ( & ( REISERFS_I ( inode ) - > i_mmap ) ) ;
2006-09-29 13:00:00 +04:00
reiserfs_init_acl_access ( inode ) ;
reiserfs_init_acl_default ( inode ) ;
2006-09-29 12:59:58 +04:00
reiserfs_init_xattr_rwsem ( inode ) ;
2005-07-13 07:21:28 +04:00
/* key to search for correct place for new stat data */
_make_cpu_key ( & key , KEY_FORMAT_3_6 , le32_to_cpu ( ih . ih_key . k_dir_id ) ,
le32_to_cpu ( ih . ih_key . k_objectid ) , SD_OFFSET ,
TYPE_STAT_DATA , 3 /*key length */ ) ;
/* find proper place for inserting of stat data */
retval = search_item ( sb , & key , & path_to_key ) ;
if ( retval = = IO_ERROR ) {
err = - EIO ;
goto out_bad_inode ;
}
/* stat data with this key is already in the tree */
if ( retval = = ITEM_FOUND ) {
pathrelse ( & path_to_key ) ;
err = - EEXIST ;
goto out_bad_inode ;
}
if ( old_format_only ( sb ) ) {
if ( inode - > i_uid & ~ 0xffff | | inode - > i_gid & ~ 0xffff ) {
pathrelse ( & path_to_key ) ;
/* i_uid or i_gid is too big to be stored in stat data v3.5 */
err = - EINVAL ;
goto out_bad_inode ;
}
inode2sd_v1 ( & sd , inode , inode - > i_size ) ;
} else {
inode2sd ( & sd , inode , inode - > i_size ) ;
}
/* store in in-core inode the key of stat data and version all
 * object items will have (directory items will have old offset
 * format, other new objects will consist of new items) */
if ( old_format_only ( sb ) | | S_ISDIR ( mode ) | | S_ISLNK ( mode ) )
set_inode_item_key_version ( inode , KEY_FORMAT_3_5 ) ;
else
set_inode_item_key_version ( inode , KEY_FORMAT_3_6 ) ;
if ( old_format_only ( sb ) )
set_inode_sd_version ( inode , STAT_DATA_V1 ) ;
else
set_inode_sd_version ( inode , STAT_DATA_V2 ) ;
/* insert the stat data into the tree */
2005-04-17 02:20:36 +04:00
# ifdef DISPLACE_NEW_PACKING_LOCALITIES
2005-07-13 07:21:28 +04:00
if ( REISERFS_I ( dir ) - > new_packing_locality )
th - > displace_new_blocks = 1 ;
2005-04-17 02:20:36 +04:00
# endif
2005-07-13 07:21:28 +04:00
retval =
reiserfs_insert_item ( th , & path_to_key , & key , & ih , inode ,
( char * ) ( & sd ) ) ;
/* the stat data did not make it into the tree: still a "bad inode" case */
if ( retval ) {
err = retval ;
reiserfs_check_path ( & path_to_key ) ;
goto out_bad_inode ;
}
2005-04-17 02:20:36 +04:00
# ifdef DISPLACE_NEW_PACKING_LOCALITIES
2005-07-13 07:21:28 +04:00
if ( ! th - > displace_new_blocks )
REISERFS_I ( dir ) - > new_packing_locality = 0 ;
2005-04-17 02:20:36 +04:00
# endif
2005-07-13 07:21:28 +04:00
if ( S_ISDIR ( mode ) ) {
/* insert item with "." and ".." */
retval =
reiserfs_new_directory ( th , inode , & ih , & path_to_key , dir ) ;
}
if ( S_ISLNK ( mode ) ) {
/* insert body of symlink */
if ( ! old_format_only ( sb ) )
i_size = ROUND_UP ( i_size ) ;
retval =
reiserfs_new_symlink ( th , inode , & ih , & path_to_key , symname ,
i_size ) ;
}
/* from here on the stat data is in the tree, so failures end the
 * transaction themselves and jump past the bad-inode/quota handling.
 * NOTE(review): the journal_end() result is discarded here and in the ACL
 * failure path, but is checked in the security-write failure path further
 * down - confirm the difference is intended. */
if ( retval ) {
err = retval ;
reiserfs_check_path ( & path_to_key ) ;
journal_end ( th , th - > t_super , th - > t_blocks_allocated ) ;
goto out_inserted_sd ;
}
if ( reiserfs_posixacl ( inode - > i_sb ) ) {
2009-03-30 22:02:39 +04:00
retval = reiserfs_inherit_default_acl ( th , dir , dentry , inode ) ;
2005-07-13 07:21:28 +04:00
if ( retval ) {
err = retval ;
reiserfs_check_path ( & path_to_key ) ;
journal_end ( th , th - > t_super , th - > t_blocks_allocated ) ;
goto out_inserted_sd ;
}
} else if ( inode - > i_sb - > s_flags & MS_POSIXACL ) {
2009-03-30 22:02:21 +04:00
reiserfs_warning ( inode - > i_sb , " jdm-13090 " ,
" ACLs aren't enabled in the fs, "
2005-07-13 07:21:28 +04:00
" but vfs thinks they are! " ) ;
2009-03-30 22:02:32 +04:00
} else if ( IS_PRIVATE ( dir ) )
inode - > i_flags | = S_PRIVATE ;
2005-07-13 07:21:28 +04:00
2009-03-30 22:02:41 +04:00
/* write the security attribute only when the handle carries a name */
if ( security - > name ) {
retval = reiserfs_security_write ( th , inode , security ) ;
if ( retval ) {
err = retval ;
reiserfs_check_path ( & path_to_key ) ;
/* a failure to end the transaction replaces the earlier error */
retval = journal_end ( th , th - > t_super ,
th - > t_blocks_allocated ) ;
if ( retval )
err = retval ;
goto out_inserted_sd ;
}
2005-07-13 07:21:28 +04:00
}
reiserfs_update_sd ( th , inode ) ;
reiserfs_check_path ( & path_to_key ) ;
return 0 ;
2005-04-17 02:20:36 +04:00
/* it looks like you can easily compress these two goto targets into
 * one.  Keeping it like this doesn't actually hurt anything, and they
 * are place holders for what the quota code actually needs.
 */
2005-07-13 07:21:28 +04:00
out_bad_inode :
/* Invalidate the object, nothing was inserted yet */
INODE_PKEY ( inode ) - > k_objectid = 0 ;
/* Quota change must be inside a transaction for journaling */
2009-01-26 19:14:18 +03:00
vfs_dq_free_inode ( inode ) ;
2005-07-13 07:21:28 +04:00
out_end_trans :
journal_end ( th , th - > t_super , th - > t_blocks_allocated ) ;
/* Drop can be outside and it needs more credits so it's better to have it outside */
2009-01-26 19:14:18 +03:00
vfs_dq_drop ( inode ) ;
2005-07-13 07:21:28 +04:00
inode - > i_flags | = S_NOQUOTA ;
make_bad_inode ( inode ) ;
/* common tail of all error paths; the transaction has been ended by now */
out_inserted_sd :
inode - > i_nlink = 0 ;
th - > t_trans_id = 0 ; /* so the caller can't use this handle later */
2008-12-30 10:03:58 +03:00
unlock_new_inode ( inode ) ; /* OK to do even if we hadn't locked it */
2009-03-30 22:02:35 +04:00
iput ( inode ) ;
2005-07-13 07:21:28 +04:00
return err ;
2005-04-17 02:20:36 +04:00
}
/*
* * finds the tail page in the page cache ,
* * reads the last block in .
* *
* * On success , page_result is set to a locked , pinned page , and bh_result
* * is set to an up to date buffer for the last block in the file . returns 0.
* *
* * tail conversion is not done , so bh_result might not be valid for writing
* * check buffer_mapped ( bh_result ) and bh_result - > b_blocknr ! = 0 before
* * trying to write the block .
* *
* * on failure , nonzero is returned , page_result and bh_result are untouched .
*/
2009-03-30 22:02:47 +04:00
static int grab_tail_page ( struct inode * inode ,
2005-07-13 07:21:28 +04:00
struct page * * page_result ,
struct buffer_head * * bh_result )
{
/* we want the page with the last byte in the file,
* * not the page that will hold the next byte for appending
*/
2009-03-30 22:02:47 +04:00
unsigned long index = ( inode - > i_size - 1 ) > > PAGE_CACHE_SHIFT ;
2005-07-13 07:21:28 +04:00
unsigned long pos = 0 ;
unsigned long start = 0 ;
2009-03-30 22:02:47 +04:00
unsigned long blocksize = inode - > i_sb - > s_blocksize ;
unsigned long offset = ( inode - > i_size ) & ( PAGE_CACHE_SIZE - 1 ) ;
2005-07-13 07:21:28 +04:00
struct buffer_head * bh ;
struct buffer_head * head ;
struct page * page ;
int error ;
/* we know that we are only called with inode->i_size > 0.
* * we also know that a file tail can never be as big as a block
* * If i_size % blocksize = = 0 , our file is currently block aligned
* * and it won ' t need converting or zeroing after a truncate .
*/
if ( ( offset & ( blocksize - 1 ) ) = = 0 ) {
return - ENOENT ;
}
2009-03-30 22:02:47 +04:00
page = grab_cache_page ( inode - > i_mapping , index ) ;
2005-07-13 07:21:28 +04:00
error = - ENOMEM ;
if ( ! page ) {
goto out ;
}
/* start within the page of the last block in the file */
start = ( offset / blocksize ) * blocksize ;
error = block_prepare_write ( page , start , offset ,
reiserfs_get_block_create_0 ) ;
if ( error )
goto unlock ;
head = page_buffers ( page ) ;
bh = head ;
do {
if ( pos > = start ) {
break ;
}
bh = bh - > b_this_page ;
pos + = blocksize ;
} while ( bh ! = head ) ;
if ( ! buffer_uptodate ( bh ) ) {
/* note, this should never happen, prepare_write should
* * be taking care of this for us . If the buffer isn ' t up to date ,
* * I ' ve screwed up the code to find the buffer , or the code to
* * call prepare_write
*/
2009-03-30 22:02:47 +04:00
reiserfs_error ( inode - > i_sb , " clm-6000 " ,
2009-03-30 22:02:28 +04:00
" error reading block %lu " , bh - > b_blocknr ) ;
2005-07-13 07:21:28 +04:00
error = - EIO ;
goto unlock ;
}
* bh_result = bh ;
* page_result = page ;
out :
return error ;
unlock :
unlock_page ( page ) ;
page_cache_release ( page ) ;
return error ;
2005-04-17 02:20:36 +04:00
}
/*
* * vfs version of truncate file . Must NOT be called with
* * a transaction already started .
* *
* * some code taken from block_truncate_page
*/
2009-03-30 22:02:47 +04:00
int reiserfs_truncate_file ( struct inode * inode , int update_timestamps )
2005-07-13 07:21:28 +04:00
{
struct reiserfs_transaction_handle th ;
/* we want the offset for the first byte after the end of the file */
2009-03-30 22:02:47 +04:00
unsigned long offset = inode - > i_size & ( PAGE_CACHE_SIZE - 1 ) ;
unsigned blocksize = inode - > i_sb - > s_blocksize ;
2005-07-13 07:21:28 +04:00
unsigned length ;
struct page * page = NULL ;
int error ;
struct buffer_head * bh = NULL ;
2005-12-14 22:38:05 +03:00
int err2 ;
2005-07-13 07:21:28 +04:00
2009-03-30 22:02:47 +04:00
reiserfs_write_lock ( inode - > i_sb ) ;
2005-07-13 07:21:28 +04:00
2009-03-30 22:02:47 +04:00
if ( inode - > i_size > 0 ) {
error = grab_tail_page ( inode , & page , & bh ) ;
if ( error ) {
2009-03-30 22:02:44 +04:00
// -ENOENT means we truncated past the end of the file,
2005-07-13 07:21:28 +04:00
// and get_block_create_0 could not find a block to read in,
// which is ok.
if ( error ! = - ENOENT )
2009-03-30 22:02:47 +04:00
reiserfs_error ( inode - > i_sb , " clm-6001 " ,
2009-03-30 22:02:28 +04:00
" grab_tail_page failed %d " ,
error ) ;
2005-07-13 07:21:28 +04:00
page = NULL ;
bh = NULL ;
}
}
2005-04-17 02:20:36 +04:00
2009-03-30 22:02:44 +04:00
/* so, if page != NULL, we have a buffer head for the offset at
* * the end of the file . if the bh is mapped , and bh - > b_blocknr ! = 0 ,
* * then we have an unformatted node . Otherwise , we have a direct item ,
* * and no zeroing is required on disk . We zero after the truncate ,
* * because the truncate might pack the item anyway
2005-07-13 07:21:28 +04:00
* * ( it will unmap bh if it packs ) .
2005-04-17 02:20:36 +04:00
*/
2005-07-13 07:21:28 +04:00
/* it is enough to reserve space in transaction for 2 balancings:
one for " save " link adding and another for the first
cut_from_item . 1 is for update_sd */
2009-03-30 22:02:47 +04:00
error = journal_begin ( & th , inode - > i_sb ,
2005-07-13 07:21:28 +04:00
JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
if ( error )
goto out ;
2009-03-30 22:02:47 +04:00
reiserfs_update_inode_transaction ( inode ) ;
2005-07-13 07:21:28 +04:00
if ( update_timestamps )
/* we are doing real truncate: if the system crashes before the last
transaction of truncating gets committed - on reboot the file
either appears truncated properly or not truncated at all */
2009-03-30 22:02:47 +04:00
add_save_link ( & th , inode , 1 ) ;
err2 = reiserfs_do_truncate ( & th , inode , page , update_timestamps ) ;
2005-07-13 07:21:28 +04:00
error =
2009-03-30 22:02:47 +04:00
journal_end ( & th , inode - > i_sb , JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ;
2005-07-13 07:21:28 +04:00
if ( error )
goto out ;
2005-12-14 22:38:05 +03:00
/* check reiserfs_do_truncate after ending the transaction */
if ( err2 ) {
error = err2 ;
goto out ;
}
2005-07-13 07:21:28 +04:00
if ( update_timestamps ) {
2009-03-30 22:02:47 +04:00
error = remove_save_link ( inode , 1 /* truncate */ ) ;
2005-07-13 07:21:28 +04:00
if ( error )
goto out ;
}
if ( page ) {
length = offset & ( blocksize - 1 ) ;
/* if we are not on a block boundary */
if ( length ) {
length = blocksize - length ;
2008-02-05 09:28:29 +03:00
zero_user ( page , offset , length ) ;
2005-07-13 07:21:28 +04:00
if ( buffer_mapped ( bh ) & & bh - > b_blocknr ! = 0 ) {
mark_buffer_dirty ( bh ) ;
}
}
unlock_page ( page ) ;
page_cache_release ( page ) ;
}
2009-03-30 22:02:47 +04:00
reiserfs_write_unlock ( inode - > i_sb ) ;
2005-07-13 07:21:28 +04:00
return 0 ;
out :
if ( page ) {
unlock_page ( page ) ;
page_cache_release ( page ) ;
}
2009-03-30 22:02:47 +04:00
reiserfs_write_unlock ( inode - > i_sb ) ;
2005-07-13 07:21:28 +04:00
return error ;
}
/*
 * Work out where the data of one buffer of a page being written back
 * belongs, based on what the tree holds for that file block.
 *
 * inode     - file the page belongs to
 * bh_result - buffer of the page; must be up to date, otherwise -EIO is
 *             returned right away
 * block     - file block number covered by bh_result
 *
 * Three cases are handled after the tree lookup:
 *  - indirect item with a non-zero block number: bh_result is mapped to
 *    that block;
 *  - direct item: the bytes are copied from the page into the item inside
 *    a transaction and bh_result is mapped to block 0; the buffer's dirty
 *    bit is cleared before returning;
 *  - position not found, or an indirect item whose block number is 0 (a
 *    hole): reiserfs_get_block() is called with GET_BLOCK_CREATE after
 *    the write lock has been dropped.
 *
 * Returns 0 on success or a negative error.
 */
static int map_block_for_writepage ( struct inode * inode ,
struct buffer_head * bh_result ,
unsigned long block )
{
struct reiserfs_transaction_handle th ;
int fs_gen ;
struct item_head tmp_ih ;
struct item_head * ih ;
struct buffer_head * bh ;
__le32 * item ;
struct cpu_key key ;
INITIALIZE_PATH ( path ) ;
int pos_in_item ;
int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
2005-11-29 00:43:53 +03:00
/* byte offset of the block within the file, plus 1 */
loff_t byte_offset = ( ( loff_t ) block < < inode - > i_sb - > s_blocksize_bits ) + 1 ;
2005-07-13 07:21:28 +04:00
int retval ;
int use_get_block = 0 ;
int bytes_copied = 0 ;
int copy_size ;
int trans_running = 0 ;
/* catch places below that try to log something without starting a trans */
th . t_trans_id = 0 ;
if ( ! buffer_uptodate ( bh_result ) ) {
return - EIO ;
}
/* mapped once here and unmapped once before returning, even when the
 * code below loops back to start_over */
kmap ( bh_result - > b_page ) ;
start_over :
reiserfs_write_lock ( inode - > i_sb ) ;
make_cpu_key ( & key , inode , byte_offset , TYPE_ANY , 3 ) ;
research :
retval = search_for_position_by_key ( inode - > i_sb , & key , & path ) ;
/* nothing at this position: let reiserfs_get_block() deal with it */
if ( retval ! = POSITION_FOUND ) {
use_get_block = 1 ;
goto out ;
}
bh = get_last_bh ( & path ) ;
ih = get_ih ( & path ) ;
item = get_item ( & path ) ;
pos_in_item = path . pos_in_item ;
/* we've found an unformatted node */
if ( indirect_item_found ( retval , ih ) ) {
/* bytes were already copied into a direct item for this block */
if ( bytes_copied > 0 ) {
2009-03-30 22:02:21 +04:00
reiserfs_warning ( inode - > i_sb , " clm-6002 " ,
" bytes_copied %d " , bytes_copied ) ;
2005-07-13 07:21:28 +04:00
}
if ( ! get_block_num ( item , pos_in_item ) ) {
/* crap, we are writing to a hole */
use_get_block = 1 ;
goto out ;
}
set_block_dev_mapped ( bh_result ,
get_block_num ( item , pos_in_item ) , inode ) ;
} else if ( is_direct_le_ih ( ih ) ) {
char * p ;
/* source: the spot in the page that corresponds to this block */
p = page_address ( bh_result - > b_page ) ;
p + = ( byte_offset - 1 ) & ( PAGE_CACHE_SIZE - 1 ) ;
copy_size = ih_item_len ( ih ) - pos_in_item ;
/* remember generation and item head so a change of the tree can be
 * detected after calls that are followed by the fs_changed() checks */
fs_gen = get_generation ( inode - > i_sb ) ;
copy_item_head ( & tmp_ih , ih ) ;
if ( ! trans_running ) {
/* vs-3050 is gone, no need to drop the path */
retval = journal_begin ( & th , inode - > i_sb , jbegin_count ) ;
if ( retval )
goto out ;
reiserfs_update_inode_transaction ( inode ) ;
trans_running = 1 ;
/* the item moved while the transaction was started: search again */
if ( fs_changed ( fs_gen , inode - > i_sb )
& & item_moved ( & tmp_ih , & path ) ) {
reiserfs_restore_prepared_buffer ( inode - > i_sb ,
bh ) ;
goto research ;
}
}
reiserfs_prepare_for_journal ( inode - > i_sb , bh , 1 ) ;
/* same check again after preparing the buffer for the journal */
if ( fs_changed ( fs_gen , inode - > i_sb )
& & item_moved ( & tmp_ih , & path ) ) {
reiserfs_restore_prepared_buffer ( inode - > i_sb , bh ) ;
goto research ;
}
/* copy from the page into the direct item and log the tree buffer */
memcpy ( B_I_PITEM ( bh , ih ) + pos_in_item , p + bytes_copied ,
copy_size ) ;
journal_mark_dirty ( & th , inode - > i_sb , bh ) ;
bytes_copied + = copy_size ;
/* block number 0 marks the buffer as backed by a direct item */
set_block_dev_mapped ( bh_result , 0 , inode ) ;
/* are there still bytes left? */
if ( bytes_copied < bh_result - > b_size & &
( byte_offset + bytes_copied ) < inode - > i_size ) {
/* advance the key past what was copied and look up the next item */
set_cpu_key_k_offset ( & key ,
cpu_key_k_offset ( & key ) +
copy_size ) ;
goto research ;
}
} else {
2009-03-30 22:02:21 +04:00
reiserfs_warning ( inode - > i_sb , " clm-6003 " ,
" bad item inode %lu " , inode - > i_ino ) ;
2005-07-13 07:21:28 +04:00
retval = - EIO ;
goto out ;
}
retval = 0 ;
out :
pathrelse ( & path ) ;
/* close the transaction if one was started; its error wins over retval */
if ( trans_running ) {
int err = journal_end ( & th , inode - > i_sb , jbegin_count ) ;
if ( err )
retval = err ;
trans_running = 0 ;
}
reiserfs_write_unlock ( inode - > i_sb ) ;
/* this is where we fill in holes in the file. */
if ( use_get_block ) {
retval = reiserfs_get_block ( inode , block , bh_result ,
2006-01-10 02:59:24 +03:00
GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2005-07-13 07:21:28 +04:00
| GET_BLOCK_NO_DANGLE ) ;
if ( ! retval ) {
if ( ! buffer_mapped ( bh_result )
| | bh_result - > b_blocknr = = 0 ) {
/* get_block failed to find a mapped unformatted node. */
use_get_block = 0 ;
goto start_over ;
}
}
}
kunmap ( bh_result - > b_page ) ;
if ( ! retval & & buffer_mapped ( bh_result ) & & bh_result - > b_blocknr = = 0 ) {
/* we've copied data from the page into the direct item, so the
 * buffer in the page is now clean, mark it to reflect that.
 */
lock_buffer ( bh_result ) ;
clear_buffer_dirty ( bh_result ) ;
unlock_buffer ( bh_result ) ;
}
return retval ;
2005-04-17 02:20:36 +04:00
}
2009-03-30 22:02:44 +04:00
/*
 * Write one page of a file back.
 *
 * mason@suse.com: updated in 2.5.54 to follow the same general io
 * start/recovery path as __block_write_full_page, along with special
 * code to handle reiserfs tails.
 *
 * page - locked page to write; it is unlocked on every path out of here
 * wbc  - writeback control; sync_mode and nonblocking decide whether
 *        buffers are locked or only try-locked
 *
 * Outline: zero the part of the page past i_size, map every dirty buffer
 * that is unmapped or mapped to block 0 through map_block_for_writepage(),
 * then either log the mapped buffers in a transaction (when the page had
 * PageChecked set) or lock, mark and submit the dirty ones.
 *
 * Returns 0 on success or the error that sent us to the fail path.
 */
2005-07-13 07:21:28 +04:00
static int reiserfs_write_full_page ( struct page * page ,
struct writeback_control * wbc )
{
struct inode * inode = page - > mapping - > host ;
unsigned long end_index = inode - > i_size > > PAGE_CACHE_SHIFT ;
int error = 0 ;
unsigned long block ;
2006-08-05 23:15:10 +04:00
sector_t last_block ;
2005-07-13 07:21:28 +04:00
struct buffer_head * head , * bh ;
int partial = 0 ;
int nr = 0 ;
int checked = PageChecked ( page ) ;
struct reiserfs_transaction_handle th ;
struct super_block * s = inode - > i_sb ;
int bh_per_page = PAGE_CACHE_SIZE / s - > s_blocksize ;
th . t_trans_id = 0 ;
2006-02-01 14:06:49 +03:00
/* no logging allowed when nonblocking or from PF_MEMALLOC */
if ( checked & & ( current - > flags & PF_MEMALLOC ) ) {
redirty_page_for_writepage ( wbc , page ) ;
unlock_page ( page ) ;
return 0 ;
}
2005-07-13 07:21:28 +04:00
/* The page dirty bit is cleared before writepage is called, which
 * means we have to tell create_empty_buffers to make dirty buffers
 * The page really should be up to date at this point, so tossing
 * in the BH_Uptodate is just a sanity check.
 */
if ( ! page_has_buffers ( page ) ) {
create_empty_buffers ( page , s - > s_blocksize ,
( 1 < < BH_Dirty ) | ( 1 < < BH_Uptodate ) ) ;
}
head = page_buffers ( page ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* last page in the file, zero out any contents past the
 * last byte in the file
 */
if ( page - > index > = end_index ) {
unsigned last_offset ;
last_offset = inode - > i_size & ( PAGE_CACHE_SIZE - 1 ) ;
/* no file contents in this page */
if ( page - > index > = end_index + 1 | | ! last_offset ) {
unlock_page ( page ) ;
return 0 ;
}
2008-02-05 09:28:29 +03:00
zero_user_segment ( page , last_offset , PAGE_CACHE_SIZE ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
bh = head ;
/* file block number of the first buffer in this page */
block = page - > index < < ( PAGE_CACHE_SHIFT - s - > s_blocksize_bits ) ;
2006-08-05 23:15:10 +04:00
/* file block number that holds the last byte of the file */
last_block = ( i_size_read ( inode ) - 1 ) > > inode - > i_blkbits ;
2005-07-13 07:21:28 +04:00
/* first map all the buffers, logging any direct items we find */
do {
2006-08-05 23:15:10 +04:00
if ( block > last_block ) {
/*
 * This can happen when the block size is less than
 * the page size.  The corresponding bytes in the page
 * were zero filled above
 */
clear_buffer_dirty ( bh ) ;
set_buffer_uptodate ( bh ) ;
} else if ( ( checked | | buffer_dirty ( bh ) ) & &
( ! buffer_mapped ( bh ) | | ( buffer_mapped ( bh )
2005-07-13 07:21:28 +04:00
& & bh - > b_blocknr = =
0 ) ) ) {
/* not mapped yet, or it points to a direct item, search
 * the btree for the mapping info, and log any direct
 * items found
 */
if ( ( error = map_block_for_writepage ( inode , bh , block ) ) ) {
goto fail ;
}
}
bh = bh - > b_this_page ;
block + + ;
} while ( bh ! = head ) ;
/*
 * we start the transaction after map_block_for_writepage,
 * because it can create holes in the file (an unbounded operation).
 * starting it here, we can make a reliable estimate for how many
 * blocks we're going to log
 */
2005-07-13 07:21:28 +04:00
if ( checked ) {
ClearPageChecked ( page ) ;
reiserfs_write_lock ( s ) ;
error = journal_begin ( & th , s , bh_per_page + 1 ) ;
if ( error ) {
reiserfs_write_unlock ( s ) ;
goto fail ;
}
reiserfs_update_inode_transaction ( inode ) ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/* now go through and lock any dirty buffers on the page */
do {
/* every buffer gets a reference here; the submit loop below drops it */
get_bh ( bh ) ;
if ( ! buffer_mapped ( bh ) )
continue ;
/* mapped to block 0: skipped, nothing is submitted for it */
if ( buffer_mapped ( bh ) & & bh - > b_blocknr = = 0 )
continue ;
/* checked page: log the buffer in the transaction, no async write mark */
if ( checked ) {
reiserfs_prepare_for_journal ( s , bh , 1 ) ;
journal_mark_dirty ( & th , s , bh ) ;
continue ;
}
/* from this point on, we know the buffer is mapped to a
 * real block and not a direct item
 */
if ( wbc - > sync_mode ! = WB_SYNC_NONE | | ! wbc - > nonblocking ) {
lock_buffer ( bh ) ;
} else {
2008-08-02 14:02:13 +04:00
/* nonblocking writeback: do not wait for the buffer lock, retry later */
if ( ! trylock_buffer ( bh ) ) {
2005-07-13 07:21:28 +04:00
redirty_page_for_writepage ( wbc , page ) ;
continue ;
}
}
if ( test_clear_buffer_dirty ( bh ) ) {
mark_buffer_async_write ( bh ) ;
} else {
unlock_buffer ( bh ) ;
}
} while ( ( bh = bh - > b_this_page ) ! = head ) ;
if ( checked ) {
error = journal_end ( & th , s , bh_per_page + 1 ) ;
reiserfs_write_unlock ( s ) ;
/* NOTE(review): at this point each buffer already holds the reference
 * taken in the loop above; the fail path takes another one per buffer
 * but drops only one - looks like a buffer reference leak on this
 * path, confirm. */
if ( error )
goto fail ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
BUG_ON ( PageWriteback ( page ) ) ;
set_page_writeback ( page ) ;
unlock_page ( page ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/*
 * since any buffer might be the only dirty buffer on the page,
 * the first submit_bh can bring the page out of writeback.
 * be careful with the buffers.
 */
do {
2005-07-13 07:21:28 +04:00
/* fetch the successor before submitting or releasing this buffer */
struct buffer_head * next = bh - > b_this_page ;
if ( buffer_async_write ( bh ) ) {
submit_bh ( WRITE , bh ) ;
nr + + ;
}
put_bh ( bh ) ;
bh = next ;
} while ( bh ! = head ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
error = 0 ;
done :
if ( nr = = 0 ) {
/*
 * if this page only had a direct item, it is very possible for
 * no io to be required without there being an error.  Or,
 * someone else could have locked them and sent them down the
 * pipe without locking the page
 */
bh = head ;
do {
if ( ! buffer_uptodate ( bh ) ) {
partial = 1 ;
break ;
}
bh = bh - > b_this_page ;
} while ( bh ! = head ) ;
/* all buffers are up to date, so the page is as well */
if ( ! partial )
SetPageUptodate ( page ) ;
/* nothing was submitted, so writeback has to be ended here */
end_page_writeback ( page ) ;
}
return error ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
fail :
/* catches various errors, we need to make sure any valid dirty blocks
 * get to the media.  The page is currently locked and not marked for
 * writeback
 */
ClearPageUptodate ( page ) ;
bh = head ;
do {
get_bh ( bh ) ;
/* dirty buffers mapped to a non-zero block still get written */
if ( buffer_mapped ( bh ) & & buffer_dirty ( bh ) & & bh - > b_blocknr ) {
lock_buffer ( bh ) ;
mark_buffer_async_write ( bh ) ;
} else {
/*
 * clear any dirty bits that might have come from getting
 * attached to a dirty page
 */
clear_buffer_dirty ( bh ) ;
}
bh = bh - > b_this_page ;
} while ( bh ! = head ) ;
SetPageError ( page ) ;
BUG_ON ( PageWriteback ( page ) ) ;
set_page_writeback ( page ) ;
unlock_page ( page ) ;
/* submit what was marked above and drop the references taken there */
do {
struct buffer_head * next = bh - > b_this_page ;
if ( buffer_async_write ( bh ) ) {
clear_buffer_dirty ( bh ) ;
submit_bh ( WRITE , bh ) ;
nr + + ;
}
put_bh ( bh ) ;
bh = next ;
} while ( bh ! = head ) ;
/* error is still set; done ends writeback if nothing was submitted */
goto done ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/*
 * Read a whole page through the generic block layer helper, using
 * reiserfs_get_block for the block mapping.  The file argument is unused.
 * Returns whatever block_read_full_page() returns.
 */
static int reiserfs_readpage(struct file *f, struct page *page)
{
	int status = block_read_full_page(page, reiserfs_get_block);

	return status;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
static int reiserfs_writepage ( struct page * page , struct writeback_control * wbc )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct inode * inode = page - > mapping - > host ;
reiserfs_wait_on_write_block ( inode - > i_sb ) ;
return reiserfs_write_full_page ( page , wbc ) ;
2005-04-17 02:20:36 +04:00
}
2007-10-16 12:25:14 +04:00
/*
 * Prepare a page of the mapping for a write of len bytes at pos.
 *
 * On success 0 is returned and *pagep is the locked page obtained from
 * grab_cache_page_write_begin().  On failure the page is unlocked and
 * released and a negative error is returned (-ENOMEM when no page could
 * be grabbed).
 *
 * *fsdata is set to NULL, except that for AOP_FLAG_CONT_EXPAND with a
 * block aligned pos, pos is advanced by one byte and the flags are stored
 * in *fsdata (presumably for the matching write_end - confirm there).
 */
static int reiserfs_write_begin(struct file *file,
				struct address_space *mapping,
				loff_t pos, unsigned len, unsigned flags,
				struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct reiserfs_transaction_handle *th;
	struct page *page;
	pgoff_t index;
	int refs_before = 0;
	int ret;

	*fsdata = NULL;
	if ((flags & AOP_FLAG_CONT_EXPAND) &&
	    !(pos & (inode->i_sb->s_blocksize - 1))) {
		pos++;
		*fsdata = (void *)(unsigned long)flags;
	}

	index = pos >> PAGE_CACHE_SHIFT;
	/*
	 * The AOP flags are handed through untouched so that the page cache
	 * allocation can honour AOP_FLAG_NOFS.
	 */
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;

	reiserfs_wait_on_write_block(inode->i_sb);
	fix_tail_page_for_writing(page);

	/* nest into a transaction that is already running: take a reference */
	if (reiserfs_transaction_running(inode->i_sb)) {
		th = (struct reiserfs_transaction_handle *)current->
		    journal_info;
		BUG_ON(!th->t_refcount);
		BUG_ON(!th->t_trans_id);
		refs_before = th->t_refcount;
		th->t_refcount++;
	}

	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
				reiserfs_get_block);
	if (!ret)
		return 0;

	if (reiserfs_transaction_running(inode->i_sb)) {
		th = current->journal_info;
		/*
		 * reiserfs_get_block returned an error and left a
		 * transaction running.  If we nested into an existing
		 * transaction (refs_before != 0) only our reference is
		 * dropped.  With refs_before == 0 the transaction came from
		 * reiserfs_get_block and is a persistent one, so it has to
		 * be ended, which also frees the handle.
		 */
		if (th->t_refcount > refs_before) {
			if (refs_before) {
				th->t_refcount--;
			} else {
				int err;

				reiserfs_write_lock(inode->i_sb);
				err = reiserfs_end_persistent_transaction(th);
				reiserfs_write_unlock(inode->i_sb);
				if (err)
					ret = err;
			}
		}
	}

	unlock_page(page);
	page_cache_release(page);
	return ret;
}
int reiserfs_prepare_write ( struct file * f , struct page * page ,
unsigned from , unsigned to )
2005-07-13 07:21:28 +04:00
{
struct inode * inode = page - > mapping - > host ;
int ret ;
int old_ref = 0 ;
reiserfs_wait_on_write_block ( inode - > i_sb ) ;
fix_tail_page_for_writing ( page ) ;
if ( reiserfs_transaction_running ( inode - > i_sb ) ) {
struct reiserfs_transaction_handle * th ;
th = ( struct reiserfs_transaction_handle * ) current - >
journal_info ;
BUG_ON ( ! th - > t_refcount ) ;
BUG_ON ( ! th - > t_trans_id ) ;
old_ref = th - > t_refcount ;
th - > t_refcount + + ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
ret = block_prepare_write ( page , from , to , reiserfs_get_block ) ;
if ( ret & & reiserfs_transaction_running ( inode - > i_sb ) ) {
struct reiserfs_transaction_handle * th = current - > journal_info ;
/* this gets a little ugly. If reiserfs_get_block returned an
* error and left a transacstion running , we ' ve got to close it ,
* and we ' ve got to free handle if it was a persistent transaction .
*
* But , if we had nested into an existing transaction , we need
* to just drop the ref count on the handle .
*
* If old_ref = = 0 , the transaction is from reiserfs_get_block ,
* and it was a persistent trans . Otherwise , it was nested above .
*/
if ( th - > t_refcount > old_ref ) {
if ( old_ref )
th - > t_refcount - - ;
else {
int err ;
reiserfs_write_lock ( inode - > i_sb ) ;
err = reiserfs_end_persistent_transaction ( th ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( err )
ret = err ;
}
}
}
return ret ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/*
 * bmap address_space operation: hand the lookup to generic_block_bmap()
 * with reiserfs_bmap as the block-mapping callback.
 */
static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
{
	sector_t mapped = generic_block_bmap(as, block, reiserfs_bmap);

	return mapped;
}
2007-10-16 12:25:14 +04:00
/*
 * write_end address_space operation (installed as .write_end in
 * reiserfs_address_space_operations).
 *
 * Commits the copied bytes of the page with reiserfs_commit_page(),
 * grows inode->i_size inside a one-block journal transaction when the
 * write ends beyond the current size, and ends the persistent
 * transaction found in current->journal_info if one is running.
 * The page is unlocked and released on every exit path.
 *
 * Returns copied (forced to 0 for a short copy into a page that is not
 * uptodate) when ret is 0, otherwise the non-zero status in ret.
 */
static int reiserfs_write_end ( struct file * file , struct address_space * mapping ,
loff_t pos , unsigned len , unsigned copied ,
struct page * page , void * fsdata )
{
struct inode * inode = page - > mapping - > host ;
int ret = 0 ;
/* becomes 1 once the stat data was written by the i_size transaction */
int update_sd = 0 ;
struct reiserfs_transaction_handle * th ;
unsigned start ;
2007-10-16 12:25:14 +04:00
/* NOTE(review): presumably undoes a pos adjustment made on the
 * write_begin side for AOP_FLAG_CONT_EXPAND; that code is not visible
 * from here, confirm before relying on it. */
if ( ( unsigned long ) fsdata & AOP_FLAG_CONT_EXPAND )
pos + + ;
2007-10-16 12:25:14 +04:00
reiserfs_wait_on_write_block ( inode - > i_sb ) ;
/* use the handle attached to this task only if a transaction is running */
if ( reiserfs_transaction_running ( inode - > i_sb ) )
th = current - > journal_info ;
else
th = NULL ;
/* byte offset of pos inside the page */
start = pos & ( PAGE_CACHE_SIZE - 1 ) ;
/* short copy: discard it entirely if the page is not uptodate, and zero
 * the new buffers in the part of the range that was not copied */
if ( unlikely ( copied < len ) ) {
if ( ! PageUptodate ( page ) )
copied = 0 ;
page_zero_new_buffers ( page , start + copied , start + len ) ;
}
flush_dcache_page ( page ) ;
reiserfs_commit_page ( inode , page , start , start + copied ) ;
/* generic_commit_write does this for us, but does not update the
 * transaction tracking stuff when the size changes.  So, we have
 * to do the i_size updates here.
 */
pos + = copied ;
if ( pos > inode - > i_size ) {
struct reiserfs_transaction_handle myth ;
reiserfs_write_lock ( inode - > i_sb ) ;
/* If the file has grown beyond the border where it
 * can have a tail, unmark it as needing tail packing.
 * Note the test uses the old i_size, before it is updated below. */
if ( ( have_large_tails ( inode - > i_sb )
& & inode - > i_size > i_block_size ( inode ) * 4 )
| | ( have_small_tails ( inode - > i_sb )
& & inode - > i_size > i_block_size ( inode ) ) )
REISERFS_I ( inode ) - > i_flags & = ~ i_pack_on_close_mask ;
ret = journal_begin ( & myth , inode - > i_sb , 1 ) ;
if ( ret ) {
reiserfs_write_unlock ( inode - > i_sb ) ;
goto journal_error ;
}
reiserfs_update_inode_transaction ( inode ) ;
inode - > i_size = pos ;
/*
 * this will just nest into our transaction.  It is important
 * to use mark_inode_dirty so the inode gets pushed around on the
 * dirty lists, and so that O_SYNC works as expected
 */
mark_inode_dirty ( inode ) ;
reiserfs_update_sd ( & myth , inode ) ;
update_sd = 1 ;
ret = journal_end ( & myth , inode - > i_sb , 1 ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( ret )
goto journal_error ;
}
/* normal completion: end the persistent transaction, dirtying the inode
 * first unless the stat data was already written above */
if ( th ) {
reiserfs_write_lock ( inode - > i_sb ) ;
if ( ! update_sd )
mark_inode_dirty ( inode ) ;
ret = reiserfs_end_persistent_transaction ( th ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
/* this jump is redundant: out is the next statement either way */
if ( ret )
goto out ;
}
out :
unlock_page ( page ) ;
page_cache_release ( page ) ;
return ret = = 0 ? copied : ret ;
/* reached when journal_begin or journal_end failed for the i_size update */
journal_error :
if ( th ) {
reiserfs_write_lock ( inode - > i_sb ) ;
if ( ! update_sd )
reiserfs_update_sd ( th , inode ) ;
/* NOTE(review): this overwrites the journal error held in ret; if
 * ending the handle succeeds the function reports copied, not the
 * earlier failure. */
ret = reiserfs_end_persistent_transaction ( th ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
}
goto out ;
}
int reiserfs_commit_write ( struct file * f , struct page * page ,
unsigned from , unsigned to )
2005-07-13 07:21:28 +04:00
{
struct inode * inode = page - > mapping - > host ;
loff_t pos = ( ( loff_t ) page - > index < < PAGE_CACHE_SHIFT ) + to ;
int ret = 0 ;
int update_sd = 0 ;
struct reiserfs_transaction_handle * th = NULL ;
reiserfs_wait_on_write_block ( inode - > i_sb ) ;
if ( reiserfs_transaction_running ( inode - > i_sb ) ) {
th = current - > journal_info ;
}
reiserfs_commit_page ( inode , page , from , to ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* generic_commit_write does this for us, but does not update the
* * transaction tracking stuff when the size changes . So , we have
* * to do the i_size updates here .
*/
if ( pos > inode - > i_size ) {
struct reiserfs_transaction_handle myth ;
reiserfs_write_lock ( inode - > i_sb ) ;
/* If the file have grown beyond the border where it
can have a tail , unmark it as needing a tail
packing */
if ( ( have_large_tails ( inode - > i_sb )
& & inode - > i_size > i_block_size ( inode ) * 4 )
| | ( have_small_tails ( inode - > i_sb )
& & inode - > i_size > i_block_size ( inode ) ) )
REISERFS_I ( inode ) - > i_flags & = ~ i_pack_on_close_mask ;
ret = journal_begin ( & myth , inode - > i_sb , 1 ) ;
if ( ret ) {
reiserfs_write_unlock ( inode - > i_sb ) ;
goto journal_error ;
}
reiserfs_update_inode_transaction ( inode ) ;
inode - > i_size = pos ;
2005-09-13 12:25:17 +04:00
/*
* this will just nest into our transaction . It ' s important
* to use mark_inode_dirty so the inode gets pushed around on the
* dirty lists , and so that O_SYNC works as expected
*/
mark_inode_dirty ( inode ) ;
2005-07-13 07:21:28 +04:00
reiserfs_update_sd ( & myth , inode ) ;
update_sd = 1 ;
ret = journal_end ( & myth , inode - > i_sb , 1 ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( ret )
goto journal_error ;
}
if ( th ) {
reiserfs_write_lock ( inode - > i_sb ) ;
if ( ! update_sd )
2005-09-13 12:25:17 +04:00
mark_inode_dirty ( inode ) ;
2005-07-13 07:21:28 +04:00
ret = reiserfs_end_persistent_transaction ( th ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
if ( ret )
goto out ;
}
out :
return ret ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
journal_error :
if ( th ) {
reiserfs_write_lock ( inode - > i_sb ) ;
if ( ! update_sd )
reiserfs_update_sd ( th , inode ) ;
ret = reiserfs_end_persistent_transaction ( th ) ;
reiserfs_write_unlock ( inode - > i_sb ) ;
}
return ret ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
/*
 * Mirror the attribute bits in sd_attrs into the in-core inode flags.
 *
 * Each REISERFS_*_FL bit sets or clears its counterpart: S_SYNC,
 * S_IMMUTABLE, S_APPEND and S_NOATIME in inode->i_flags, and
 * i_nopack_mask in REISERFS_I(inode)->i_flags.  Nothing is touched when
 * reiserfs_attrs() is false for the superblock.
 */
void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
{
	if (!reiserfs_attrs(inode->i_sb))
		return;

	if (sd_attrs & REISERFS_SYNC_FL)
		inode->i_flags |= S_SYNC;
	else
		inode->i_flags &= ~S_SYNC;

	if (sd_attrs & REISERFS_IMMUTABLE_FL)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;

	if (sd_attrs & REISERFS_APPEND_FL)
		inode->i_flags |= S_APPEND;
	else
		inode->i_flags &= ~S_APPEND;

	if (sd_attrs & REISERFS_NOATIME_FL)
		inode->i_flags |= S_NOATIME;
	else
		inode->i_flags &= ~S_NOATIME;

	/* The no-tail bit lives in the reiserfs-private flag word. */
	if (sd_attrs & REISERFS_NOTAIL_FL)
		REISERFS_I(inode)->i_flags |= i_nopack_mask;
	else
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
}
2005-07-13 07:21:28 +04:00
/*
 * Fold the in-core inode flags back into the attribute word *sd_attrs.
 *
 * This is the reverse direction of sd_attrs_to_i_attrs(): for every flag
 * handled there, the matching REISERFS_*_FL bit in *sd_attrs is set when
 * the in-core flag is set and cleared otherwise.  Bits not listed here
 * are left as the caller passed them.  Nothing is touched when
 * reiserfs_attrs() is false for the superblock.
 *
 * S_APPEND is translated to REISERFS_APPEND_FL as well, so that both
 * directions cover the same set of flags; previously the append-only bit
 * was the only one sd_attrs_to_i_attrs() handled that was not written
 * back here.
 */
void i_attrs_to_sd_attrs(struct inode *inode, __u16 *sd_attrs)
{
	if (!reiserfs_attrs(inode->i_sb))
		return;

	if (inode->i_flags & S_IMMUTABLE)
		*sd_attrs |= REISERFS_IMMUTABLE_FL;
	else
		*sd_attrs &= ~REISERFS_IMMUTABLE_FL;

	if (inode->i_flags & S_SYNC)
		*sd_attrs |= REISERFS_SYNC_FL;
	else
		*sd_attrs &= ~REISERFS_SYNC_FL;

	if (inode->i_flags & S_APPEND)
		*sd_attrs |= REISERFS_APPEND_FL;
	else
		*sd_attrs &= ~REISERFS_APPEND_FL;

	if (inode->i_flags & S_NOATIME)
		*sd_attrs |= REISERFS_NOATIME_FL;
	else
		*sd_attrs &= ~REISERFS_NOATIME_FL;

	/* The no-tail bit comes from the reiserfs-private flag word. */
	if (REISERFS_I(inode)->i_flags & i_nopack_mask)
		*sd_attrs |= REISERFS_NOTAIL_FL;
	else
		*sd_attrs &= ~REISERFS_NOTAIL_FL;
}
/* decide if this buffer needs to stay around for data logging or ordered
* * write purposes
*/
static int invalidatepage_can_drop ( struct inode * inode , struct buffer_head * bh )
{
2005-07-13 07:21:28 +04:00
int ret = 1 ;
struct reiserfs_journal * j = SB_JOURNAL ( inode - > i_sb ) ;
2006-02-01 14:06:47 +03:00
lock_buffer ( bh ) ;
2005-07-13 07:21:28 +04:00
spin_lock ( & j - > j_dirty_buffers_lock ) ;
if ( ! buffer_mapped ( bh ) ) {
goto free_jh ;
}
/* the page is locked, and the only places that log a data buffer
* also lock the page .
2005-04-17 02:20:36 +04:00
*/
2005-07-13 07:21:28 +04:00
if ( reiserfs_file_data_log ( inode ) ) {
/*
* very conservative , leave the buffer pinned if
* anyone might need it .
*/
if ( buffer_journaled ( bh ) | | buffer_journal_dirty ( bh ) ) {
ret = 0 ;
}
2006-02-01 14:06:47 +03:00
} else if ( buffer_dirty ( bh ) ) {
2005-07-13 07:21:28 +04:00
struct reiserfs_journal_list * jl ;
struct reiserfs_jh * jh = bh - > b_private ;
/* why is this safe?
* reiserfs_setattr updates i_size in the on disk
* stat data before allowing vmtruncate to be called .
*
* If buffer was put onto the ordered list for this
* transaction , we know for sure either this transaction
* or an older one already has updated i_size on disk ,
* and this ordered data won ' t be referenced in the file
* if we crash .
*
* if the buffer was put onto the ordered list for an older
* transaction , we need to leave it around
*/
if ( jh & & ( jl = jh - > jl )
& & jl ! = SB_JOURNAL ( inode - > i_sb ) - > j_current_jl )
ret = 0 ;
}
free_jh :
if ( ret & & bh - > b_private ) {
reiserfs_free_jh ( bh ) ;
}
spin_unlock ( & j - > j_dirty_buffers_lock ) ;
2006-02-01 14:06:47 +03:00
unlock_buffer ( bh ) ;
2005-07-13 07:21:28 +04:00
return ret ;
2005-04-17 02:20:36 +04:00
}
/* clm -- taken from fs/buffer.c:block_invalidate_page */
2006-03-26 13:37:18 +04:00
static void reiserfs_invalidatepage ( struct page * page , unsigned long offset )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct buffer_head * head , * bh , * next ;
struct inode * inode = page - > mapping - > host ;
unsigned int curr_off = 0 ;
int ret = 1 ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
BUG_ON ( ! PageLocked ( page ) ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( offset = = 0 )
ClearPageChecked ( page ) ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
if ( ! page_has_buffers ( page ) )
goto out ;
head = page_buffers ( page ) ;
bh = head ;
do {
unsigned int next_off = curr_off + bh - > b_size ;
next = bh - > b_this_page ;
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/*
* is this block fully invalidated ?
*/
if ( offset < = curr_off ) {
if ( invalidatepage_can_drop ( inode , bh ) )
reiserfs_unmap_buffer ( bh ) ;
else
ret = 0 ;
}
curr_off = next_off ;
bh = next ;
} while ( bh ! = head ) ;
2005-04-17 02:20:36 +04:00
/*
2005-07-13 07:21:28 +04:00
* We release buffers only if the entire page is being invalidated .
* The get_block cached value has been unconditionally invalidated ,
* so real IO is not possible anymore .
2005-04-17 02:20:36 +04:00
*/
2006-03-26 13:37:18 +04:00
if ( ! offset & & ret ) {
2005-07-13 07:21:28 +04:00
ret = try_to_release_page ( page , 0 ) ;
2006-03-26 13:37:18 +04:00
/* maybe should BUG_ON(!ret); - neilb */
}
2005-07-13 07:21:28 +04:00
out :
2006-03-26 13:37:18 +04:00
return ;
2005-04-17 02:20:36 +04:00
}
2005-07-13 07:21:28 +04:00
static int reiserfs_set_page_dirty ( struct page * page )
{
struct inode * inode = page - > mapping - > host ;
if ( reiserfs_file_data_log ( inode ) ) {
SetPageChecked ( page ) ;
return __set_page_dirty_nobuffers ( page ) ;
}
return __set_page_dirty_buffers ( page ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Returns 1 if the page ' s buffers were dropped . The page is locked .
*
* Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
* in the buffers at page_buffers ( page ) .
*
* even in - o notail mode , we can ' t be sure an old mount without - o notail
* didn ' t create files with tails .
*/
2005-10-21 11:20:48 +04:00
static int reiserfs_releasepage ( struct page * page , gfp_t unused_gfp_flags )
2005-04-17 02:20:36 +04:00
{
2005-07-13 07:21:28 +04:00
struct inode * inode = page - > mapping - > host ;
struct reiserfs_journal * j = SB_JOURNAL ( inode - > i_sb ) ;
struct buffer_head * head ;
struct buffer_head * bh ;
int ret = 1 ;
WARN_ON ( PageChecked ( page ) ) ;
spin_lock ( & j - > j_dirty_buffers_lock ) ;
head = page_buffers ( page ) ;
bh = head ;
do {
if ( bh - > b_private ) {
if ( ! buffer_dirty ( bh ) & & ! buffer_locked ( bh ) ) {
reiserfs_free_jh ( bh ) ;
} else {
ret = 0 ;
break ;
}
}
bh = bh - > b_this_page ;
} while ( bh ! = head ) ;
if ( ret )
ret = try_to_free_buffers ( page ) ;
spin_unlock ( & j - > j_dirty_buffers_lock ) ;
return ret ;
2005-04-17 02:20:36 +04:00
}
/*
 * We thank Mingming Cao for helping us understand in great detail what
 * to do in this section of the code.
 *
 * direct_IO address_space operation: forwards the request to
 * blockdev_direct_IO() on the superblock's block device, using
 * reiserfs_get_blocks_direct_io as the block-mapping callback and no
 * end_io callback.
 */
static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
				  const struct iovec *iov, loff_t offset,
				  unsigned long nr_segs)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;

	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
				  iov, offset, nr_segs,
				  reiserfs_get_blocks_direct_io, NULL);
}
2005-07-13 07:21:28 +04:00
/*
 * Apply the attribute changes described by attr to the dentry's inode.
 *
 * Runs under reiserfs_write_lock.  In order it:
 *   - rejects sizes above MAX_NON_LFS for KEY_FORMAT_3_5 inodes (-EFBIG);
 *   - for an expanding size change, calls generic_cont_expand_simple()
 *     and discards preallocated blocks inside a journal transaction;
 *   - rejects uid/gid values wider than 16 bits on STAT_DATA_V1 inodes
 *     (-EINVAL);
 *   - after inode_change_ok(), handles an owner change (xattrs, quota
 *     transfer, i_uid/i_gid) inside one journal transaction;
 *   - calls inode_setattr() and, with POSIX ACLs enabled and ATTR_MODE
 *     set, reiserfs_acl_chmod().
 *
 * Returns 0 on success or the first error encountered.
 */
int reiserfs_setattr ( struct dentry * dentry , struct iattr * attr )
{
struct inode * inode = dentry - > d_inode ;
int error ;
2007-10-18 14:05:19 +04:00
/* snapshot of attr->ia_valid taken after the KILL bits are cleared and
 * before ATTR_MTIME | ATTR_CTIME may be added below */
unsigned int ia_valid ;
/* must be turned off for recursive notify_change calls */
ia_valid = attr - > ia_valid & = ~ ( ATTR_KILL_SUID | ATTR_KILL_SGID ) ;
2005-07-13 07:21:28 +04:00
reiserfs_write_lock ( inode - > i_sb ) ;
if ( attr - > ia_valid & ATTR_SIZE ) {
/* version 2 items will be caught by the s_maxbytes check
 * done for us in vmtruncate
 */
if ( get_inode_item_key_version ( inode ) = = KEY_FORMAT_3_5 & &
attr - > ia_size > MAX_NON_LFS ) {
error = - EFBIG ;
goto out ;
}
/* fill in hole pointers in the expanding truncate case. */
if ( attr - > ia_size > inode - > i_size ) {
2007-10-16 12:25:14 +04:00
error = generic_cont_expand_simple ( inode , attr - > ia_size ) ;
2005-07-13 07:21:28 +04:00
/* preallocated blocks are discarded even if the expand failed; a
 * journal error here replaces the expand error */
if ( REISERFS_I ( inode ) - > i_prealloc_count > 0 ) {
int err ;
struct reiserfs_transaction_handle th ;
/* we are changing at most 2 bitmaps, inode + super */
err = journal_begin ( & th , inode - > i_sb , 4 ) ;
if ( ! err ) {
reiserfs_discard_prealloc ( & th , inode ) ;
err = journal_end ( & th , inode - > i_sb , 4 ) ;
}
if ( err )
error = err ;
}
if ( error )
goto out ;
2006-07-01 15:36:32 +04:00
/*
 * file size is changed, ctime and mtime are
 * to be updated
 */
attr - > ia_valid | = ( ATTR_MTIME | ATTR_CTIME ) ;
2005-04-17 02:20:36 +04:00
}
}
2005-07-13 07:21:28 +04:00
/* a uid or gid with bits above the low 16 cannot be stored in V1 stat data */
if ( ( ( ( attr - > ia_valid & ATTR_UID ) & & ( attr - > ia_uid & ~ 0xffff ) ) | |
( ( attr - > ia_valid & ATTR_GID ) & & ( attr - > ia_gid & ~ 0xffff ) ) ) & &
( get_inode_sd_version ( inode ) = = STAT_DATA_V1 ) ) {
2005-04-17 02:20:36 +04:00
/* stat data of format v3.5 has 16 bit uid and gid */
2005-07-13 07:21:28 +04:00
error = - EINVAL ;
goto out ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
error = inode_change_ok ( inode , attr ) ;
if ( ! error ) {
/* owner actually changes: update xattrs and transfer quota */
if ( ( ia_valid & ATTR_UID & & attr - > ia_uid ! = inode - > i_uid ) | |
( ia_valid & ATTR_GID & & attr - > ia_gid ! = inode - > i_gid ) ) {
error = reiserfs_chown_xattrs ( inode , attr ) ;
if ( ! error ) {
struct reiserfs_transaction_handle th ;
int jbegin_count =
2 *
( REISERFS_QUOTA_INIT_BLOCKS ( inode - > i_sb ) +
REISERFS_QUOTA_DEL_BLOCKS ( inode - > i_sb ) ) +
2 ;
/* (user+group)*(old+new) structure: quota info plus the inode
 * write (sb, inode) */
error =
journal_begin ( & th , inode - > i_sb ,
jbegin_count ) ;
if ( error )
goto out ;
/* any non-zero result from vfs_dq_transfer is reported as -EDQUOT */
error =
2009-01-26 19:14:18 +03:00
vfs_dq_transfer ( inode , attr ) ? - EDQUOT : 0 ;
2005-07-13 07:21:28 +04:00
if ( error ) {
/* the journal_end result is ignored here; -EDQUOT is what gets returned */
journal_end ( & th , inode - > i_sb ,
jbegin_count ) ;
goto out ;
}
/* Update corresponding info in inode so that everything is in
 * one transaction */
if ( attr - > ia_valid & ATTR_UID )
inode - > i_uid = attr - > ia_uid ;
if ( attr - > ia_valid & ATTR_GID )
inode - > i_gid = attr - > ia_gid ;
mark_inode_dirty ( inode ) ;
error =
journal_end ( & th , inode - > i_sb , jbegin_count ) ;
}
}
if ( ! error )
error = inode_setattr ( inode , attr ) ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
/* with POSIX ACLs enabled, a mode change is followed by reiserfs_acl_chmod */
if ( ! error & & reiserfs_posixacl ( inode - > i_sb ) ) {
if ( attr - > ia_valid & ATTR_MODE )
error = reiserfs_acl_chmod ( inode ) ;
}
2005-04-17 02:20:36 +04:00
2005-07-13 07:21:28 +04:00
out :
reiserfs_write_unlock ( inode - > i_sb ) ;
return error ;
2005-04-17 02:20:36 +04:00
}
2006-06-28 15:26:44 +04:00
const struct address_space_operations reiserfs_address_space_operations = {
2005-07-13 07:21:28 +04:00
. writepage = reiserfs_writepage ,
. readpage = reiserfs_readpage ,
. readpages = reiserfs_readpages ,
. releasepage = reiserfs_releasepage ,
. invalidatepage = reiserfs_invalidatepage ,
. sync_page = block_sync_page ,
2007-10-16 12:25:14 +04:00
. write_begin = reiserfs_write_begin ,
. write_end = reiserfs_write_end ,
2005-07-13 07:21:28 +04:00
. bmap = reiserfs_aop_bmap ,
. direct_IO = reiserfs_direct_IO ,
. set_page_dirty = reiserfs_set_page_dirty ,
} ;