2006-10-11 12:21:03 +04:00
/*
* Copyright ( c ) 2003 - 2006 , Cluster File Systems , Inc , info @ clusterfs . com
* Written by Alex Tomas < alex @ clusterfs . com >
*
* Architecture independence :
* Copyright ( c ) 2005 , Bull S . A .
* Written by Pierre Peiffer < pierre . peiffer @ bull . net >
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
/*
* Extents support for EXT4
*
* TODO :
* - ext4 * _error ( ) should be used in some situations
* - analyze all BUG ( ) / BUG_ON ( ) , use - EIO where appropriate
* - smart tree reduction
*/
# include <linux/module.h>
# include <linux/fs.h>
# include <linux/time.h>
2007-10-17 02:38:25 +04:00
# include <linux/jbd2.h>
2006-10-11 12:21:03 +04:00
# include <linux/highuid.h>
# include <linux/pagemap.h>
# include <linux/quotaops.h>
# include <linux/string.h>
# include <linux/slab.h>
2007-07-18 05:42:41 +04:00
# include <linux/falloc.h>
2006-10-11 12:21:03 +04:00
# include <asm/uaccess.h>
2008-10-07 08:46:36 +04:00
# include <linux/fiemap.h>
2008-04-30 02:13:32 +04:00
# include "ext4_jbd2.h"
# include "ext4_extents.h"
2006-10-11 12:21:03 +04:00
2011-03-22 04:38:05 +03:00
# include <trace/events/ext4.h>
2011-05-25 15:41:43 +04:00
/*
 * Forward declaration of ext4_split_extent(); its definition is not part of
 * the code that precedes this point in the file.
 */
static int ext4_split_extent ( handle_t * handle ,
struct inode * inode ,
struct ext4_ext_path * path ,
struct ext4_map_blocks * map ,
int split_flag ,
int flags ) ;
2009-08-18 06:17:20 +04:00
/*
 * Make sure @handle holds more than @needed buffer credits.
 *
 * Returns 0 when there is nothing to do (the handle is not valid, or it
 * already has more than @needed credits) and whenever
 * ext4_journal_extend() returns 0; a negative result of
 * ext4_journal_extend() is passed through unchanged.  A positive result
 * makes us restart the transaction with ext4_truncate_restart_trans():
 * a restart that returns 0 is reported to the caller as -EAGAIN, any
 * other restart result is returned as is.
 */
static int ext4_ext_truncate_extend_restart(handle_t *handle,
					    struct inode *inode,
					    int needed)
{
	int ret;

	if (!ext4_handle_valid(handle) || handle->h_buffer_credits > needed)
		return 0;

	ret = ext4_journal_extend(handle, needed);
	if (ret <= 0)
		return ret;

	ret = ext4_truncate_restart_trans(handle, inode, needed);
	return ret == 0 ? -EAGAIN : ret;
}
/*
 * Get journal write access to the tree node @path points to.
 *
 * could return:
 *  - EROFS
 *  - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
			       struct ext4_ext_path *path)
{
	/*
	 * No buffer head: the leaf/index sits in the inode body.  We work
	 * on in-core data there, so nothing has to be protected.
	 */
	if (!path->p_bh)
		return 0;

	/* the node lives in a block of its own */
	return ext4_journal_get_write_access(handle, path->p_bh);
}
/*
* could return :
* - EROFS
* - ENOMEM
* - EIO
*/
2011-09-04 18:18:14 +04:00
# define ext4_ext_dirty(handle, inode, path) \
__ext4_ext_dirty ( __func__ , __LINE__ , ( handle ) , ( inode ) , ( path ) )
static int __ext4_ext_dirty ( const char * where , unsigned int line ,
handle_t * handle , struct inode * inode ,
struct ext4_ext_path * path )
2006-10-11 12:21:03 +04:00
{
int err ;
if ( path - > p_bh ) {
/* path points to block */
2011-09-04 18:18:14 +04:00
err = __ext4_handle_dirty_metadata ( where , line , handle ,
inode , path - > p_bh ) ;
2006-10-11 12:21:03 +04:00
} else {
/* path points to leaf/index in inode body */
err = ext4_mark_inode_dirty ( handle , inode ) ;
}
return err ;
}
2006-10-11 12:21:05 +04:00
static ext4_fsblk_t ext4_ext_find_goal ( struct inode * inode ,
2006-10-11 12:21:03 +04:00
struct ext4_ext_path * path ,
2008-01-29 07:58:27 +03:00
ext4_lblk_t block )
2006-10-11 12:21:03 +04:00
{
int depth ;
if ( path ) {
struct ext4_extent * ex ;
depth = path - > p_depth ;
2011-01-10 20:12:28 +03:00
/*
* Try to predict block placement assuming that we are
* filling in a file which will eventually be
* non - sparse - - - i . e . , in the case of libbfd writing
* an ELF object sections out - of - order but in a way
* the eventually results in a contiguous object or
* executable file , or some database extending a table
* space file . However , this is actually somewhat
* non - ideal if we are writing a sparse file such as
* qemu or KVM writing a raw image file that is going
* to stay fairly sparse , since it will end up
* fragmenting the file system ' s free space . Maybe we
* should have some hueristics or some way to allow
* userspace to pass a hint to file system ,
2011-01-21 18:21:31 +03:00
* especially if the latter case turns out to be
2011-01-10 20:12:28 +03:00
* common .
*/
2006-12-07 07:41:33 +03:00
ex = path [ depth ] . p_ext ;
2011-01-10 20:12:28 +03:00
if ( ex ) {
ext4_fsblk_t ext_pblk = ext4_ext_pblock ( ex ) ;
ext4_lblk_t ext_block = le32_to_cpu ( ex - > ee_block ) ;
if ( block > ext_block )
return ext_pblk + ( block - ext_block ) ;
else
return ext_pblk - ( ext_block - block ) ;
}
2006-10-11 12:21:03 +04:00
2006-10-11 12:21:07 +04:00
/* it looks like index is empty;
* try to find starting block from index itself */
2006-10-11 12:21:03 +04:00
if ( path [ depth ] . p_bh )
return path [ depth ] . p_bh - > b_blocknr ;
}
/* OK. use inode's group */
2011-06-28 18:01:31 +04:00
return ext4_inode_to_goal_block ( inode ) ;
2006-10-11 12:21:03 +04:00
}
2008-07-12 03:27:31 +04:00
/*
* Allocation for a meta data block
*/
2006-10-11 12:21:05 +04:00
static ext4_fsblk_t
2008-07-12 03:27:31 +04:00
ext4_ext_new_meta_block ( handle_t * handle , struct inode * inode ,
2006-10-11 12:21:03 +04:00
struct ext4_ext_path * path ,
2011-05-25 15:41:26 +04:00
struct ext4_extent * ex , int * err , unsigned int flags )
2006-10-11 12:21:03 +04:00
{
2006-10-11 12:21:05 +04:00
ext4_fsblk_t goal , newblock ;
2006-10-11 12:21:03 +04:00
goal = ext4_ext_find_goal ( inode , path , le32_to_cpu ( ex - > ee_block ) ) ;
2011-05-25 15:41:26 +04:00
newblock = ext4_new_meta_blocks ( handle , inode , goal , flags ,
NULL , err ) ;
2006-10-11 12:21:03 +04:00
return newblock ;
}
2009-08-28 18:40:33 +04:00
static inline int ext4_ext_space_block ( struct inode * inode , int check )
2006-10-11 12:21:03 +04:00
{
int size ;
size = ( inode - > i_sb - > s_blocksize - sizeof ( struct ext4_extent_header ) )
/ sizeof ( struct ext4_extent ) ;
2009-08-28 18:40:33 +04:00
if ( ! check ) {
2007-02-17 21:20:16 +03:00
# ifdef AGGRESSIVE_TEST
2009-08-28 18:40:33 +04:00
if ( size > 6 )
size = 6 ;
2006-10-11 12:21:03 +04:00
# endif
2009-08-28 18:40:33 +04:00
}
2006-10-11 12:21:03 +04:00
return size ;
}
2009-08-28 18:40:33 +04:00
static inline int ext4_ext_space_block_idx ( struct inode * inode , int check )
2006-10-11 12:21:03 +04:00
{
int size ;
size = ( inode - > i_sb - > s_blocksize - sizeof ( struct ext4_extent_header ) )
/ sizeof ( struct ext4_extent_idx ) ;
2009-08-28 18:40:33 +04:00
if ( ! check ) {
2007-02-17 21:20:16 +03:00
# ifdef AGGRESSIVE_TEST
2009-08-28 18:40:33 +04:00
if ( size > 5 )
size = 5 ;
2006-10-11 12:21:03 +04:00
# endif
2009-08-28 18:40:33 +04:00
}
2006-10-11 12:21:03 +04:00
return size ;
}
2009-08-28 18:40:33 +04:00
static inline int ext4_ext_space_root ( struct inode * inode , int check )
2006-10-11 12:21:03 +04:00
{
int size ;
size = sizeof ( EXT4_I ( inode ) - > i_data ) ;
size - = sizeof ( struct ext4_extent_header ) ;
size / = sizeof ( struct ext4_extent ) ;
2009-08-28 18:40:33 +04:00
if ( ! check ) {
2007-02-17 21:20:16 +03:00
# ifdef AGGRESSIVE_TEST
2009-08-28 18:40:33 +04:00
if ( size > 3 )
size = 3 ;
2006-10-11 12:21:03 +04:00
# endif
2009-08-28 18:40:33 +04:00
}
2006-10-11 12:21:03 +04:00
return size ;
}
2009-08-28 18:40:33 +04:00
static inline int ext4_ext_space_root_idx ( struct inode * inode , int check )
2006-10-11 12:21:03 +04:00
{
int size ;
size = sizeof ( EXT4_I ( inode ) - > i_data ) ;
size - = sizeof ( struct ext4_extent_header ) ;
size / = sizeof ( struct ext4_extent_idx ) ;
2009-08-28 18:40:33 +04:00
if ( ! check ) {
2007-02-17 21:20:16 +03:00
# ifdef AGGRESSIVE_TEST
2009-08-28 18:40:33 +04:00
if ( size > 4 )
size = 4 ;
2006-10-11 12:21:03 +04:00
# endif
2009-08-28 18:40:33 +04:00
}
2006-10-11 12:21:03 +04:00
return size ;
}
2008-07-15 01:52:37 +04:00
/*
* Calculate the number of metadata blocks needed
* to allocate @ blocks
* Worse case is one block per extent
*/
2011-01-10 20:13:03 +03:00
int ext4_ext_calc_metadata_amount ( struct inode * inode , ext4_lblk_t lblock )
2008-07-15 01:52:37 +04:00
{
2010-01-01 10:41:30 +03:00
struct ext4_inode_info * ei = EXT4_I ( inode ) ;
int idxs , num = 0 ;
2008-07-15 01:52:37 +04:00
2010-01-01 10:41:30 +03:00
idxs = ( ( inode - > i_sb - > s_blocksize - sizeof ( struct ext4_extent_header ) )
/ sizeof ( struct ext4_extent_idx ) ) ;
2008-07-15 01:52:37 +04:00
/*
2010-01-01 10:41:30 +03:00
* If the new delayed allocation block is contiguous with the
* previous da block , it can share index blocks with the
* previous block , so we only need to allocate a new index
* block every idxs leaf blocks . At ldxs * * 2 blocks , we need
* an additional index block , and at ldxs * * 3 blocks , yet
* another index blocks .
2008-07-15 01:52:37 +04:00
*/
2010-01-01 10:41:30 +03:00
if ( ei - > i_da_metadata_calc_len & &
ei - > i_da_metadata_calc_last_lblock + 1 = = lblock ) {
if ( ( ei - > i_da_metadata_calc_len % idxs ) = = 0 )
num + + ;
if ( ( ei - > i_da_metadata_calc_len % ( idxs * idxs ) ) = = 0 )
num + + ;
if ( ( ei - > i_da_metadata_calc_len % ( idxs * idxs * idxs ) ) = = 0 ) {
num + + ;
ei - > i_da_metadata_calc_len = 0 ;
} else
ei - > i_da_metadata_calc_len + + ;
ei - > i_da_metadata_calc_last_lblock + + ;
return num ;
}
2008-07-15 01:52:37 +04:00
2010-01-01 10:41:30 +03:00
/*
* In the worst case we need a new set of index blocks at
* every level of the inode ' s extent tree .
*/
ei - > i_da_metadata_calc_len = 1 ;
ei - > i_da_metadata_calc_last_lblock = lblock ;
return ext_depth ( inode ) + 1 ;
2008-07-15 01:52:37 +04:00
}
2007-07-18 17:19:09 +04:00
/*
 * Largest number of entries a node at @depth may hold.  A node whose
 * depth equals ext_depth(inode) is sized with the root helpers, any
 * other node with the block helpers; depth 0 selects the extent variant
 * and any other depth the index variant.  The helpers are called with
 * check == 1.
 */
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	const int in_root = (depth == ext_depth(inode));
	const int is_leaf = (depth == 0);

	if (in_root)
		return is_leaf ? ext4_ext_space_root(inode, 1)
			       : ext4_ext_space_root_idx(inode, 1);

	return is_leaf ? ext4_ext_space_block(inode, 1)
		       : ext4_ext_space_block_idx(inode, 1);
}
2009-03-12 16:51:20 +03:00
static int ext4_valid_extent ( struct inode * inode , struct ext4_extent * ext )
{
2010-10-28 05:30:14 +04:00
ext4_fsblk_t block = ext4_ext_pblock ( ext ) ;
2009-03-12 16:51:20 +03:00
int len = ext4_ext_get_actual_len ( ext ) ;
2009-04-23 04:52:25 +04:00
2009-05-17 23:38:01 +04:00
return ext4_data_block_valid ( EXT4_SB ( inode - > i_sb ) , block , len ) ;
2009-03-12 16:51:20 +03:00
}
static int ext4_valid_extent_idx ( struct inode * inode ,
struct ext4_extent_idx * ext_idx )
{
2010-10-28 05:30:14 +04:00
ext4_fsblk_t block = ext4_idx_pblock ( ext_idx ) ;
2009-04-23 04:52:25 +04:00
2009-05-17 23:38:01 +04:00
return ext4_data_block_valid ( EXT4_SB ( inode - > i_sb ) , block , 1 ) ;
2009-03-12 16:51:20 +03:00
}
static int ext4_valid_extent_entries ( struct inode * inode ,
struct ext4_extent_header * eh ,
int depth )
{
struct ext4_extent * ext ;
struct ext4_extent_idx * ext_idx ;
unsigned short entries ;
if ( eh - > eh_entries = = 0 )
return 1 ;
entries = le16_to_cpu ( eh - > eh_entries ) ;
if ( depth = = 0 ) {
/* leaf entries */
ext = EXT_FIRST_EXTENT ( eh ) ;
while ( entries ) {
if ( ! ext4_valid_extent ( inode , ext ) )
return 0 ;
ext + + ;
entries - - ;
}
} else {
ext_idx = EXT_FIRST_INDEX ( eh ) ;
while ( entries ) {
if ( ! ext4_valid_extent_idx ( inode , ext_idx ) )
return 0 ;
ext_idx + + ;
entries - - ;
}
}
return 1 ;
}
2010-07-27 19:56:40 +04:00
static int __ext4_ext_check ( const char * function , unsigned int line ,
struct inode * inode , struct ext4_extent_header * eh ,
int depth )
2007-07-18 17:19:09 +04:00
{
const char * error_msg ;
int max = 0 ;
if ( unlikely ( eh - > eh_magic ! = EXT4_EXT_MAGIC ) ) {
error_msg = " invalid magic " ;
goto corrupted ;
}
if ( unlikely ( le16_to_cpu ( eh - > eh_depth ) ! = depth ) ) {
error_msg = " unexpected eh_depth " ;
goto corrupted ;
}
if ( unlikely ( eh - > eh_max = = 0 ) ) {
error_msg = " invalid eh_max " ;
goto corrupted ;
}
max = ext4_ext_max_entries ( inode , depth ) ;
if ( unlikely ( le16_to_cpu ( eh - > eh_max ) > max ) ) {
error_msg = " too large eh_max " ;
goto corrupted ;
}
if ( unlikely ( le16_to_cpu ( eh - > eh_entries ) > le16_to_cpu ( eh - > eh_max ) ) ) {
error_msg = " invalid eh_entries " ;
goto corrupted ;
}
2009-03-12 16:51:20 +03:00
if ( ! ext4_valid_extent_entries ( inode , eh , depth ) ) {
error_msg = " invalid extent entries " ;
goto corrupted ;
}
2007-07-18 17:19:09 +04:00
return 0 ;
corrupted :
2010-07-27 19:56:40 +04:00
ext4_error_inode ( inode , function , line , 0 ,
2010-05-17 05:00:00 +04:00
" bad header/extent: %s - magic %x, "
2007-07-18 17:19:09 +04:00
" entries %u, max %u(%u), depth %u(%u) " ,
2010-05-17 05:00:00 +04:00
error_msg , le16_to_cpu ( eh - > eh_magic ) ,
2007-07-18 17:19:09 +04:00
le16_to_cpu ( eh - > eh_entries ) , le16_to_cpu ( eh - > eh_max ) ,
max , le16_to_cpu ( eh - > eh_depth ) , depth ) ;
return - EIO ;
}
2009-03-12 16:51:20 +03:00
# define ext4_ext_check(inode, eh, depth) \
2010-07-27 19:56:40 +04:00
__ext4_ext_check ( __func__ , __LINE__ , inode , eh , depth )
2007-07-18 17:19:09 +04:00
2009-03-27 23:39:58 +03:00
/*
 * Run ext4_ext_check() on the extent header stored in @inode itself,
 * expecting it to be at the depth reported by ext_depth().
 * Returns whatever ext4_ext_check() returns.
 */
int ext4_ext_check_inode(struct inode *inode)
{
	int rc;

	rc = ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
	return rc;
}
2006-10-11 12:21:03 +04:00
# ifdef EXT_DEBUG
static void ext4_ext_show_path ( struct inode * inode , struct ext4_ext_path * path )
{
int k , l = path - > p_depth ;
ext_debug ( " path: " ) ;
for ( k = 0 ; k < = l ; k + + , path + + ) {
if ( path - > p_idx ) {
2006-10-11 12:21:11 +04:00
ext_debug ( " %d->%llu " , le32_to_cpu ( path - > p_idx - > ei_block ) ,
2010-10-28 05:30:14 +04:00
ext4_idx_pblock ( path - > p_idx ) ) ;
2006-10-11 12:21:03 +04:00
} else if ( path - > p_ext ) {
2009-09-18 21:34:55 +04:00
ext_debug ( " %d:[%d]%d:%llu " ,
2006-10-11 12:21:03 +04:00
le32_to_cpu ( path - > p_ext - > ee_block ) ,
2009-09-18 21:34:55 +04:00
ext4_ext_is_uninitialized ( path - > p_ext ) ,
2007-07-18 05:42:41 +04:00
ext4_ext_get_actual_len ( path - > p_ext ) ,
2010-10-28 05:30:14 +04:00
ext4_ext_pblock ( path - > p_ext ) ) ;
2006-10-11 12:21:03 +04:00
} else
ext_debug ( " [] " ) ;
}
ext_debug ( " \n " ) ;
}
static void ext4_ext_show_leaf ( struct inode * inode , struct ext4_ext_path * path )
{
int depth = ext_depth ( inode ) ;
struct ext4_extent_header * eh ;
struct ext4_extent * ex ;
int i ;
if ( ! path )
return ;
eh = path [ depth ] . p_hdr ;
ex = EXT_FIRST_EXTENT ( eh ) ;
2009-09-18 21:34:55 +04:00
ext_debug ( " Displaying leaf extents for inode %lu \n " , inode - > i_ino ) ;
2006-10-11 12:21:03 +04:00
for ( i = 0 ; i < le16_to_cpu ( eh - > eh_entries ) ; i + + , ex + + ) {
2009-09-18 21:34:55 +04:00
ext_debug ( " %d:[%d]%d:%llu " , le32_to_cpu ( ex - > ee_block ) ,
ext4_ext_is_uninitialized ( ex ) ,
2010-10-28 05:30:14 +04:00
ext4_ext_get_actual_len ( ex ) , ext4_ext_pblock ( ex ) ) ;
2006-10-11 12:21:03 +04:00
}
ext_debug ( " \n " ) ;
}
2011-05-26 01:41:48 +04:00
/*
 * Debug helper: print the entries of path[@level], starting at the
 * entry the path currently points to and going up to the node's maximum
 * slot, as being moved to @newblock.  When @level equals
 * ext_depth(inode) the entries are printed as extents, otherwise as
 * index entries.
 */
static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
			       ext4_fsblk_t newblock, int level)
{
	int depth = ext_depth(inode);

	if (depth == level) {
		struct ext4_extent *ex;

		for (ex = path[depth].p_ext;
		     ex <= EXT_MAX_EXTENT(path[depth].p_hdr); ex++)
			ext_debug(" move %d:%llu:[%d]%d in new leaf %llu \n ",
				  le32_to_cpu(ex->ee_block),
				  ext4_ext_pblock(ex),
				  ext4_ext_is_uninitialized(ex),
				  ext4_ext_get_actual_len(ex),
				  newblock);
	} else {
		struct ext4_extent_idx *idx;

		for (idx = path[level].p_idx;
		     idx <= EXT_MAX_INDEX(path[level].p_hdr); idx++)
			ext_debug(" %d: move %d:%llu in new index %llu \n ",
				  level,
				  le32_to_cpu(idx->ei_block),
				  ext4_idx_pblock(idx),
				  newblock);
	}
}
2006-10-11 12:21:03 +04:00
# else
2008-09-09 06:25:24 +04:00
/* EXT_DEBUG is not defined: the debug dump helpers expand to nothing. */
# define ext4_ext_show_path(inode, path)
# define ext4_ext_show_leaf(inode, path)
2011-05-26 01:41:48 +04:00
# define ext4_ext_show_move(inode, path, newblock, level)
2006-10-11 12:21:03 +04:00
# endif
2008-02-26 00:54:37 +03:00
void ext4_ext_drop_refs ( struct ext4_ext_path * path )
2006-10-11 12:21:03 +04:00
{
int depth = path - > p_depth ;
int i ;
for ( i = 0 ; i < = depth ; i + + , path + + )
if ( path - > p_bh ) {
brelse ( path - > p_bh ) ;
path - > p_bh = NULL ;
}
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_binsearch_idx :
* binary search for the closest index of the given block
2007-07-18 17:19:09 +04:00
* the header must be checked before calling this
2006-10-11 12:21:03 +04:00
*/
static void
2008-01-29 07:58:27 +03:00
ext4_ext_binsearch_idx ( struct inode * inode ,
struct ext4_ext_path * path , ext4_lblk_t block )
2006-10-11 12:21:03 +04:00
{
struct ext4_extent_header * eh = path - > p_hdr ;
struct ext4_extent_idx * r , * l , * m ;
2008-01-29 07:58:27 +03:00
ext_debug ( " binsearch for %u(idx): " , block ) ;
2006-10-11 12:21:03 +04:00
l = EXT_FIRST_INDEX ( eh ) + 1 ;
2007-07-18 17:09:15 +04:00
r = EXT_LAST_INDEX ( eh ) ;
2006-10-11 12:21:03 +04:00
while ( l < = r ) {
m = l + ( r - l ) / 2 ;
if ( block < le32_to_cpu ( m - > ei_block ) )
r = m - 1 ;
else
l = m + 1 ;
2007-07-18 16:33:37 +04:00
ext_debug ( " %p(%u):%p(%u):%p(%u) " , l , le32_to_cpu ( l - > ei_block ) ,
m , le32_to_cpu ( m - > ei_block ) ,
r , le32_to_cpu ( r - > ei_block ) ) ;
2006-10-11 12:21:03 +04:00
}
path - > p_idx = l - 1 ;
2006-10-11 12:21:05 +04:00
ext_debug ( " -> %d->%lld " , le32_to_cpu ( path - > p_idx - > ei_block ) ,
2010-10-28 05:30:14 +04:00
ext4_idx_pblock ( path - > p_idx ) ) ;
2006-10-11 12:21:03 +04:00
# ifdef CHECK_BINSEARCH
{
struct ext4_extent_idx * chix , * ix ;
int k ;
chix = ix = EXT_FIRST_INDEX ( eh ) ;
for ( k = 0 ; k < le16_to_cpu ( eh - > eh_entries ) ; k + + , ix + + ) {
if ( k ! = 0 & &
le32_to_cpu ( ix - > ei_block ) < = le32_to_cpu ( ix [ - 1 ] . ei_block ) ) {
2008-09-09 07:00:52 +04:00
printk ( KERN_DEBUG " k=%d, ix=0x%p, "
" first=0x%p \n " , k ,
ix , EXT_FIRST_INDEX ( eh ) ) ;
printk ( KERN_DEBUG " %u <= %u \n " ,
2006-10-11 12:21:03 +04:00
le32_to_cpu ( ix - > ei_block ) ,
le32_to_cpu ( ix [ - 1 ] . ei_block ) ) ;
}
BUG_ON ( k & & le32_to_cpu ( ix - > ei_block )
2007-05-24 21:04:54 +04:00
< = le32_to_cpu ( ix [ - 1 ] . ei_block ) ) ;
2006-10-11 12:21:03 +04:00
if ( block < le32_to_cpu ( ix - > ei_block ) )
break ;
chix = ix ;
}
BUG_ON ( chix ! = path - > p_idx ) ;
}
# endif
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_binsearch :
* binary search for closest extent of the given block
2007-07-18 17:19:09 +04:00
* the header must be checked before calling this
2006-10-11 12:21:03 +04:00
*/
static void
2008-01-29 07:58:27 +03:00
ext4_ext_binsearch ( struct inode * inode ,
struct ext4_ext_path * path , ext4_lblk_t block )
2006-10-11 12:21:03 +04:00
{
struct ext4_extent_header * eh = path - > p_hdr ;
struct ext4_extent * r , * l , * m ;
if ( eh - > eh_entries = = 0 ) {
/*
2006-10-11 12:21:07 +04:00
* this leaf is empty :
* we get such a leaf in split / add case
2006-10-11 12:21:03 +04:00
*/
return ;
}
2008-01-29 07:58:27 +03:00
ext_debug ( " binsearch for %u: " , block ) ;
2006-10-11 12:21:03 +04:00
l = EXT_FIRST_EXTENT ( eh ) + 1 ;
2007-07-18 17:09:15 +04:00
r = EXT_LAST_EXTENT ( eh ) ;
2006-10-11 12:21:03 +04:00
while ( l < = r ) {
m = l + ( r - l ) / 2 ;
if ( block < le32_to_cpu ( m - > ee_block ) )
r = m - 1 ;
else
l = m + 1 ;
2007-07-18 16:33:37 +04:00
ext_debug ( " %p(%u):%p(%u):%p(%u) " , l , le32_to_cpu ( l - > ee_block ) ,
m , le32_to_cpu ( m - > ee_block ) ,
r , le32_to_cpu ( r - > ee_block ) ) ;
2006-10-11 12:21:03 +04:00
}
path - > p_ext = l - 1 ;
2009-09-18 21:34:55 +04:00
ext_debug ( " -> %d:%llu:[%d]%d " ,
2007-05-24 21:04:54 +04:00
le32_to_cpu ( path - > p_ext - > ee_block ) ,
2010-10-28 05:30:14 +04:00
ext4_ext_pblock ( path - > p_ext ) ,
2009-09-18 21:34:55 +04:00
ext4_ext_is_uninitialized ( path - > p_ext ) ,
2007-07-18 05:42:41 +04:00
ext4_ext_get_actual_len ( path - > p_ext ) ) ;
2006-10-11 12:21:03 +04:00
# ifdef CHECK_BINSEARCH
{
struct ext4_extent * chex , * ex ;
int k ;
chex = ex = EXT_FIRST_EXTENT ( eh ) ;
for ( k = 0 ; k < le16_to_cpu ( eh - > eh_entries ) ; k + + , ex + + ) {
BUG_ON ( k & & le32_to_cpu ( ex - > ee_block )
2007-05-24 21:04:54 +04:00
< = le32_to_cpu ( ex [ - 1 ] . ee_block ) ) ;
2006-10-11 12:21:03 +04:00
if ( block < le32_to_cpu ( ex - > ee_block ) )
break ;
chex = ex ;
}
BUG_ON ( chex ! = path - > p_ext ) ;
}
# endif
}
/*
 * Set up an empty extent tree in the body of @inode: depth 0, no
 * entries, the extent magic, and eh_max taken from
 * ext4_ext_space_root(inode, 0).  The inode is then marked dirty and its
 * extent cache invalidated.
 *
 * Always returns 0; the result of ext4_mark_inode_dirty() is not looked
 * at.
 */
int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *root = ext_inode_hdr(inode);

	root->eh_magic = EXT4_EXT_MAGIC;
	root->eh_depth = 0;
	root->eh_entries = 0;
	root->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));

	ext4_mark_inode_dirty(handle, inode);
	ext4_ext_invalidate_cache(inode);

	return 0;
}
/*
 * ext4_ext_find_extent:
 * walk the extent tree of @inode from the header kept in the inode down
 * to depth 0, choosing at every index level the entry selected by
 * ext4_ext_binsearch_idx() for @block, and record each visited node in
 * path[].
 *
 * @path may be NULL, in which case an array of depth + 2 zeroed entries
 * is allocated here with GFP_NOFS.
 *
 * Returns the path on success.  Returns ERR_PTR(-ENOMEM) when the array
 * cannot be allocated and ERR_PTR(-EIO) for every failure during the
 * walk; on that path the buffer references are dropped and the array is
 * freed only if it was allocated here.
 *
 * NOTE(review): a NULL return from sb_getblk() is also reported as -EIO.
 */
struct ext4_ext_path *
2008-01-29 07:58:27 +03:00
ext4_ext_find_extent ( struct inode * inode , ext4_lblk_t block ,
struct ext4_ext_path * path )
2006-10-11 12:21:03 +04:00
{
struct ext4_extent_header * eh ;
struct buffer_head * bh ;
/* ppos: index into path[] ; alloc: set when path was allocated here */
short int depth , i , ppos = 0 , alloc = 0 ;
eh = ext_inode_hdr ( inode ) ;
2007-07-18 17:19:09 +04:00
depth = ext_depth ( inode ) ;
2006-10-11 12:21:03 +04:00
/* account possible depth increase */
if ( ! path ) {
2006-12-07 07:41:35 +03:00
path = kzalloc ( sizeof ( struct ext4_ext_path ) * ( depth + 2 ) ,
2006-10-11 12:21:03 +04:00
GFP_NOFS ) ;
if ( ! path )
return ERR_PTR ( - ENOMEM ) ;
alloc = 1 ;
}
/* entry 0 describes the header stored in the inode: no buffer head */
path [ 0 ] . p_hdr = eh ;
2008-07-12 03:27:31 +04:00
path [ 0 ] . p_bh = NULL ;
2006-10-11 12:21:03 +04:00
2007-07-18 17:19:09 +04:00
/* i counts the levels still below the current node */
i = depth ;
2006-10-11 12:21:03 +04:00
/* walk through the tree */
while ( i ) {
2009-03-27 23:39:58 +03:00
int need_to_validate = 0 ;
2006-10-11 12:21:03 +04:00
ext_debug ( " depth %d: num %d, max %d \n " ,
ppos , le16_to_cpu ( eh - > eh_entries ) , le16_to_cpu ( eh - > eh_max ) ) ;
2007-07-18 17:19:09 +04:00
2006-10-11 12:21:03 +04:00
/* pick the index entry for block and remember the child it points to */
ext4_ext_binsearch_idx ( inode , path + ppos , block ) ;
2010-10-28 05:30:14 +04:00
path [ ppos ] . p_block = ext4_idx_pblock ( path [ ppos ] . p_idx ) ;
2006-10-11 12:21:03 +04:00
path [ ppos ] . p_depth = i ;
path [ ppos ] . p_ext = NULL ;
2009-03-27 23:39:58 +03:00
bh = sb_getblk ( inode - > i_sb , path [ ppos ] . p_block ) ;
if ( unlikely ( ! bh ) )
2006-10-11 12:21:03 +04:00
goto err ;
2009-03-27 23:39:58 +03:00
/* zero from bh_uptodate_or_lock(): read the block and validate it below */
if ( ! bh_uptodate_or_lock ( bh ) ) {
2011-03-22 04:38:05 +03:00
trace_ext4_ext_load_extent ( inode , block ,
path [ ppos ] . p_block ) ;
2009-03-27 23:39:58 +03:00
if ( bh_submit_read ( bh ) < 0 ) {
/* bh is not stored in path yet, so release it here */
put_bh ( bh ) ;
goto err ;
}
/* validate the extent entries */
need_to_validate = 1 ;
}
2006-10-11 12:21:03 +04:00
eh = ext_block_hdr ( bh ) ;
ppos + + ;
2010-03-02 19:46:09 +03:00
/* never step past the depth the walk started with */
if ( unlikely ( ppos > depth ) ) {
put_bh ( bh ) ;
EXT4_ERROR_INODE ( inode ,
" ppos %d > depth %d " , ppos , depth ) ;
goto err ;
}
2006-10-11 12:21:03 +04:00
/* from here on ext4_ext_drop_refs() releases bh */
path [ ppos ] . p_bh = bh ;
path [ ppos ] . p_hdr = eh ;
i - - ;
2009-03-27 23:39:58 +03:00
/* only blocks read in this call are checked */
if ( need_to_validate & & ext4_ext_check ( inode , eh , i ) )
2006-10-11 12:21:03 +04:00
goto err ;
}
/* the walk ended: i is 0 and path[ppos] is the last node reached */
path [ ppos ] . p_depth = i ;
path [ ppos ] . p_ext = NULL ;
path [ ppos ] . p_idx = NULL ;
/* find extent */
ext4_ext_binsearch ( inode , path + ppos , block ) ;
2008-07-12 03:27:31 +04:00
/* if not an empty leaf */
if ( path [ ppos ] . p_ext )
2010-10-28 05:30:14 +04:00
path [ ppos ] . p_block = ext4_ext_pblock ( path [ ppos ] . p_ext ) ;
2006-10-11 12:21:03 +04:00
ext4_ext_show_path ( inode , path ) ;
return path ;
err :
/* drop the buffers collected so far; free path only if we allocated it */
ext4_ext_drop_refs ( path ) ;
if ( alloc )
kfree ( path ) ;
return ERR_PTR ( - EIO ) ;
}
/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 *
 * The entry goes after curp->p_idx when @logical is greater than that
 * entry's ei_block, otherwise before it.  Entries behind the insertion
 * point are shifted up by one slot with memmove().
 *
 * Returns the result of ext4_ext_get_access() if that fails, -EIO when
 * @logical equals the ei_block of curp->p_idx, when the node is already
 * full, or when the new entry ends up past EXT_LAST_INDEX(), and
 * otherwise the result of ext4_ext_dirty().
 */
2010-10-28 05:30:14 +04:00
static int ext4_ext_insert_index ( handle_t * handle , struct inode * inode ,
struct ext4_ext_path * curp ,
int logical , ext4_fsblk_t ptr )
2006-10-11 12:21:03 +04:00
{
struct ext4_extent_idx * ix ;
int len , err ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , curp ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
2010-03-02 19:46:09 +03:00
/* an index entry starting at this logical block already exists */
if ( unlikely ( logical = = le32_to_cpu ( curp - > p_idx - > ei_block ) ) ) {
EXT4_ERROR_INODE ( inode ,
" logical %d == ei_block %d! " ,
logical , le32_to_cpu ( curp - > p_idx - > ei_block ) ) ;
return - EIO ;
}
2011-07-18 07:43:42 +04:00
/* no free slot left in this node */
if ( unlikely ( le16_to_cpu ( curp - > p_hdr - > eh_entries )
> = le16_to_cpu ( curp - > p_hdr - > eh_max ) ) ) {
EXT4_ERROR_INODE ( inode ,
" eh_entries %d >= eh_max %d! " ,
le16_to_cpu ( curp - > p_hdr - > eh_entries ) ,
le16_to_cpu ( curp - > p_hdr - > eh_max ) ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
/* distance from the current entry to EXT_MAX_INDEX(), in entries */
len = EXT_MAX_INDEX ( curp - > p_hdr ) - curp - > p_idx ;
if ( logical > le32_to_cpu ( curp - > p_idx - > ei_block ) ) {
/* insert after */
if ( curp - > p_idx ! = EXT_LAST_INDEX ( curp - > p_hdr ) ) {
/* len becomes a byte count; negative values are clamped to 0 */
len = ( len - 1 ) * sizeof ( struct ext4_extent_idx ) ;
len = len < 0 ? 0 : len ;
2007-07-18 16:33:37 +04:00
ext_debug ( " insert new index %d after: %llu. "
2006-10-11 12:21:03 +04:00
" move %d from 0x%p to 0x%p \n " ,
logical , ptr , len ,
( curp - > p_idx + 1 ) , ( curp - > p_idx + 2 ) ) ;
memmove ( curp - > p_idx + 2 , curp - > p_idx + 1 , len ) ;
}
ix = curp - > p_idx + 1 ;
} else {
/* insert before */
/* len becomes a byte count; negative values are clamped to 0 */
len = len * sizeof ( struct ext4_extent_idx ) ;
len = len < 0 ? 0 : len ;
2007-07-18 16:33:37 +04:00
ext_debug ( " insert new index %d before: %llu. "
2006-10-11 12:21:03 +04:00
" move %d from 0x%p to 0x%p \n " ,
logical , ptr , len ,
curp - > p_idx , ( curp - > p_idx + 1 ) ) ;
memmove ( curp - > p_idx + 1 , curp - > p_idx , len ) ;
ix = curp - > p_idx ;
}
/* fill in the freed slot and account for the new entry */
ix - > ei_block = cpu_to_le32 ( logical ) ;
2006-10-11 12:21:05 +04:00
ext4_idx_store_pblock ( ix , ptr ) ;
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & curp - > p_hdr - > eh_entries , 1 ) ;
2006-10-11 12:21:03 +04:00
2010-03-02 19:46:09 +03:00
/* checked after eh_entries was incremented, so the new entry must be within range */
if ( unlikely ( ix > EXT_LAST_INDEX ( curp - > p_hdr ) ) ) {
EXT4_ERROR_INODE ( inode , " ix > EXT_LAST_INDEX! " ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
err = ext4_ext_dirty ( handle , inode , curp ) ;
ext4_std_error ( inode - > i_sb , err ) ;
return err ;
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_split :
* inserts new subtree into the path , using free index entry
* at depth @ at :
* - allocates all needed blocks ( new leaf and all intermediate index blocks )
* - makes decision where to split
* - moves remaining extents and index entries ( right to the split point )
* into the newly allocated blocks
* - initializes subtree
2006-10-11 12:21:03 +04:00
*/
static int ext4_ext_split ( handle_t * handle , struct inode * inode ,
2011-05-25 15:41:26 +04:00
unsigned int flags ,
struct ext4_ext_path * path ,
struct ext4_extent * newext , int at )
2006-10-11 12:21:03 +04:00
{
struct buffer_head * bh = NULL ;
int depth = ext_depth ( inode ) ;
struct ext4_extent_header * neh ;
struct ext4_extent_idx * fidx ;
int i = at , k , m , a ;
2006-10-11 12:21:05 +04:00
ext4_fsblk_t newblock , oldblock ;
2006-10-11 12:21:03 +04:00
__le32 border ;
2006-10-11 12:21:05 +04:00
ext4_fsblk_t * ablocks = NULL ; /* array of allocated blocks */
2006-10-11 12:21:03 +04:00
int err = 0 ;
/* make decision: where to split? */
2006-10-11 12:21:07 +04:00
/* FIXME: now decision is simplest: at current extent */
2006-10-11 12:21:03 +04:00
2006-10-11 12:21:07 +04:00
/* if current leaf will be split, then we should use
2006-10-11 12:21:03 +04:00
* border from split point */
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_ext > EXT_MAX_EXTENT ( path [ depth ] . p_hdr ) ) ) {
EXT4_ERROR_INODE ( inode , " p_ext > EXT_MAX_EXTENT! " ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
if ( path [ depth ] . p_ext ! = EXT_MAX_EXTENT ( path [ depth ] . p_hdr ) ) {
border = path [ depth ] . p_ext [ 1 ] . ee_block ;
2006-10-11 12:21:07 +04:00
ext_debug ( " leaf will be split. "
2006-10-11 12:21:03 +04:00
" next leaf starts at %d \n " ,
2007-05-24 21:04:54 +04:00
le32_to_cpu ( border ) ) ;
2006-10-11 12:21:03 +04:00
} else {
border = newext - > ee_block ;
ext_debug ( " leaf will be added. "
" next leaf starts at %d \n " ,
2007-05-24 21:04:54 +04:00
le32_to_cpu ( border ) ) ;
2006-10-11 12:21:03 +04:00
}
/*
2006-10-11 12:21:07 +04:00
* If error occurs , then we break processing
* and mark filesystem read - only . index won ' t
2006-10-11 12:21:03 +04:00
* be inserted and tree will be in consistent
2006-10-11 12:21:07 +04:00
* state . Next mount will repair buffers too .
2006-10-11 12:21:03 +04:00
*/
/*
2006-10-11 12:21:07 +04:00
* Get array to track all allocated blocks .
* We need this to handle errors and free blocks
* upon them .
2006-10-11 12:21:03 +04:00
*/
2006-12-07 07:41:35 +03:00
ablocks = kzalloc ( sizeof ( ext4_fsblk_t ) * depth , GFP_NOFS ) ;
2006-10-11 12:21:03 +04:00
if ( ! ablocks )
return - ENOMEM ;
/* allocate all needed blocks */
ext_debug ( " allocate %d blocks for indexes/leaf \n " , depth - at ) ;
for ( a = 0 ; a < depth - at ; a + + ) {
2008-07-12 03:27:31 +04:00
newblock = ext4_ext_new_meta_block ( handle , inode , path ,
2011-05-25 15:41:26 +04:00
newext , & err , flags ) ;
2006-10-11 12:21:03 +04:00
if ( newblock = = 0 )
goto cleanup ;
ablocks [ a ] = newblock ;
}
/* initialize new leaf */
newblock = ablocks [ - - a ] ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( newblock = = 0 ) ) {
EXT4_ERROR_INODE ( inode , " newblock == 0! " ) ;
err = - EIO ;
goto cleanup ;
}
2006-10-11 12:21:03 +04:00
bh = sb_getblk ( inode - > i_sb , newblock ) ;
if ( ! bh ) {
err = - EIO ;
goto cleanup ;
}
lock_buffer ( bh ) ;
2006-12-07 07:41:33 +03:00
err = ext4_journal_get_create_access ( handle , bh ) ;
if ( err )
2006-10-11 12:21:03 +04:00
goto cleanup ;
neh = ext_block_hdr ( bh ) ;
neh - > eh_entries = 0 ;
2009-08-28 18:40:33 +04:00
neh - > eh_max = cpu_to_le16 ( ext4_ext_space_block ( inode , 0 ) ) ;
2006-10-11 12:21:03 +04:00
neh - > eh_magic = EXT4_EXT_MAGIC ;
neh - > eh_depth = 0 ;
2006-10-11 12:21:07 +04:00
/* move remainder of path[depth] to the new leaf */
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_hdr - > eh_entries ! =
path [ depth ] . p_hdr - > eh_max ) ) {
EXT4_ERROR_INODE ( inode , " eh_entries %d != eh_max %d! " ,
path [ depth ] . p_hdr - > eh_entries ,
path [ depth ] . p_hdr - > eh_max ) ;
err = - EIO ;
goto cleanup ;
}
2006-10-11 12:21:03 +04:00
/* start copy from next extent */
2011-05-26 01:41:48 +04:00
m = EXT_MAX_EXTENT ( path [ depth ] . p_hdr ) - path [ depth ] . p_ext + + ;
ext4_ext_show_move ( inode , path , newblock , depth ) ;
2006-10-11 12:21:03 +04:00
if ( m ) {
2011-05-26 01:41:48 +04:00
struct ext4_extent * ex ;
ex = EXT_FIRST_EXTENT ( neh ) ;
memmove ( ex , path [ depth ] . p_ext , sizeof ( struct ext4_extent ) * m ) ;
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & neh - > eh_entries , m ) ;
2006-10-11 12:21:03 +04:00
}
set_buffer_uptodate ( bh ) ;
unlock_buffer ( bh ) ;
2009-01-07 08:06:22 +03:00
err = ext4_handle_dirty_metadata ( handle , inode , bh ) ;
2006-12-07 07:41:33 +03:00
if ( err )
2006-10-11 12:21:03 +04:00
goto cleanup ;
brelse ( bh ) ;
bh = NULL ;
/* correct old leaf */
if ( m ) {
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
2006-10-11 12:21:03 +04:00
goto cleanup ;
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & path [ depth ] . p_hdr - > eh_entries , - m ) ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
if ( err )
2006-10-11 12:21:03 +04:00
goto cleanup ;
}
/* create intermediate indexes */
k = depth - at - 1 ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( k < 0 ) ) {
EXT4_ERROR_INODE ( inode , " k %d < 0! " , k ) ;
err = - EIO ;
goto cleanup ;
}
2006-10-11 12:21:03 +04:00
if ( k )
ext_debug ( " create %d intermediate indices \n " , k ) ;
/* insert new index into current index block */
/* current depth stored in i var */
i = depth - 1 ;
while ( k - - ) {
oldblock = newblock ;
newblock = ablocks [ - - a ] ;
2008-01-29 07:58:27 +03:00
bh = sb_getblk ( inode - > i_sb , newblock ) ;
2006-10-11 12:21:03 +04:00
if ( ! bh ) {
err = - EIO ;
goto cleanup ;
}
lock_buffer ( bh ) ;
2006-12-07 07:41:33 +03:00
err = ext4_journal_get_create_access ( handle , bh ) ;
if ( err )
2006-10-11 12:21:03 +04:00
goto cleanup ;
neh = ext_block_hdr ( bh ) ;
neh - > eh_entries = cpu_to_le16 ( 1 ) ;
neh - > eh_magic = EXT4_EXT_MAGIC ;
2009-08-28 18:40:33 +04:00
neh - > eh_max = cpu_to_le16 ( ext4_ext_space_block_idx ( inode , 0 ) ) ;
2006-10-11 12:21:03 +04:00
neh - > eh_depth = cpu_to_le16 ( depth - i ) ;
fidx = EXT_FIRST_INDEX ( neh ) ;
fidx - > ei_block = border ;
2006-10-11 12:21:05 +04:00
ext4_idx_store_pblock ( fidx , oldblock ) ;
2006-10-11 12:21:03 +04:00
2008-01-29 07:58:27 +03:00
ext_debug ( " int.index at %d (block %llu): %u -> %llu \n " ,
i , newblock , le32_to_cpu ( border ) , oldblock ) ;
2006-10-11 12:21:03 +04:00
2011-05-26 01:41:48 +04:00
/* move remainder of path[i] to the new index block */
2010-03-02 19:46:09 +03:00
if ( unlikely ( EXT_MAX_INDEX ( path [ i ] . p_hdr ) ! =
EXT_LAST_INDEX ( path [ i ] . p_hdr ) ) ) {
EXT4_ERROR_INODE ( inode ,
" EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d! " ,
le32_to_cpu ( path [ i ] . p_ext - > ee_block ) ) ;
err = - EIO ;
goto cleanup ;
}
2011-05-26 01:41:48 +04:00
/* start copy indexes */
m = EXT_MAX_INDEX ( path [ i ] . p_hdr ) - path [ i ] . p_idx + + ;
ext_debug ( " cur 0x%p, last 0x%p \n " , path [ i ] . p_idx ,
EXT_MAX_INDEX ( path [ i ] . p_hdr ) ) ;
ext4_ext_show_move ( inode , path , newblock , i ) ;
2006-10-11 12:21:03 +04:00
if ( m ) {
2011-05-26 01:41:48 +04:00
memmove ( + + fidx , path [ i ] . p_idx ,
2006-10-11 12:21:03 +04:00
sizeof ( struct ext4_extent_idx ) * m ) ;
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & neh - > eh_entries , m ) ;
2006-10-11 12:21:03 +04:00
}
set_buffer_uptodate ( bh ) ;
unlock_buffer ( bh ) ;
2009-01-07 08:06:22 +03:00
err = ext4_handle_dirty_metadata ( handle , inode , bh ) ;
2006-12-07 07:41:33 +03:00
if ( err )
2006-10-11 12:21:03 +04:00
goto cleanup ;
brelse ( bh ) ;
bh = NULL ;
/* correct old index */
if ( m ) {
err = ext4_ext_get_access ( handle , inode , path + i ) ;
if ( err )
goto cleanup ;
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & path [ i ] . p_hdr - > eh_entries , - m ) ;
2006-10-11 12:21:03 +04:00
err = ext4_ext_dirty ( handle , inode , path + i ) ;
if ( err )
goto cleanup ;
}
i - - ;
}
/* insert new index */
err = ext4_ext_insert_index ( handle , inode , path + at ,
le32_to_cpu ( border ) , newblock ) ;
cleanup :
if ( bh ) {
if ( buffer_locked ( bh ) )
unlock_buffer ( bh ) ;
brelse ( bh ) ;
}
if ( err ) {
/* free all allocated blocks in error case */
for ( i = 0 ; i < depth ; i + + ) {
if ( ! ablocks [ i ] )
continue ;
2011-02-22 05:01:42 +03:00
ext4_free_blocks ( handle , inode , NULL , ablocks [ i ] , 1 ,
2009-11-23 15:17:05 +03:00
EXT4_FREE_BLOCKS_METADATA ) ;
2006-10-11 12:21:03 +04:00
}
}
kfree ( ablocks ) ;
return err ;
}
/*
 * ext4_ext_grow_indepth:
 * makes the extent tree one level deeper:
 * - allocates a new metadata block
 * - copies the current top-level node (index block or leaf) into it
 * - turns the top level into an index node whose single entry points
 *   to the freshly filled block
 *
 * Returns 0 on success or a negative error code.
 *
 * NOTE(review): none of the failure paths below gives the newly
 * allocated block back; confirm that this is intended.
 */
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
				 unsigned int flags,
				 struct ext4_ext_path *path,
				 struct ext4_extent *newext)
{
	struct ext4_ext_path *top = path;
	struct ext4_extent_header *hdr;
	struct buffer_head *blk_bh;
	ext4_fsblk_t blk;
	int rc = 0;

	blk = ext4_ext_new_meta_block(handle, inode, path,
				      newext, &rc, flags);
	if (!blk)
		return rc;

	blk_bh = sb_getblk(inode->i_sb, blk);
	if (!blk_bh) {
		rc = -EIO;
		ext4_std_error(inode->i_sb, rc);
		return rc;
	}

	lock_buffer(blk_bh);
	rc = ext4_journal_get_create_access(handle, blk_bh);
	if (rc) {
		unlock_buffer(blk_bh);
		goto release;
	}

	/* the old top-level node, entries included, moves into the block */
	memmove(blk_bh->b_data, top->p_hdr, sizeof(EXT4_I(inode)->i_data));

	/*
	 * The copied node may hold indexes or extents; pick the capacity
	 * helper that matches what it holds.
	 */
	hdr = ext_block_hdr(blk_bh);
	hdr->eh_max = cpu_to_le16(ext_depth(inode) ?
				  ext4_ext_space_block_idx(inode, 0) :
				  ext4_ext_space_block(inode, 0));
	hdr->eh_magic = EXT4_EXT_MAGIC;
	set_buffer_uptodate(blk_bh);
	unlock_buffer(blk_bh);

	rc = ext4_handle_dirty_metadata(handle, inode, blk_bh);
	if (rc)
		goto release;

	/* rebuild the top level as an index node with exactly one entry */
	rc = ext4_ext_get_access(handle, inode, top);
	if (rc)
		goto release;

	hdr = top->p_hdr;
	hdr->eh_magic = EXT4_EXT_MAGIC;
	hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
	hdr->eh_entries = cpu_to_le16(1);
	top->p_idx = EXT_FIRST_INDEX(hdr);

	/* the single index starts where the old first entry started */
	if (hdr->eh_depth)
		top->p_idx->ei_block = EXT_FIRST_INDEX(hdr)->ei_block;
	else
		top->p_idx->ei_block = EXT_FIRST_EXTENT(hdr)->ee_block;
	ext4_idx_store_pblock(top->p_idx, blk);

	hdr = ext_inode_hdr(inode);
	ext_debug(" new root: num %d(%d), lblock %d, ptr %llu \n ",
		  le16_to_cpu(hdr->eh_entries), le16_to_cpu(hdr->eh_max),
		  le32_to_cpu(EXT_FIRST_INDEX(hdr)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(hdr)));

	hdr->eh_depth = cpu_to_le16(path->p_depth + 1);
	rc = ext4_ext_dirty(handle, inode, top);

release:
	brelse(blk_bh);
	return rc;
}
/*
 * ext4_ext_create_new_leaf:
 * looks, from the leaf upwards, for a node that still has a free entry
 * and splits the tree below it so that a new leaf appears.
 * When every node on the path is full the tree is grown in depth
 * instead, and the search is repeated if the leaf is still full.
 *
 * Returns 0 on success or a negative error code.
 */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
				    unsigned int flags,
				    struct ext4_ext_path *path,
				    struct ext4_extent *newext)
{
	struct ext4_ext_path *node;
	struct ext4_extent_header *leaf_hdr;
	int level, did_split, rc;

	for (;;) {
		/* climb from the leaf until a node with a free entry shows up */
		level = ext_depth(inode);
		node = path + level;
		while (level > 0 && !EXT_HAS_FREE_INDEX(node)) {
			level--;
			node--;
		}

		/*
		 * we use already allocated block for index block,
		 * so subsequent data blocks should be contiguous
		 */
		did_split = EXT_HAS_FREE_INDEX(node);
		if (did_split) {
			/* build the missing subtree under that entry */
			rc = ext4_ext_split(handle, inode, flags, path,
					    newext, level);
		} else {
			/* every node is full: add a level on top */
			rc = ext4_ext_grow_indepth(handle, inode, flags,
						   path, newext);
		}
		if (rc)
			return rc;

		/* the tree changed shape, so look the target block up again */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
				(ext4_lblk_t)le32_to_cpu(newext->ee_block),
				path);
		if (IS_ERR(path))
			return PTR_ERR(path);

		if (did_split)
			return 0;

		/*
		 * Only the first growth (depth 0 -> 1) produces free space;
		 * in all other cases the grown tree still has to be split.
		 */
		leaf_hdr = path[ext_depth(inode)].p_hdr;
		if (leaf_hdr->eh_entries != leaf_hdr->eh_max)
			return 0;
	}
}
2008-01-29 07:58:27 +03:00
/*
* search the closest allocated block to the left for * logical
* and returns it at @ logical + it ' s physical address at @ phys
* if * logical is the smallest allocated block , the function
* returns 0 at @ phys
* return value contains 0 ( success ) or error code
*/
2010-10-28 05:30:14 +04:00
static int ext4_ext_search_left ( struct inode * inode ,
struct ext4_ext_path * path ,
ext4_lblk_t * logical , ext4_fsblk_t * phys )
2008-01-29 07:58:27 +03:00
{
struct ext4_extent_idx * ix ;
struct ext4_extent * ex ;
2008-01-29 07:58:27 +03:00
int depth , ee_len ;
2008-01-29 07:58:27 +03:00
2010-03-02 19:46:09 +03:00
if ( unlikely ( path = = NULL ) ) {
EXT4_ERROR_INODE ( inode , " path == NULL *logical %d! " , * logical ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
depth = path - > p_depth ;
* phys = 0 ;
if ( depth = = 0 & & path - > p_ext = = NULL )
return 0 ;
/* usually extent in the path covers blocks smaller
* then * logical , but it can be that extent is the
* first one in the file */
ex = path [ depth ] . p_ext ;
2008-01-29 07:58:27 +03:00
ee_len = ext4_ext_get_actual_len ( ex ) ;
2008-01-29 07:58:27 +03:00
if ( * logical < le32_to_cpu ( ex - > ee_block ) ) {
2010-03-02 19:46:09 +03:00
if ( unlikely ( EXT_FIRST_EXTENT ( path [ depth ] . p_hdr ) ! = ex ) ) {
EXT4_ERROR_INODE ( inode ,
" EXT_FIRST_EXTENT != ex *logical %d ee_block %d! " ,
* logical , le32_to_cpu ( ex - > ee_block ) ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
while ( - - depth > = 0 ) {
ix = path [ depth ] . p_idx ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( ix ! = EXT_FIRST_INDEX ( path [ depth ] . p_hdr ) ) ) {
EXT4_ERROR_INODE ( inode ,
" ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)! " ,
ix ! = NULL ? ix - > ei_block : 0 ,
EXT_FIRST_INDEX ( path [ depth ] . p_hdr ) ! = NULL ?
EXT_FIRST_INDEX ( path [ depth ] . p_hdr ) - > ei_block : 0 ,
depth ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
}
return 0 ;
}
2010-03-02 19:46:09 +03:00
if ( unlikely ( * logical < ( le32_to_cpu ( ex - > ee_block ) + ee_len ) ) ) {
EXT4_ERROR_INODE ( inode ,
" logical %d < ee_block %d + ee_len %d! " ,
* logical , le32_to_cpu ( ex - > ee_block ) , ee_len ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
2008-01-29 07:58:27 +03:00
* logical = le32_to_cpu ( ex - > ee_block ) + ee_len - 1 ;
2010-10-28 05:30:14 +04:00
* phys = ext4_ext_pblock ( ex ) + ee_len - 1 ;
2008-01-29 07:58:27 +03:00
return 0 ;
}
/*
* search the closest allocated block to the right for * logical
* and returns it at @ logical + it ' s physical address at @ phys
* if * logical is the smallest allocated block , the function
* returns 0 at @ phys
* return value contains 0 ( success ) or error code
*/
2010-10-28 05:30:14 +04:00
static int ext4_ext_search_right ( struct inode * inode ,
struct ext4_ext_path * path ,
2011-09-10 02:52:51 +04:00
ext4_lblk_t * logical , ext4_fsblk_t * phys ,
struct ext4_extent * * ret_ex )
2008-01-29 07:58:27 +03:00
{
struct buffer_head * bh = NULL ;
struct ext4_extent_header * eh ;
struct ext4_extent_idx * ix ;
struct ext4_extent * ex ;
ext4_fsblk_t block ;
2009-03-11 01:18:47 +03:00
int depth ; /* Note, NOT eh_depth; depth from top of tree */
int ee_len ;
2008-01-29 07:58:27 +03:00
2010-03-02 19:46:09 +03:00
if ( unlikely ( path = = NULL ) ) {
EXT4_ERROR_INODE ( inode , " path == NULL *logical %d! " , * logical ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
depth = path - > p_depth ;
* phys = 0 ;
if ( depth = = 0 & & path - > p_ext = = NULL )
return 0 ;
/* usually extent in the path covers blocks smaller
* then * logical , but it can be that extent is the
* first one in the file */
ex = path [ depth ] . p_ext ;
2008-01-29 07:58:27 +03:00
ee_len = ext4_ext_get_actual_len ( ex ) ;
2008-01-29 07:58:27 +03:00
if ( * logical < le32_to_cpu ( ex - > ee_block ) ) {
2010-03-02 19:46:09 +03:00
if ( unlikely ( EXT_FIRST_EXTENT ( path [ depth ] . p_hdr ) ! = ex ) ) {
EXT4_ERROR_INODE ( inode ,
" first_extent(path[%d].p_hdr) != ex " ,
depth ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
while ( - - depth > = 0 ) {
ix = path [ depth ] . p_idx ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( ix ! = EXT_FIRST_INDEX ( path [ depth ] . p_hdr ) ) ) {
EXT4_ERROR_INODE ( inode ,
" ix != EXT_FIRST_INDEX *logical %d! " ,
* logical ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
}
2011-09-10 02:52:51 +04:00
goto found_extent ;
2008-01-29 07:58:27 +03:00
}
2010-03-02 19:46:09 +03:00
if ( unlikely ( * logical < ( le32_to_cpu ( ex - > ee_block ) + ee_len ) ) ) {
EXT4_ERROR_INODE ( inode ,
" logical %d < ee_block %d + ee_len %d! " ,
* logical , le32_to_cpu ( ex - > ee_block ) , ee_len ) ;
return - EIO ;
}
2008-01-29 07:58:27 +03:00
if ( ex ! = EXT_LAST_EXTENT ( path [ depth ] . p_hdr ) ) {
/* next allocated block in this leaf */
ex + + ;
2011-09-10 02:52:51 +04:00
goto found_extent ;
2008-01-29 07:58:27 +03:00
}
/* go up and search for index to the right */
while ( - - depth > = 0 ) {
ix = path [ depth ] . p_idx ;
if ( ix ! = EXT_LAST_INDEX ( path [ depth ] . p_hdr ) )
2008-11-26 01:24:23 +03:00
goto got_index ;
2008-01-29 07:58:27 +03:00
}
2008-11-26 01:24:23 +03:00
/* we've gone up to the root and found no index to the right */
return 0 ;
2008-01-29 07:58:27 +03:00
2008-11-26 01:24:23 +03:00
got_index :
2008-01-29 07:58:27 +03:00
/* we've found index to the right, let's
* follow it and find the closest allocated
* block to the right */
ix + + ;
2010-10-28 05:30:14 +04:00
block = ext4_idx_pblock ( ix ) ;
2008-01-29 07:58:27 +03:00
while ( + + depth < path - > p_depth ) {
bh = sb_bread ( inode - > i_sb , block ) ;
if ( bh = = NULL )
return - EIO ;
eh = ext_block_hdr ( bh ) ;
2009-03-11 01:18:47 +03:00
/* subtract from p_depth to get proper eh_depth */
2009-03-12 16:51:20 +03:00
if ( ext4_ext_check ( inode , eh , path - > p_depth - depth ) ) {
2008-01-29 07:58:27 +03:00
put_bh ( bh ) ;
return - EIO ;
}
ix = EXT_FIRST_INDEX ( eh ) ;
2010-10-28 05:30:14 +04:00
block = ext4_idx_pblock ( ix ) ;
2008-01-29 07:58:27 +03:00
put_bh ( bh ) ;
}
bh = sb_bread ( inode - > i_sb , block ) ;
if ( bh = = NULL )
return - EIO ;
eh = ext_block_hdr ( bh ) ;
2009-03-12 16:51:20 +03:00
if ( ext4_ext_check ( inode , eh , path - > p_depth - depth ) ) {
2008-01-29 07:58:27 +03:00
put_bh ( bh ) ;
return - EIO ;
}
ex = EXT_FIRST_EXTENT ( eh ) ;
2011-09-10 02:52:51 +04:00
found_extent :
2008-01-29 07:58:27 +03:00
* logical = le32_to_cpu ( ex - > ee_block ) ;
2010-10-28 05:30:14 +04:00
* phys = ext4_ext_pblock ( ex ) ;
2011-09-10 02:52:51 +04:00
* ret_ex = ex ;
if ( bh )
put_bh ( bh ) ;
2008-01-29 07:58:27 +03:00
return 0 ;
}
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_next_allocated_block :
2011-06-06 08:05:17 +04:00
* returns allocated block in subsequent extent or EXT_MAX_BLOCKS .
2006-10-11 12:21:07 +04:00
* NOTE : it considers block number from index entry as
* allocated block . Thus , index entries have to be consistent
* with leaves .
2006-10-11 12:21:03 +04:00
*/
2008-01-29 07:58:27 +03:00
static ext4_lblk_t
2006-10-11 12:21:03 +04:00
ext4_ext_next_allocated_block ( struct ext4_ext_path * path )
{
int depth ;
BUG_ON ( path = = NULL ) ;
depth = path - > p_depth ;
if ( depth = = 0 & & path - > p_ext = = NULL )
2011-06-06 08:05:17 +04:00
return EXT_MAX_BLOCKS ;
2006-10-11 12:21:03 +04:00
while ( depth > = 0 ) {
if ( depth = = path - > p_depth ) {
/* leaf */
if ( path [ depth ] . p_ext ! =
EXT_LAST_EXTENT ( path [ depth ] . p_hdr ) )
return le32_to_cpu ( path [ depth ] . p_ext [ 1 ] . ee_block ) ;
} else {
/* index */
if ( path [ depth ] . p_idx ! =
EXT_LAST_INDEX ( path [ depth ] . p_hdr ) )
return le32_to_cpu ( path [ depth ] . p_idx [ 1 ] . ei_block ) ;
}
depth - - ;
}
2011-06-06 08:05:17 +04:00
return EXT_MAX_BLOCKS ;
2006-10-11 12:21:03 +04:00
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_next_leaf_block :
2011-06-06 08:05:17 +04:00
* returns first allocated block from next leaf or EXT_MAX_BLOCKS
2006-10-11 12:21:03 +04:00
*/
2011-07-24 05:49:07 +04:00
static ext4_lblk_t ext4_ext_next_leaf_block ( struct ext4_ext_path * path )
2006-10-11 12:21:03 +04:00
{
int depth ;
BUG_ON ( path = = NULL ) ;
depth = path - > p_depth ;
/* zero-tree has no leaf blocks at all */
if ( depth = = 0 )
2011-06-06 08:05:17 +04:00
return EXT_MAX_BLOCKS ;
2006-10-11 12:21:03 +04:00
/* go to index block */
depth - - ;
while ( depth > = 0 ) {
if ( path [ depth ] . p_idx ! =
EXT_LAST_INDEX ( path [ depth ] . p_hdr ) )
2008-01-29 07:58:27 +03:00
return ( ext4_lblk_t )
le32_to_cpu ( path [ depth ] . p_idx [ 1 ] . ei_block ) ;
2006-10-11 12:21:03 +04:00
depth - - ;
}
2011-06-06 08:05:17 +04:00
return EXT_MAX_BLOCKS ;
2006-10-11 12:21:03 +04:00
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_correct_indexes :
* if leaf gets modified and modified extent is first in the leaf ,
* then we have to correct all indexes above .
2006-10-11 12:21:03 +04:00
* TODO : do we need to correct tree in all cases ?
*/
2008-01-29 07:58:27 +03:00
static int ext4_ext_correct_indexes ( handle_t * handle , struct inode * inode ,
2006-10-11 12:21:03 +04:00
struct ext4_ext_path * path )
{
struct ext4_extent_header * eh ;
int depth = ext_depth ( inode ) ;
struct ext4_extent * ex ;
__le32 border ;
int k , err = 0 ;
eh = path [ depth ] . p_hdr ;
ex = path [ depth ] . p_ext ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( ex = = NULL | | eh = = NULL ) ) {
EXT4_ERROR_INODE ( inode ,
" ex %p == NULL or eh %p == NULL " , ex , eh ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
if ( depth = = 0 ) {
/* there is no tree at all */
return 0 ;
}
if ( ex ! = EXT_FIRST_EXTENT ( eh ) ) {
/* we correct tree if first leaf got modified only */
return 0 ;
}
/*
2006-10-11 12:21:07 +04:00
* TODO : we need correction if border is smaller than current one
2006-10-11 12:21:03 +04:00
*/
k = depth - 1 ;
border = path [ depth ] . p_ext - > ee_block ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , path + k ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
path [ k ] . p_idx - > ei_block = border ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_dirty ( handle , inode , path + k ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
while ( k - - ) {
/* change all left-side indexes */
if ( path [ k + 1 ] . p_idx ! = EXT_FIRST_INDEX ( path [ k + 1 ] . p_hdr ) )
break ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , path + k ) ;
if ( err )
2006-10-11 12:21:03 +04:00
break ;
path [ k ] . p_idx - > ei_block = border ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_dirty ( handle , inode , path + k ) ;
if ( err )
2006-10-11 12:21:03 +04:00
break ;
}
return err ;
}
2009-06-18 03:24:03 +04:00
int
2006-10-11 12:21:03 +04:00
ext4_can_extents_be_merged ( struct inode * inode , struct ext4_extent * ex1 ,
struct ext4_extent * ex2 )
{
2007-07-18 17:02:56 +04:00
unsigned short ext1_ee_len , ext2_ee_len , max_len ;
2007-07-18 05:42:41 +04:00
/*
* Make sure that either both extents are uninitialized , or
* both are _not_ .
*/
if ( ext4_ext_is_uninitialized ( ex1 ) ^ ext4_ext_is_uninitialized ( ex2 ) )
return 0 ;
2007-07-18 17:02:56 +04:00
if ( ext4_ext_is_uninitialized ( ex1 ) )
max_len = EXT_UNINIT_MAX_LEN ;
else
max_len = EXT_INIT_MAX_LEN ;
2007-07-18 05:42:41 +04:00
ext1_ee_len = ext4_ext_get_actual_len ( ex1 ) ;
ext2_ee_len = ext4_ext_get_actual_len ( ex2 ) ;
if ( le32_to_cpu ( ex1 - > ee_block ) + ext1_ee_len ! =
2006-10-11 12:21:24 +04:00
le32_to_cpu ( ex2 - > ee_block ) )
2006-10-11 12:21:03 +04:00
return 0 ;
2006-10-11 12:21:06 +04:00
/*
* To allow future support for preallocated extents to be added
* as an RO_COMPAT feature , refuse to merge to extents if
2006-10-11 12:21:07 +04:00
* this can result in the top bit of ee_len being set .
2006-10-11 12:21:06 +04:00
*/
2007-07-18 17:02:56 +04:00
if ( ext1_ee_len + ext2_ee_len > max_len )
2006-10-11 12:21:06 +04:00
return 0 ;
2007-02-17 21:20:16 +03:00
# ifdef AGGRESSIVE_TEST
2008-01-29 07:58:27 +03:00
if ( ext1_ee_len > = 4 )
2006-10-11 12:21:03 +04:00
return 0 ;
# endif
2010-10-28 05:30:14 +04:00
if ( ext4_ext_pblock ( ex1 ) + ext1_ee_len = = ext4_ext_pblock ( ex2 ) )
2006-10-11 12:21:03 +04:00
return 1 ;
return 0 ;
}
2007-07-18 05:42:38 +04:00
/*
* This function tries to merge the " ex " extent to the next extent in the tree .
* It always tries to merge towards right . If you want to merge towards
* left , pass " ex - 1 " as argument instead of " ex " .
* Returns 0 if the extents ( ex and ex + 1 ) were _not_ merged and returns
* 1 if they got merged .
*/
2011-05-03 19:45:29 +04:00
static int ext4_ext_try_to_merge_right ( struct inode * inode ,
2010-10-28 05:30:14 +04:00
struct ext4_ext_path * path ,
struct ext4_extent * ex )
2007-07-18 05:42:38 +04:00
{
struct ext4_extent_header * eh ;
unsigned int depth , len ;
int merge_done = 0 ;
int uninitialized = 0 ;
depth = ext_depth ( inode ) ;
BUG_ON ( path [ depth ] . p_hdr = = NULL ) ;
eh = path [ depth ] . p_hdr ;
while ( ex < EXT_LAST_EXTENT ( eh ) ) {
if ( ! ext4_can_extents_be_merged ( inode , ex , ex + 1 ) )
break ;
/* merge with next extent! */
if ( ext4_ext_is_uninitialized ( ex ) )
uninitialized = 1 ;
ex - > ee_len = cpu_to_le16 ( ext4_ext_get_actual_len ( ex )
+ ext4_ext_get_actual_len ( ex + 1 ) ) ;
if ( uninitialized )
ext4_ext_mark_uninitialized ( ex ) ;
if ( ex + 1 < EXT_LAST_EXTENT ( eh ) ) {
len = ( EXT_LAST_EXTENT ( eh ) - ex - 1 )
* sizeof ( struct ext4_extent ) ;
memmove ( ex + 1 , ex + 2 , len ) ;
}
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & eh - > eh_entries , - 1 ) ;
2007-07-18 05:42:38 +04:00
merge_done = 1 ;
WARN_ON ( eh - > eh_entries = = 0 ) ;
if ( ! eh - > eh_entries )
2010-05-17 05:00:00 +04:00
EXT4_ERROR_INODE ( inode , " eh->eh_entries = 0! " ) ;
2007-07-18 05:42:38 +04:00
}
return merge_done ;
}
2011-05-03 19:45:29 +04:00
/*
 * ext4_ext_try_to_merge:
 * tries to merge the @ex extent with its neighbours in the leaf: first
 * into the extent on its left (when there is one), and only if that
 * did not happen, with the extent(s) on its right.
 *
 * Returns 0 when @ex was merged into its left neighbour; otherwise
 * returns the result of the right-hand attempt (1 if merged, else 0).
 */
static int ext4_ext_try_to_merge(struct inode *inode,
				 struct ext4_ext_path *path,
				 struct ext4_extent *ex)
{
	struct ext4_extent_header *leaf;
	unsigned int depth;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	leaf = path[depth].p_hdr;

	/* merging "ex - 1" to the right is how a left merge is done */
	if (ex > EXT_FIRST_EXTENT(leaf) &&
	    ext4_ext_try_to_merge_right(inode, path, ex - 1))
		return 0;

	return ext4_ext_try_to_merge_right(inode, path, ex);
}
2007-05-24 21:04:13 +04:00
/*
* check if a portion of the " newext " extent overlaps with an
* existing extent .
*
* If there is an overlap discovered , it updates the length of the newext
* such that there will be no overlap , and then returns 1.
* If there is no overlap found , it returns 0.
*/
2011-09-10 02:52:51 +04:00
static unsigned int ext4_ext_check_overlap ( struct ext4_sb_info * sbi ,
struct inode * inode ,
2010-10-28 05:30:14 +04:00
struct ext4_extent * newext ,
struct ext4_ext_path * path )
2007-05-24 21:04:13 +04:00
{
2008-01-29 07:58:27 +03:00
ext4_lblk_t b1 , b2 ;
2007-05-24 21:04:13 +04:00
unsigned int depth , len1 ;
unsigned int ret = 0 ;
b1 = le32_to_cpu ( newext - > ee_block ) ;
2007-07-18 05:42:41 +04:00
len1 = ext4_ext_get_actual_len ( newext ) ;
2007-05-24 21:04:13 +04:00
depth = ext_depth ( inode ) ;
if ( ! path [ depth ] . p_ext )
goto out ;
b2 = le32_to_cpu ( path [ depth ] . p_ext - > ee_block ) ;
2011-09-10 02:52:51 +04:00
b2 & = ~ ( sbi - > s_cluster_ratio - 1 ) ;
2007-05-24 21:04:13 +04:00
/*
* get the next allocated block if the extent in the path
2008-07-27 00:15:44 +04:00
* is before the requested block ( s )
2007-05-24 21:04:13 +04:00
*/
if ( b2 < b1 ) {
b2 = ext4_ext_next_allocated_block ( path ) ;
2011-06-06 08:05:17 +04:00
if ( b2 = = EXT_MAX_BLOCKS )
2007-05-24 21:04:13 +04:00
goto out ;
2011-09-10 02:52:51 +04:00
b2 & = ~ ( sbi - > s_cluster_ratio - 1 ) ;
2007-05-24 21:04:13 +04:00
}
2008-01-29 07:58:27 +03:00
/* check for wrap through zero on extent logical start block*/
2007-05-24 21:04:13 +04:00
if ( b1 + len1 < b1 ) {
2011-06-06 08:05:17 +04:00
len1 = EXT_MAX_BLOCKS - b1 ;
2007-05-24 21:04:13 +04:00
newext - > ee_len = cpu_to_le16 ( len1 ) ;
ret = 1 ;
}
/* check for overlap */
if ( b1 + len1 > b2 ) {
newext - > ee_len = cpu_to_le16 ( b2 - b1 ) ;
ret = 1 ;
}
out :
return ret ;
}
2006-10-11 12:21:03 +04:00
/*
 * ext4_ext_insert_extent:
 * tries to merge the requested extent into the extent found in @path,
 * or inserts it as a new entry into the tree, creating a new leaf when
 * neither the current leaf nor the next one has room.
 *
 * With EXT4_GET_BLOCKS_PRE_IO set in @flag no merging is attempted.
 *
 * Returns 0 on success or a negative error code.
 */
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
			   struct ext4_ext_path *path,
			   struct ext4_extent *newext, int flag)
{
	struct ext4_extent_header *leaf;
	struct ext4_extent *cur, *tail;
	struct ext4_extent *slot;	/* nearest extent, then the new entry */
	struct ext4_ext_path *next_path = NULL;
	int depth, bytes, rc;
	ext4_lblk_t next_lblk;
	int was_uninit;
	int mb_flags;

	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
		EXT4_ERROR_INODE(inode, " ext4_ext_get_actual_len(newext) == 0 ");
		return -EIO;
	}
	depth = ext_depth(inode);
	cur = path[depth].p_ext;
	if (unlikely(path[depth].p_hdr == NULL)) {
		EXT4_ERROR_INODE(inode, " path[%d].p_hdr == NULL ", depth);
		return -EIO;
	}

	/* cheapest case: the new blocks simply extend the found extent */
	if (cur && !(flag & EXT4_GET_BLOCKS_PRE_IO) &&
	    ext4_can_extents_be_merged(inode, cur, newext)) {
		ext_debug(" append [%d]%d block to %d:[%d]%d (from %llu) \n ",
			  ext4_ext_is_uninitialized(newext),
			  ext4_ext_get_actual_len(newext),
			  le32_to_cpu(cur->ee_block),
			  ext4_ext_is_uninitialized(cur),
			  ext4_ext_get_actual_len(cur),
			  ext4_ext_pblock(cur));
		rc = ext4_ext_get_access(handle, inode, path + depth);
		if (rc)
			return rc;

		/*
		 * ext4_can_extents_be_merged has already made sure both
		 * extents agree on being uninitialized, so looking at one
		 * of them is enough.
		 */
		was_uninit = ext4_ext_is_uninitialized(cur);
		cur->ee_len = cpu_to_le16(ext4_ext_get_actual_len(cur)
					  + ext4_ext_get_actual_len(newext));
		if (was_uninit)
			ext4_ext_mark_uninitialized(cur);
		leaf = path[depth].p_hdr;
		slot = cur;
		goto merged;
	}

	depth = ext_depth(inode);
	leaf = path[depth].p_hdr;
	if (le16_to_cpu(leaf->eh_entries) < le16_to_cpu(leaf->eh_max))
		goto insert;

	/* the leaf is full; maybe the next leaf has space for us? */
	tail = EXT_LAST_EXTENT(leaf);
	next_lblk = EXT_MAX_BLOCKS;
	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(tail->ee_block))
		next_lblk = ext4_ext_next_leaf_block(path);
	if (next_lblk != EXT_MAX_BLOCKS) {
		ext_debug(" next leaf block - %d \n ", next_lblk);
		BUG_ON(next_path != NULL);
		next_path = ext4_ext_find_extent(inode, next_lblk, NULL);
		if (IS_ERR(next_path))
			return PTR_ERR(next_path);
		BUG_ON(next_path->p_depth != path->p_depth);
		leaf = next_path[depth].p_hdr;
		if (le16_to_cpu(leaf->eh_entries) < le16_to_cpu(leaf->eh_max)) {
			ext_debug(" next leaf isn't full(%d) \n ",
				  le16_to_cpu(leaf->eh_entries));
			/* insert into the neighbouring leaf instead */
			path = next_path;
			goto insert;
		}
		ext_debug(" next leaf has no free space(%d,%d) \n ",
			  le16_to_cpu(leaf->eh_entries),
			  le16_to_cpu(leaf->eh_max));
	}

	/*
	 * There is no free space in the found leaf.
	 * We're gonna add a new leaf in the tree.
	 */
	mb_flags = (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
			EXT4_MB_USE_ROOT_BLOCKS : 0;
	rc = ext4_ext_create_new_leaf(handle, inode, mb_flags, path, newext);
	if (rc)
		goto done;
	depth = ext_depth(inode);
	leaf = path[depth].p_hdr;

insert:
	slot = path[depth].p_ext;

	rc = ext4_ext_get_access(handle, inode, path + depth);
	if (rc)
		goto done;

	if (!slot) {
		/* there is no extent in this leaf, create first one */
		ext_debug(" first extent in the leaf: %d:%llu:[%d]%d \n ",
			  le32_to_cpu(newext->ee_block),
			  ext4_ext_pblock(newext),
			  ext4_ext_is_uninitialized(newext),
			  ext4_ext_get_actual_len(newext));
		path[depth].p_ext = EXT_FIRST_EXTENT(leaf);
	} else if (le32_to_cpu(newext->ee_block)
		   > le32_to_cpu(slot->ee_block)) {
		/* the new extent goes right after the nearest one */
		if (slot != EXT_LAST_EXTENT(leaf)) {
			/* shift everything behind the nearest extent by one */
			bytes = EXT_MAX_EXTENT(leaf) - slot;
			bytes = (bytes - 1) * sizeof(struct ext4_extent);
			if (bytes < 0)
				bytes = 0;
			ext_debug(" insert %d:%llu:[%d]%d after: nearest 0x%p, "
				  " move %d from 0x%p to 0x%p \n ",
				  le32_to_cpu(newext->ee_block),
				  ext4_ext_pblock(newext),
				  ext4_ext_is_uninitialized(newext),
				  ext4_ext_get_actual_len(newext),
				  slot, bytes, slot + 1, slot + 2);
			memmove(slot + 2, slot + 1, bytes);
		}
		path[depth].p_ext = slot + 1;
	} else {
		/* the new extent goes right before the nearest one */
		BUG_ON(newext->ee_block == slot->ee_block);
		bytes = (EXT_MAX_EXTENT(leaf) - slot)
			* sizeof(struct ext4_extent);
		if (bytes < 0)
			bytes = 0;
		ext_debug(" insert %d:%llu:[%d]%d before: nearest 0x%p, "
			  " move %d from 0x%p to 0x%p \n ",
			  le32_to_cpu(newext->ee_block),
			  ext4_ext_pblock(newext),
			  ext4_ext_is_uninitialized(newext),
			  ext4_ext_get_actual_len(newext),
			  slot, bytes, slot, slot + 1);
		memmove(slot + 1, slot, bytes);
		path[depth].p_ext = slot;
	}

	/* fill in the freed-up entry */
	le16_add_cpu(&leaf->eh_entries, 1);
	slot = path[depth].p_ext;
	slot->ee_block = newext->ee_block;
	ext4_ext_store_pblock(slot, ext4_ext_pblock(newext));
	slot->ee_len = newext->ee_len;

merged:
	/* try to merge the entry with its neighbours in the leaf */
	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
		ext4_ext_try_to_merge(inode, path, slot);

	/* time to correct all indexes above */
	rc = ext4_ext_correct_indexes(handle, inode, path);
	if (rc)
		goto done;

	rc = ext4_ext_dirty(handle, inode, path + depth);

done:
	if (next_path) {
		ext4_ext_drop_refs(next_path);
		kfree(next_path);
	}
	ext4_ext_invalidate_cache(inode);
	return rc;
}
2010-10-28 05:30:14 +04:00
/*
 * ext4_ext_walk_space:
 * Walk the logical range [block, block + num) and call @func once for every
 * piece of it: either an extent found in the tree, or the unmapped gap in
 * front of / behind the nearest extent.
 *
 * @inode:  inode whose extent tree is walked (must not be NULL)
 * @block:  first logical block of the range
 * @num:    number of logical blocks in the range
 * @func:   callback invoked for every piece (must not be NULL)
 * @cbdata: opaque pointer handed through to @func
 *
 * The callback steers the walk through its return value: a negative value
 * aborts the walk and is returned to the caller, EXT_REPEAT looks the same
 * block up again, EXT_BREAK ends the walk successfully, and any other value
 * advances past the piece that was just reported.
 *
 * Returns 0 on success or a negative error code.
 */
static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
			       ext4_lblk_t num, ext_prepare_callback func,
			       void *cbdata)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_ext_cache cbex;
	struct ext4_extent *ex;
	ext4_lblk_t next, first = 0, stop = 0;
	ext4_lblk_t ex_first, ex_stop;
	ext4_lblk_t last = block + num;
	int depth, mapped, err = 0;

	BUG_ON(func == NULL);
	BUG_ON(inode == NULL);

	while (block < last && block != EXT_MAX_BLOCKS) {
		/* blocks still to visit, so block + num stays equal to last */
		num = last - block;

		/* look up the extent nearest to this block */
		down_read(&EXT4_I(inode)->i_data_sem);
		path = ext4_ext_find_extent(inode, block, path);
		up_read(&EXT4_I(inode)->i_data_sem);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			path = NULL;
			break;
		}

		depth = ext_depth(inode);
		if (unlikely(path[depth].p_hdr == NULL)) {
			EXT4_ERROR_INODE(inode, " path[%d].p_hdr == NULL ", depth);
			err = -EIO;
			break;
		}
		ex = path[depth].p_ext;
		next = ext4_ext_next_allocated_block(path);

		mapped = 0;
		if (ex == NULL) {
			/* no extent at all: the whole request is one gap */
			first = block;
			stop = block + num;
		} else {
			ex_first = le32_to_cpu(ex->ee_block);
			ex_stop = ex_first + ext4_ext_get_actual_len(ex);

			if (ex_first > block) {
				/* gap in front of the extent that was found */
				first = block;
				stop = ex_first;
				if (block + num < stop)
					stop = block + num;
			} else if (block >= ex_stop) {
				/* gap behind the extent that was found */
				first = block;
				stop = block + num;
				if (stop >= next)
					stop = next;
			} else if (block >= ex_first) {
				/* the block lies inside the extent */
				first = block;
				stop = ex_stop;
				if (block + num < stop)
					stop = block + num;
				mapped = 1;
			} else {
				BUG();
			}
		}
		BUG_ON(stop <= first);

		if (mapped) {
			/* report the extent itself, not just the overlap */
			cbex.ec_block = le32_to_cpu(ex->ee_block);
			cbex.ec_len = ext4_ext_get_actual_len(ex);
			cbex.ec_start = ext4_ext_pblock(ex);
		} else {
			cbex.ec_block = first;
			cbex.ec_len = stop - first;
			cbex.ec_start = 0;
		}

		if (unlikely(cbex.ec_len == 0)) {
			EXT4_ERROR_INODE(inode, " cbex.ec_len == 0 ");
			err = -EIO;
			break;
		}

		err = func(inode, next, &cbex, ex, cbdata);
		ext4_ext_drop_refs(path);

		if (err < 0)
			break;
		if (err == EXT_REPEAT)
			continue;
		if (err == EXT_BREAK) {
			err = 0;
			break;
		}

		if (ext_depth(inode) != depth) {
			/* tree depth changed: the path array must be reallocated */
			kfree(path);
			path = NULL;
		}

		block = cbex.ec_block + cbex.ec_len;
	}

	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}

	return err;
}
2006-12-07 07:41:36 +03:00
static void
2008-01-29 07:58:27 +03:00
ext4_ext_put_in_cache ( struct inode * inode , ext4_lblk_t block ,
2011-01-10 20:13:26 +03:00
__u32 len , ext4_fsblk_t start )
2006-10-11 12:21:03 +04:00
{
struct ext4_ext_cache * cex ;
BUG_ON ( len = = 0 ) ;
2009-05-15 17:07:28 +04:00
spin_lock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
2006-10-11 12:21:03 +04:00
cex = & EXT4_I ( inode ) - > i_cached_extent ;
cex - > ec_block = block ;
cex - > ec_len = len ;
cex - > ec_start = start ;
2009-05-15 17:07:28 +04:00
spin_unlock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
2006-10-11 12:21:03 +04:00
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_put_gap_in_cache :
* calculate boundaries of the gap that the requested block fits into
2006-10-11 12:21:03 +04:00
* and cache this gap
*/
2006-12-07 07:41:36 +03:00
static void
2006-10-11 12:21:03 +04:00
ext4_ext_put_gap_in_cache ( struct inode * inode , struct ext4_ext_path * path ,
2008-01-29 07:58:27 +03:00
ext4_lblk_t block )
2006-10-11 12:21:03 +04:00
{
int depth = ext_depth ( inode ) ;
2008-01-29 07:58:27 +03:00
unsigned long len ;
ext4_lblk_t lblock ;
2006-10-11 12:21:03 +04:00
struct ext4_extent * ex ;
ex = path [ depth ] . p_ext ;
if ( ex = = NULL ) {
/* there is no extent yet, so gap is [0;-] */
lblock = 0 ;
2011-06-06 08:05:17 +04:00
len = EXT_MAX_BLOCKS ;
2006-10-11 12:21:03 +04:00
ext_debug ( " cache gap(whole file): " ) ;
} else if ( block < le32_to_cpu ( ex - > ee_block ) ) {
lblock = block ;
len = le32_to_cpu ( ex - > ee_block ) - block ;
2008-01-29 07:58:27 +03:00
ext_debug ( " cache gap(before): %u [%u:%u] " ,
block ,
le32_to_cpu ( ex - > ee_block ) ,
ext4_ext_get_actual_len ( ex ) ) ;
2006-10-11 12:21:03 +04:00
} else if ( block > = le32_to_cpu ( ex - > ee_block )
2007-07-18 05:42:41 +04:00
+ ext4_ext_get_actual_len ( ex ) ) {
2008-01-29 07:58:27 +03:00
ext4_lblk_t next ;
2007-05-24 21:04:54 +04:00
lblock = le32_to_cpu ( ex - > ee_block )
2007-07-18 05:42:41 +04:00
+ ext4_ext_get_actual_len ( ex ) ;
2008-01-29 07:58:27 +03:00
next = ext4_ext_next_allocated_block ( path ) ;
2008-01-29 07:58:27 +03:00
ext_debug ( " cache gap(after): [%u:%u] %u " ,
le32_to_cpu ( ex - > ee_block ) ,
ext4_ext_get_actual_len ( ex ) ,
block ) ;
2008-01-29 07:58:27 +03:00
BUG_ON ( next = = lblock ) ;
len = next - lblock ;
2006-10-11 12:21:03 +04:00
} else {
lblock = len = 0 ;
BUG ( ) ;
}
2008-01-29 07:58:27 +03:00
ext_debug ( " -> %u:%lu \n " , lblock , len ) ;
2011-01-10 20:13:26 +03:00
ext4_ext_put_in_cache ( inode , lblock , len , 0 ) ;
2006-10-11 12:21:03 +04:00
}
2011-01-10 20:13:26 +03:00
/*
2011-07-24 05:53:25 +04:00
* ext4_ext_check_cache ( )
2011-05-25 15:41:50 +04:00
* Checks to see if the given block is in the cache .
* If it is , the cached extent is stored in the given
* cache extent pointer . If the cached extent is a hole ,
* this routine should be used instead of
* ext4_ext_in_cache if the calling function needs to
* know the size of the hole .
*
* @ inode : The files inode
* @ block : The block to look for in the cache
* @ ex : Pointer where the cached extent will be stored
* if it contains block
*
2011-01-10 20:13:26 +03:00
* Return 0 if cache is invalid ; 1 if the cache is valid
*/
2011-05-25 15:41:50 +04:00
static int ext4_ext_check_cache ( struct inode * inode , ext4_lblk_t block ,
struct ext4_ext_cache * ex ) {
2006-10-11 12:21:03 +04:00
struct ext4_ext_cache * cex ;
2011-05-23 05:24:16 +04:00
struct ext4_sb_info * sbi ;
2011-01-10 20:13:26 +03:00
int ret = 0 ;
2006-10-11 12:21:03 +04:00
2010-05-17 15:00:00 +04:00
/*
2009-05-15 17:07:28 +04:00
* We borrow i_block_reservation_lock to protect i_cached_extent
*/
spin_lock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
2006-10-11 12:21:03 +04:00
cex = & EXT4_I ( inode ) - > i_cached_extent ;
2011-05-23 05:24:16 +04:00
sbi = EXT4_SB ( inode - > i_sb ) ;
2006-10-11 12:21:03 +04:00
/* has cache valid data? */
2011-01-10 20:13:26 +03:00
if ( cex - > ec_len = = 0 )
2009-05-15 17:07:28 +04:00
goto errout ;
2006-10-11 12:21:03 +04:00
2010-03-04 07:55:01 +03:00
if ( in_range ( block , cex - > ec_block , cex - > ec_len ) ) {
2011-05-25 15:41:50 +04:00
memcpy ( ex , cex , sizeof ( struct ext4_ext_cache ) ) ;
2008-01-29 07:58:27 +03:00
ext_debug ( " %u cached by %u:%u:%llu \n " ,
block ,
cex - > ec_block , cex - > ec_len , cex - > ec_start ) ;
2011-01-10 20:13:26 +03:00
ret = 1 ;
2006-10-11 12:21:03 +04:00
}
2009-05-15 17:07:28 +04:00
errout :
2011-05-23 05:24:16 +04:00
if ( ! ret )
sbi - > extent_cache_misses + + ;
else
sbi - > extent_cache_hits + + ;
2009-05-15 17:07:28 +04:00
spin_unlock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
return ret ;
2006-10-11 12:21:03 +04:00
}
2011-05-25 15:41:50 +04:00
/*
 * ext4_ext_in_cache()
 * Look @block up in the extent cache and, on a hit, convert the cached
 * entry into on-disk extent form in @ex.
 *
 * @inode: The files inode
 * @block: The block to look for in the cache
 * @ex:    Extent that is filled in when the cache covers @block;
 *         left untouched otherwise
 *
 * Return 1 if the cache holds an entry covering @block, 0 otherwise.
 */
static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
		  struct ext4_extent *ex)
{
	struct ext4_ext_cache hit;

	if (!ext4_ext_check_cache(inode, block, &hit))
		return 0;

	ex->ee_block = cpu_to_le32(hit.ec_block);
	ext4_ext_store_pblock(ex, hit.ec_start);
	ex->ee_len = cpu_to_le16(hit.ec_len);
	return 1;
}
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_rm_idx :
* removes index from the index block .
2006-10-11 12:21:03 +04:00
*/
2008-01-29 07:58:27 +03:00
static int ext4_ext_rm_idx ( handle_t * handle , struct inode * inode ,
2006-10-11 12:21:03 +04:00
struct ext4_ext_path * path )
{
int err ;
2006-10-11 12:21:05 +04:00
ext4_fsblk_t leaf ;
2006-10-11 12:21:03 +04:00
/* free index block */
path - - ;
2010-10-28 05:30:14 +04:00
leaf = ext4_idx_pblock ( path - > p_idx ) ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( path - > p_hdr - > eh_entries = = 0 ) ) {
EXT4_ERROR_INODE ( inode , " path->p_hdr->eh_entries == 0 " ) ;
return - EIO ;
}
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , path ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
2011-07-28 05:29:33 +04:00
if ( path - > p_idx ! = EXT_LAST_INDEX ( path - > p_hdr ) ) {
int len = EXT_LAST_INDEX ( path - > p_hdr ) - path - > p_idx ;
len * = sizeof ( struct ext4_extent_idx ) ;
memmove ( path - > p_idx , path - > p_idx + 1 , len ) ;
}
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & path - > p_hdr - > eh_entries , - 1 ) ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_dirty ( handle , inode , path ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
2006-10-11 12:21:11 +04:00
ext_debug ( " index is empty, remove it, free block %llu \n " , leaf ) ;
2011-02-22 05:01:42 +03:00
ext4_free_blocks ( handle , inode , NULL , leaf , 1 ,
2009-11-23 15:17:05 +03:00
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET ) ;
2006-10-11 12:21:03 +04:00
return err ;
}
/*
2008-08-20 06:16:05 +04:00
* ext4_ext_calc_credits_for_single_extent :
* This routine returns max . credits that needed to insert an extent
* to the extent tree .
* When pass the actual path , the caller should calculate credits
* under i_data_sem .
2006-10-11 12:21:03 +04:00
*/
2008-08-20 06:15:58 +04:00
int ext4_ext_calc_credits_for_single_extent ( struct inode * inode , int nrblocks ,
2006-10-11 12:21:03 +04:00
struct ext4_ext_path * path )
{
if ( path ) {
2008-08-20 06:16:05 +04:00
int depth = ext_depth ( inode ) ;
2008-08-20 06:16:03 +04:00
int ret = 0 ;
2008-08-20 06:16:05 +04:00
2006-10-11 12:21:03 +04:00
/* probably there is space in leaf? */
if ( le16_to_cpu ( path [ depth ] . p_hdr - > eh_entries )
2008-08-20 06:16:05 +04:00
< le16_to_cpu ( path [ depth ] . p_hdr - > eh_max ) ) {
2006-10-11 12:21:03 +04:00
2008-08-20 06:16:05 +04:00
/*
* There are some space in the leaf tree , no
* need to account for leaf block credit
*
* bitmaps and block group descriptor blocks
* and other metadat blocks still need to be
* accounted .
*/
2008-08-20 06:15:58 +04:00
/* 1 bitmap, 1 block group descriptor */
2008-08-20 06:16:05 +04:00
ret = 2 + EXT4_META_TRANS_BLOCKS ( inode - > i_sb ) ;
2009-07-06 07:12:04 +04:00
return ret ;
2008-08-20 06:16:05 +04:00
}
}
2006-10-11 12:21:03 +04:00
2008-08-20 06:15:58 +04:00
return ext4_chunk_trans_blocks ( inode , nrblocks ) ;
2008-08-20 06:16:05 +04:00
}
2006-10-11 12:21:03 +04:00
2008-08-20 06:16:05 +04:00
/*
 * How many index/leaf blocks may have to be changed or allocated in order
 * to modify nrblocks?
 *
 * If the blocks fit in a single extent (@chunk is non-zero), then in the
 * worst case every level of the tree is changed, and if the tree splits
 * because of the insert, the old index/leaf at each level is updated too:
 * two blocks per level.
 *
 * If the blocks are discontiguous they could split the whole tree more than
 * once, but this is really rare; three blocks per level are assumed.
 *
 * @nrblocks itself does not enter the calculation.
 */
int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
	int per_level = chunk ? 2 : 3;

	return ext_depth(inode) * per_level;
}
/*
 * ext4_remove_blocks:
 * Free the physical blocks backing part of extent @ex.
 *
 * @handle:          the journal handle
 * @inode:           the file inode
 * @ex:              the extent blocks are removed from
 * @partial_cluster: in/out; cluster number of a cluster whose freeing was
 *                   deferred by an earlier call, or 0 if there is none
 * @from:            first logical block to remove
 * @to:              last logical block to remove
 *
 * Only two shapes of request are handled: removal of the extent's tail
 * (@to is the extent's last block) and removal of its head (@from is the
 * extent's first block).  Anything else is logged and ignored.
 *
 * Always returns 0.
 */
static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
			      struct ext4_extent *ex,
			      ext4_fsblk_t *partial_cluster,
			      ext4_lblk_t from, ext4_lblk_t to)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	unsigned short ee_len = ext4_ext_get_actual_len(ex);
	ext4_fsblk_t pblk;
	int flags = EXT4_FREE_BLOCKS_FORGET;

	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
		flags |= EXT4_FREE_BLOCKS_METADATA;
	/*
	 * For bigalloc file systems, we never free a partial cluster
	 * at the beginning of the extent.  Instead, we make a note
	 * that we tried freeing the cluster, and check to see if we
	 * need to free it on a subsequent call to ext4_remove_blocks,
	 * or at the end of the ext4_truncate() operation.
	 */
	flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;

	/*
	 * If we have a partial cluster, and it's different from the
	 * cluster of the last block, we need to explicitly free the
	 * partial cluster here.
	 */
	pblk = ext4_ext_pblock(ex) + ee_len - 1;
	if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
		ext4_free_blocks(handle, inode, NULL,
				 EXT4_C2B(sbi, *partial_cluster),
				 sbi->s_cluster_ratio, flags);
		*partial_cluster = 0;
	}

#ifdef EXTENTS_STATS
	/* uses the function-level sbi; no need to look it up a second time */
	spin_lock(&sbi->s_ext_stats_lock);
	sbi->s_ext_blocks += ee_len;
	sbi->s_ext_extents++;
	if (ee_len < sbi->s_ext_min)
		sbi->s_ext_min = ee_len;
	if (ee_len > sbi->s_ext_max)
		sbi->s_ext_max = ee_len;
	if (ext_depth(inode) > sbi->s_depth_max)
		sbi->s_depth_max = ext_depth(inode);
	spin_unlock(&sbi->s_ext_stats_lock);
#endif
	if (from >= le32_to_cpu(ex->ee_block)
	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
		/* tail removal */
		ext4_lblk_t num;

		num = le32_to_cpu(ex->ee_block) + ee_len - from;
		pblk = ext4_ext_pblock(ex) + ee_len - num;
		ext_debug(" free last %u blocks starting %llu \n ", num, pblk);
		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
		/*
		 * If the block range to be freed didn't start at the
		 * beginning of a cluster, and we removed the entire
		 * extent, save the partial cluster here, since we
		 * might need to delete if we determine that the
		 * truncate operation has removed all of the blocks in
		 * the cluster.
		 */
		if (pblk & (sbi->s_cluster_ratio - 1) &&
		    (ee_len == num))
			*partial_cluster = EXT4_B2C(sbi, pblk);
		else
			*partial_cluster = 0;
	} else if (from == le32_to_cpu(ex->ee_block)
		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
		/* head removal */
		ext4_lblk_t num;
		ext4_fsblk_t start;

		num = to - from;
		start = ext4_ext_pblock(ex);

		ext_debug(" free first %u blocks starting %llu \n ", num, start);
		/* no buffer_head to forget here: pass NULL, not a bare 0 */
		ext4_free_blocks(handle, inode, NULL, start, num, flags);
	} else {
		printk(KERN_INFO " strange request: removal(2) "
		       " %u-%u from %u:%u \n ",
		       from, to, le32_to_cpu(ex->ee_block), ee_len);
	}
	return 0;
}
2011-05-25 15:41:43 +04:00
/*
* ext4_ext_rm_leaf ( ) Removes the extents associated with the
* blocks appearing between " start " and " end " , and splits the extents
* if " start " and " end " appear in the same extent
*
* @ handle : The journal handle
* @ inode : The files inode
* @ path : The path to the leaf
* @ start : The first block to remove
* @ end : The last block to remove
*/
2006-10-11 12:21:03 +04:00
static int
ext4_ext_rm_leaf ( handle_t * handle , struct inode * inode ,
2011-09-10 02:54:51 +04:00
struct ext4_ext_path * path , ext4_fsblk_t * partial_cluster ,
ext4_lblk_t start , ext4_lblk_t end )
2006-10-11 12:21:03 +04:00
{
2011-09-10 02:54:51 +04:00
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
2006-10-11 12:21:03 +04:00
int err = 0 , correct_index = 0 ;
int depth = ext_depth ( inode ) , credits ;
struct ext4_extent_header * eh ;
2008-01-29 07:58:27 +03:00
ext4_lblk_t a , b , block ;
unsigned num ;
ext4_lblk_t ex_ee_block ;
2006-10-11 12:21:03 +04:00
unsigned short ex_ee_len ;
2007-07-18 05:42:41 +04:00
unsigned uninitialized = 0 ;
2006-10-11 12:21:03 +04:00
struct ext4_extent * ex ;
2011-05-25 15:41:43 +04:00
struct ext4_map_blocks map ;
2006-10-11 12:21:03 +04:00
2007-07-18 17:19:09 +04:00
/* the header must be checked already in ext4_ext_remove_space() */
2008-01-29 07:58:27 +03:00
ext_debug ( " truncate since %u in leaf \n " , start ) ;
2006-10-11 12:21:03 +04:00
if ( ! path [ depth ] . p_hdr )
path [ depth ] . p_hdr = ext_block_hdr ( path [ depth ] . p_bh ) ;
eh = path [ depth ] . p_hdr ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_hdr = = NULL ) ) {
EXT4_ERROR_INODE ( inode , " path[%d].p_hdr == NULL " , depth ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
/* find where to start removing */
ex = EXT_LAST_EXTENT ( eh ) ;
ex_ee_block = le32_to_cpu ( ex - > ee_block ) ;
2007-07-18 05:42:41 +04:00
ex_ee_len = ext4_ext_get_actual_len ( ex ) ;
2006-10-11 12:21:03 +04:00
while ( ex > = EXT_FIRST_EXTENT ( eh ) & &
ex_ee_block + ex_ee_len > start ) {
2009-06-10 22:22:55 +04:00
if ( ext4_ext_is_uninitialized ( ex ) )
uninitialized = 1 ;
else
uninitialized = 0 ;
2009-09-18 21:34:55 +04:00
ext_debug ( " remove ext %u:[%d]%d \n " , ex_ee_block ,
uninitialized , ex_ee_len ) ;
2006-10-11 12:21:03 +04:00
path [ depth ] . p_ext = ex ;
a = ex_ee_block > start ? ex_ee_block : start ;
2011-05-25 15:41:43 +04:00
b = ex_ee_block + ex_ee_len - 1 < end ?
ex_ee_block + ex_ee_len - 1 : end ;
2006-10-11 12:21:03 +04:00
ext_debug ( " border %u:%u \n " , a , b ) ;
2011-05-25 15:41:43 +04:00
/* If this extent is beyond the end of the hole, skip it */
if ( end < = ex_ee_block ) {
ex - - ;
ex_ee_block = le32_to_cpu ( ex - > ee_block ) ;
ex_ee_len = ext4_ext_get_actual_len ( ex ) ;
continue ;
} else if ( a ! = ex_ee_block & &
b ! = ex_ee_block + ex_ee_len - 1 ) {
/*
* If this is a truncate , then this condition should
* never happen because at least one of the end points
* needs to be on the edge of the extent .
*/
2011-06-06 08:05:17 +04:00
if ( end = = EXT_MAX_BLOCKS - 1 ) {
2011-05-25 15:41:43 +04:00
ext_debug ( " bad truncate %u:%u \n " ,
start , end ) ;
block = 0 ;
num = 0 ;
err = - EIO ;
goto out ;
}
/*
* else this is a hole punch , so the extent needs to
* be split since neither edge of the hole is on the
* extent edge
*/
else {
map . m_pblk = ext4_ext_pblock ( ex ) ;
map . m_lblk = ex_ee_block ;
map . m_len = b - ex_ee_block ;
err = ext4_split_extent ( handle ,
inode , path , & map , 0 ,
EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
EXT4_GET_BLOCKS_PRE_IO ) ;
if ( err < 0 )
goto out ;
ex_ee_len = ext4_ext_get_actual_len ( ex ) ;
b = ex_ee_block + ex_ee_len - 1 < end ?
ex_ee_block + ex_ee_len - 1 : end ;
/* Then remove tail of this extent */
block = ex_ee_block ;
num = a - block ;
}
2006-10-11 12:21:03 +04:00
} else if ( a ! = ex_ee_block ) {
/* remove tail of the extent */
block = ex_ee_block ;
num = a - block ;
} else if ( b ! = ex_ee_block + ex_ee_len - 1 ) {
/* remove head of the extent */
2011-05-25 15:41:43 +04:00
block = b ;
num = ex_ee_block + ex_ee_len - b ;
/*
* If this is a truncate , this condition
* should never happen
*/
2011-06-06 08:05:17 +04:00
if ( end = = EXT_MAX_BLOCKS - 1 ) {
2011-05-25 15:41:43 +04:00
ext_debug ( " bad truncate %u:%u \n " ,
start , end ) ;
err = - EIO ;
goto out ;
}
2006-10-11 12:21:03 +04:00
} else {
/* remove whole extent: excellent! */
block = ex_ee_block ;
num = 0 ;
2011-05-25 15:41:43 +04:00
if ( a ! = ex_ee_block ) {
ext_debug ( " bad truncate %u:%u \n " ,
start , end ) ;
err = - EIO ;
goto out ;
}
if ( b ! = ex_ee_block + ex_ee_len - 1 ) {
ext_debug ( " bad truncate %u:%u \n " ,
start , end ) ;
err = - EIO ;
goto out ;
}
2006-10-11 12:21:03 +04:00
}
2008-08-02 05:59:19 +04:00
/*
* 3 for leaf , sb , and inode plus 2 ( bmap and group
* descriptor ) for each block group ; assume two block
* groups plus ex_ee_len / blocks_per_block_group for
* the worst case
*/
credits = 7 + 2 * ( ex_ee_len / EXT4_BLOCKS_PER_GROUP ( inode - > i_sb ) ) ;
2006-10-11 12:21:03 +04:00
if ( ex = = EXT_FIRST_EXTENT ( eh ) ) {
correct_index = 1 ;
credits + = ( ext_depth ( inode ) ) + 1 ;
}
2009-12-09 06:42:15 +03:00
credits + = EXT4_MAXQUOTAS_TRANS_BLOCKS ( inode - > i_sb ) ;
2006-10-11 12:21:03 +04:00
2009-08-18 06:17:20 +04:00
err = ext4_ext_truncate_extend_restart ( handle , inode , credits ) ;
2008-07-12 03:27:31 +04:00
if ( err )
2006-10-11 12:21:03 +04:00
goto out ;
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
goto out ;
2011-09-10 02:54:51 +04:00
err = ext4_remove_blocks ( handle , inode , ex , partial_cluster ,
a , b ) ;
2006-10-11 12:21:03 +04:00
if ( err )
goto out ;
if ( num = = 0 ) {
2006-10-11 12:21:07 +04:00
/* this extent is removed; mark slot entirely unused */
2006-10-11 12:21:05 +04:00
ext4_ext_store_pblock ( ex , 0 ) ;
2011-05-25 15:41:43 +04:00
} else if ( block ! = ex_ee_block ) {
/*
* If this was a head removal , then we need to update
* the physical block since it is now at a different
* location
*/
ext4_ext_store_pblock ( ex , ext4_ext_pblock ( ex ) + ( b - a ) ) ;
2006-10-11 12:21:03 +04:00
}
ex - > ee_block = cpu_to_le32 ( block ) ;
ex - > ee_len = cpu_to_le16 ( num ) ;
2007-07-18 17:02:56 +04:00
/*
* Do not mark uninitialized if all the blocks in the
* extent have been removed .
*/
if ( uninitialized & & num )
2007-07-18 05:42:41 +04:00
ext4_ext_mark_uninitialized ( ex ) ;
2006-10-11 12:21:03 +04:00
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
if ( err )
goto out ;
2011-05-25 15:41:43 +04:00
/*
* If the extent was completely released ,
* we need to remove it from the leaf
*/
if ( num = = 0 ) {
2011-06-06 08:05:17 +04:00
if ( end ! = EXT_MAX_BLOCKS - 1 ) {
2011-05-25 15:41:43 +04:00
/*
* For hole punching , we need to scoot all the
* extents up when an extent is removed so that
* we dont have blank extents in the middle
*/
memmove ( ex , ex + 1 , ( EXT_LAST_EXTENT ( eh ) - ex ) *
sizeof ( struct ext4_extent ) ) ;
/* Now get rid of the one at the end */
memset ( EXT_LAST_EXTENT ( eh ) , 0 ,
sizeof ( struct ext4_extent ) ) ;
}
le16_add_cpu ( & eh - > eh_entries , - 1 ) ;
2011-09-10 02:54:51 +04:00
} else
* partial_cluster = 0 ;
2011-05-25 15:41:43 +04:00
2006-10-11 12:21:11 +04:00
ext_debug ( " new extent: %u:%u:%llu \n " , block , num ,
2010-10-28 05:30:14 +04:00
ext4_ext_pblock ( ex ) ) ;
2006-10-11 12:21:03 +04:00
ex - - ;
ex_ee_block = le32_to_cpu ( ex - > ee_block ) ;
2007-07-18 05:42:41 +04:00
ex_ee_len = ext4_ext_get_actual_len ( ex ) ;
2006-10-11 12:21:03 +04:00
}
if ( correct_index & & eh - > eh_entries )
err = ext4_ext_correct_indexes ( handle , inode , path ) ;
2011-09-10 02:54:51 +04:00
/*
* If there is still a entry in the leaf node , check to see if
* it references the partial cluster . This is the only place
* where it could ; if it doesn ' t , we can free the cluster .
*/
if ( * partial_cluster & & ex > = EXT_FIRST_EXTENT ( eh ) & &
( EXT4_B2C ( sbi , ext4_ext_pblock ( ex ) + ex_ee_len - 1 ) ! =
* partial_cluster ) ) {
int flags = EXT4_FREE_BLOCKS_FORGET ;
if ( S_ISDIR ( inode - > i_mode ) | | S_ISLNK ( inode - > i_mode ) )
flags | = EXT4_FREE_BLOCKS_METADATA ;
ext4_free_blocks ( handle , inode , NULL ,
EXT4_C2B ( sbi , * partial_cluster ) ,
sbi - > s_cluster_ratio , flags ) ;
* partial_cluster = 0 ;
}
2006-10-11 12:21:03 +04:00
/* if this leaf is free, then we should
* remove it from index block above */
if ( err = = 0 & & eh - > eh_entries = = 0 & & path [ depth ] . p_bh ! = NULL )
err = ext4_ext_rm_idx ( handle , inode , path + depth ) ;
out :
return err ;
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_more_to_rm :
* returns 1 if current index has to be freed ( even partial )
2006-10-11 12:21:03 +04:00
*/
2006-12-07 07:41:36 +03:00
static int
2006-10-11 12:21:03 +04:00
ext4_ext_more_to_rm ( struct ext4_ext_path * path )
{
BUG_ON ( path - > p_idx = = NULL ) ;
if ( path - > p_idx < EXT_FIRST_INDEX ( path - > p_hdr ) )
return 0 ;
/*
2006-10-11 12:21:07 +04:00
* if truncate on deeper level happened , it wasn ' t partial ,
2006-10-11 12:21:03 +04:00
* so we have to consider current index for truncation
*/
if ( le16_to_cpu ( path - > p_hdr - > eh_entries ) = = path - > p_block )
return 0 ;
return 1 ;
}
2011-07-18 07:21:03 +04:00
/*
 * ext4_ext_remove_space:
 * Remove every block from logical block @start to the end of the file
 * from the inode's extent tree.
 *
 * The tree is scanned from the right-hand side, walking depth-wise: leaves
 * are handed to ext4_ext_rm_leaf() and index blocks that end up empty are
 * removed with ext4_ext_rm_idx().  If a step returns -EAGAIN the whole scan
 * is restarted with a freshly allocated path.
 *
 * Returns 0 on success or a negative error code.
 */
static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
{
	struct super_block *sb = inode->i_sb;
	int depth = ext_depth(inode);
	struct ext4_ext_path *path;
	ext4_fsblk_t partial_cluster = 0;
	handle_t *handle;
	int i, err;

	ext_debug(" truncate since %u \n ", start);

	/* probably first extent we're gonna free will be last in block */
	handle = ext4_journal_start(inode, depth + 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

again:
	ext4_ext_invalidate_cache(inode);

	/*
	 * We start scanning from right side, freeing all the blocks
	 * after i_size and walking into the tree depth-wise.
	 */
	depth = ext_depth(inode);
	path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
	if (path == NULL) {
		ext4_journal_stop(handle);
		return -ENOMEM;
	}
	path[0].p_depth = depth;
	path[0].p_hdr = ext_inode_hdr(inode);
	if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
		err = -EIO;
		goto out;
	}
	i = err = 0;

	while (i >= 0 && err == 0) {
		if (i == depth) {
			/* this is leaf block */
			err = ext4_ext_rm_leaf(handle, inode, path,
					       &partial_cluster, start,
					       EXT_MAX_BLOCKS - 1);
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			continue;
		}

		/* this is index block */
		if (!path[i].p_hdr) {
			ext_debug(" initialize header \n ");
			path[i].p_hdr = ext_block_hdr(path[i].p_bh);
		}

		if (!path[i].p_idx) {
			/* this level hasn't been touched yet */
			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries) + 1;
			ext_debug(" init index ptr: hdr 0x%p, num %d \n ",
				  path[i].p_hdr,
				  le16_to_cpu(path[i].p_hdr->eh_entries));
		} else {
			/* we were already here, see at next index */
			path[i].p_idx--;
		}

		ext_debug(" level %d - index, first 0x%p, cur 0x%p \n ",
			  i, EXT_FIRST_INDEX(path[i].p_hdr),
			  path[i].p_idx);
		if (ext4_ext_more_to_rm(path + i)) {
			struct buffer_head *bh;

			/* go to the next level */
			ext_debug(" move to level %d (block %llu) \n ",
				  i + 1, ext4_idx_pblock(path[i].p_idx));
			memset(path + i + 1, 0, sizeof(*path));
			bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
			if (!bh) {
				/* should we reset i_size? */
				err = -EIO;
				break;
			}
			/*
			 * Until bh is stored in path[] nothing else knows
			 * about it, so the error exits below must drop the
			 * reference themselves.
			 */
			if (WARN_ON(i + 1 > depth)) {
				brelse(bh);
				err = -EIO;
				break;
			}
			if (ext4_ext_check(inode, ext_block_hdr(bh),
					   depth - i - 1)) {
				brelse(bh);
				err = -EIO;
				break;
			}
			path[i + 1].p_bh = bh;

			/* save actual number of indexes since this
			 * number is changed at the next iteration */
			path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
			i++;
		} else {
			/* we finished processing this index, go up */
			if (path[i].p_hdr->eh_entries == 0 && i > 0) {
				/* index is empty, remove it;
				 * handle must be already prepared by the
				 * truncatei_leaf() */
				err = ext4_ext_rm_idx(handle, inode, path + i);
			}
			/* root level has p_bh == NULL, brelse() eats this */
			brelse(path[i].p_bh);
			path[i].p_bh = NULL;
			i--;
			ext_debug(" return to level %d \n ", i);
		}
	}

	/* If we still have something in the partial cluster and we have removed
	 * even the first extent, then we should free the blocks in the partial
	 * cluster as well. */
	if (partial_cluster && path->p_hdr->eh_entries == 0) {
		int flags = EXT4_FREE_BLOCKS_FORGET;

		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			flags |= EXT4_FREE_BLOCKS_METADATA;

		ext4_free_blocks(handle, inode, NULL,
				 EXT4_C2B(EXT4_SB(sb), partial_cluster),
				 EXT4_SB(sb)->s_cluster_ratio, flags);
		partial_cluster = 0;
	}

	/* TODO: flexible tree reduction should be here */
	if (path->p_hdr->eh_entries == 0) {
		/*
		 * truncate to zero freed all the tree,
		 * so we need to correct eh_depth
		 */
		err = ext4_ext_get_access(handle, inode, path);
		if (err == 0) {
			ext_inode_hdr(inode)->eh_depth = 0;
			ext_inode_hdr(inode)->eh_max =
				cpu_to_le16(ext4_ext_space_root(inode, 0));
			err = ext4_ext_dirty(handle, inode, path);
		}
	}
out:
	ext4_ext_drop_refs(path);
	kfree(path);
	if (err == -EAGAIN)
		goto again;
	ext4_journal_stop(handle);

	return err;
}
/*
* called at mount time
*/
void ext4_ext_init ( struct super_block * sb )
{
/*
* possible initialization would be here
*/
2009-01-06 22:53:16 +03:00
if ( EXT4_HAS_INCOMPAT_FEATURE ( sb , EXT4_FEATURE_INCOMPAT_EXTENTS ) ) {
2009-09-29 23:51:30 +04:00
# if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
2008-09-09 07:00:52 +04:00
printk ( KERN_INFO " EXT4-fs: file extents enabled " ) ;
2007-02-17 21:20:16 +03:00
# ifdef AGGRESSIVE_TEST
printk ( " , aggressive tests " ) ;
2006-10-11 12:21:03 +04:00
# endif
# ifdef CHECK_BINSEARCH
printk ( " , check binsearch " ) ;
# endif
# ifdef EXTENTS_STATS
printk ( " , stats " ) ;
# endif
printk ( " \n " ) ;
2009-09-29 23:51:30 +04:00
# endif
2006-10-11 12:21:03 +04:00
# ifdef EXTENTS_STATS
spin_lock_init ( & EXT4_SB ( sb ) - > s_ext_stats_lock ) ;
EXT4_SB ( sb ) - > s_ext_min = 1 < < 30 ;
EXT4_SB ( sb ) - > s_ext_max = 0 ;
# endif
}
}
/*
* called at umount time
*/
void ext4_ext_release ( struct super_block * sb )
{
2009-01-06 22:53:16 +03:00
if ( ! EXT4_HAS_INCOMPAT_FEATURE ( sb , EXT4_FEATURE_INCOMPAT_EXTENTS ) )
2006-10-11 12:21:03 +04:00
return ;
# ifdef EXTENTS_STATS
if ( EXT4_SB ( sb ) - > s_ext_blocks & & EXT4_SB ( sb ) - > s_ext_extents ) {
struct ext4_sb_info * sbi = EXT4_SB ( sb ) ;
printk ( KERN_ERR " EXT4-fs: %lu blocks in %lu extents (%lu ave) \n " ,
sbi - > s_ext_blocks , sbi - > s_ext_extents ,
sbi - > s_ext_blocks / sbi - > s_ext_extents ) ;
printk ( KERN_ERR " EXT4-fs: extents: %lu min, %lu max, max depth %lu \n " ,
sbi - > s_ext_min , sbi - > s_ext_max , sbi - > s_depth_max ) ;
}
# endif
}
2008-04-29 16:11:12 +04:00
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout ( struct inode * inode , struct ext4_extent * ex )
{
2010-10-28 05:30:06 +04:00
ext4_fsblk_t ee_pblock ;
unsigned int ee_len ;
2010-05-12 08:00:00 +04:00
int ret ;
2008-04-29 16:11:12 +04:00
ee_len = ext4_ext_get_actual_len ( ex ) ;
2010-10-28 05:30:14 +04:00
ee_pblock = ext4_ext_pblock ( ex ) ;
2010-05-12 08:00:00 +04:00
2010-10-28 07:44:47 +04:00
ret = sb_issue_zeroout ( inode - > i_sb , ee_pblock , ee_len , GFP_NOFS ) ;
2010-10-28 05:30:06 +04:00
if ( ret > 0 )
ret = 0 ;
2008-04-29 16:11:12 +04:00
2010-10-28 05:30:06 +04:00
return ret ;
2008-04-29 16:11:12 +04:00
}
2011-05-03 20:23:07 +04:00
/*
 * Flags used by extent splitting (the split_flag argument).
 */
/* safe to zeroout if split fails due to ENOSPC */
#define EXT4_EXT_MAY_ZEROOUT	0x1
/* mark first half uninitialized */
#define EXT4_EXT_MARK_UNINIT1	0x2
/* mark second half uninitialized */
#define EXT4_EXT_MARK_UNINIT2	0x4
/*
 * ext4_split_extent_at() splits an extent at given block.
 *
 * @handle: the journal handle
 * @inode: the file inode
 * @path: the path to the extent
 * @split: the logical block where the extent is split.
 * @split_flag: indicates if the extent could be zeroed out if the split
 *		fails, and the states (init or uninit) of the new extents.
 * @flags: flags used to insert the new extent into the extent tree.
 *
 *
 * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
 * of which are determined by split_flag.
 *
 * There are two cases:
 *  a> the extent is split into two extents.
 *  b> split is not needed, and just mark the extent.
 *
 * return 0 on success.
 */
static int ext4_split_extent_at(handle_t *handle,
				struct inode *inode,
				struct ext4_ext_path *path,
				ext4_lblk_t split,
				int split_flag,
				int flags)
{
	ext4_fsblk_t newblock;
	ext4_lblk_t ee_block;
	struct ext4_extent *ex, newex, orig_ex;
	struct ext4_extent *ex2 = NULL;
	unsigned int ee_len, depth;
	int err = 0;

	ext_debug(" ext4_split_extents_at: inode %lu, logical "
		  " block %llu \n ", inode->i_ino, (unsigned long long)split);

	ext4_ext_show_leaf(inode, path);

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);
	/* physical block that corresponds to logical block @split */
	newblock = split - ee_block + ext4_ext_pblock(ex);

	BUG_ON(split < ee_block || split >= (ee_block + ee_len));

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (err)
		goto out;

	if (split == ee_block) {
		/*
		 * case b: block @split is the block that the extent begins with
		 * then we just change the state of the extent, and splitting
		 * is not needed.
		 */
		if (split_flag & EXT4_EXT_MARK_UNINIT2)
			ext4_ext_mark_uninitialized(ex);
		else
			ext4_ext_mark_initialized(ex);

		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
			ext4_ext_try_to_merge(inode, path, ex);

		err = ext4_ext_dirty(handle, inode, path + depth);
		goto out;
	}

	/* case a */
	/* keep a copy so the original length can be restored on failure */
	memcpy(&orig_ex, ex, sizeof(orig_ex));
	ex->ee_len = cpu_to_le16(split - ee_block);
	if (split_flag & EXT4_EXT_MARK_UNINIT1)
		ext4_ext_mark_uninitialized(ex);

	/*
	 * path may lead to new leaf, not to original leaf any more
	 * after ext4_ext_insert_extent() returns,
	 */
	err = ext4_ext_dirty(handle, inode, path + depth);
	if (err)
		goto fix_extent_len;

	ex2 = &newex;
	ex2->ee_block = cpu_to_le32(split);
	ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
	ext4_ext_store_pblock(ex2, newblock);
	if (split_flag & EXT4_EXT_MARK_UNINIT2)
		ext4_ext_mark_uninitialized(ex2);

	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
		err = ext4_ext_zeroout(inode, &orig_ex);
		if (err)
			goto fix_extent_len;
		/*
		 * update the extent length and mark as initialized;
		 * ee_len is a 16-bit little-endian field, so it must be
		 * stored with cpu_to_le16() (a 32-bit conversion stores the
		 * wrong value on big-endian machines).
		 */
		ex->ee_len = cpu_to_le16(ee_len);
		ext4_ext_try_to_merge(inode, path, ex);
		err = ext4_ext_dirty(handle, inode, path + depth);
		goto out;
	} else if (err)
		goto fix_extent_len;

out:
	ext4_ext_show_leaf(inode, path);
	return err;

fix_extent_len:
	/* best effort: put the original length back and report the error */
	ex->ee_len = orig_ex.ee_len;
	ext4_ext_dirty(handle, inode, path + depth);
	return err;
}
/*
 * ext4_split_extent() splits an extent and marks the part covered by @map
 * as split_flag indicates.
 *
 * It may result in splitting the extent into multiple extents (up to three).
 * There are three possibilities:
 *   a> There is no split required
 *   b> Splits in two extents: Split is happening at either end of the extent
 *   c> Splits in three extents: Someone is splitting in middle of the extent
 *
 * The tail (blocks past the end of @map) is cut off first, then the path is
 * looked up again and the head (blocks before @map->m_lblk) is cut off.
 *
 * Returns @map->m_len on success or a negative error code.
 */
static int ext4_split_extent(handle_t *handle,
			     struct inode *inode,
			     struct ext4_ext_path *path,
			     struct ext4_map_blocks *map,
			     int split_flag,
			     int flags)
{
	unsigned int depth = ext_depth(inode);
	struct ext4_extent *ex = path[depth].p_ext;
	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
	unsigned int ee_len = ext4_ext_get_actual_len(ex);
	int uninitialized = ext4_ext_is_uninitialized(ex);
	/* only the MAY_ZEROOUT bit is inherited by both partial splits */
	int zeroout_flag = split_flag & EXT4_EXT_MAY_ZEROOUT;
	int split_flag1;
	int err;

	if (map->m_lblk + map->m_len < ee_block + ee_len) {
		/* cut at the end of the mapped range; both halves keep state */
		split_flag1 = zeroout_flag;
		if (uninitialized)
			split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
				       EXT4_EXT_MARK_UNINIT2;

		err = ext4_split_extent_at(handle, inode, path,
					   map->m_lblk + map->m_len,
					   split_flag1,
					   flags | EXT4_GET_BLOCKS_PRE_IO);
		if (err)
			return err;
	}

	/* the tree may have changed, so look the extent up again */
	ext4_ext_drop_refs(path);
	path = ext4_ext_find_extent(inode, map->m_lblk, path);
	if (IS_ERR(path))
		return PTR_ERR(path);

	if (map->m_lblk >= ee_block) {
		/* cut at the start of the mapped range */
		split_flag1 = zeroout_flag;
		if (uninitialized)
			split_flag1 |= EXT4_EXT_MARK_UNINIT1;
		if (split_flag & EXT4_EXT_MARK_UNINIT2)
			split_flag1 |= EXT4_EXT_MARK_UNINIT2;

		err = ext4_split_extent_at(handle, inode, path,
					   map->m_lblk, split_flag1, flags);
		if (err)
			return err;
	}

	ext4_ext_show_leaf(inode, path);
	return map->m_len;
}
2008-04-17 18:38:59 +04:00
/*
 * Length threshold compared against extent lengths by
 * ext4_ext_convert_to_initialized() when it decides whether to zero out
 * blocks directly instead of splitting the extent.
 */
# define EXT4_EXT_ZERO_LEN 7
2007-07-18 05:42:38 +04:00
/*
2010-05-17 03:00:00 +04:00
* This function is called by ext4_ext_map_blocks ( ) if someone tries to write
2007-07-18 05:42:38 +04:00
* to an uninitialized extent . It may result in splitting the uninitialized
2011-03-31 05:57:33 +04:00
* extent into multiple extents ( up to three - one initialized and two
2007-07-18 05:42:38 +04:00
* uninitialized ) .
* There are three possibilities :
* a > There is no split required : Entire extent should be initialized
* b > Splits in two extents : Write is happening at either end of the extent
* c > Splits in three extents : Somone is writing in middle of the extent
*/
2008-01-29 07:58:27 +03:00
static int ext4_ext_convert_to_initialized ( handle_t * handle ,
2010-05-17 03:00:00 +04:00
struct inode * inode ,
struct ext4_map_blocks * map ,
struct ext4_ext_path * path )
2007-07-18 05:42:38 +04:00
{
2011-05-03 20:25:07 +04:00
struct ext4_map_blocks split_map ;
struct ext4_extent zero_ex ;
struct ext4_extent * ex ;
2010-05-16 14:00:00 +04:00
ext4_lblk_t ee_block , eof_block ;
2008-01-29 07:58:27 +03:00
unsigned int allocated , ee_len , depth ;
2007-07-18 05:42:38 +04:00
int err = 0 ;
2011-05-03 20:25:07 +04:00
int split_flag = 0 ;
2010-05-16 14:00:00 +04:00
ext_debug ( " ext4_ext_convert_to_initialized: inode %lu, logical "
" block %llu, max_blocks %u \n " , inode - > i_ino ,
2010-05-17 03:00:00 +04:00
( unsigned long long ) map - > m_lblk , map - > m_len ) ;
2010-05-16 14:00:00 +04:00
eof_block = ( inode - > i_size + inode - > i_sb - > s_blocksize - 1 ) > >
inode - > i_sb - > s_blocksize_bits ;
2010-05-17 03:00:00 +04:00
if ( eof_block < map - > m_lblk + map - > m_len )
eof_block = map - > m_lblk + map - > m_len ;
2007-07-18 05:42:38 +04:00
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
ee_block = le32_to_cpu ( ex - > ee_block ) ;
ee_len = ext4_ext_get_actual_len ( ex ) ;
2010-05-17 03:00:00 +04:00
allocated = ee_len - ( map - > m_lblk - ee_block ) ;
2007-07-18 05:42:38 +04:00
2011-05-03 20:25:07 +04:00
WARN_ON ( map - > m_lblk < ee_block ) ;
2010-05-16 14:00:00 +04:00
/*
* It is safe to convert extent to initialized via explicit
* zeroout only if extent is fully insde i_size or new_size .
*/
2011-05-03 20:25:07 +04:00
split_flag | = ee_block + ee_len < = eof_block ? EXT4_EXT_MAY_ZEROOUT : 0 ;
2010-05-16 14:00:00 +04:00
2008-04-17 18:38:59 +04:00
/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
2011-05-03 20:25:07 +04:00
if ( ee_len < = 2 * EXT4_EXT_ZERO_LEN & &
( EXT4_EXT_MAY_ZEROOUT & split_flag ) ) {
err = ext4_ext_zeroout ( inode , ex ) ;
2008-04-17 18:38:59 +04:00
if ( err )
2008-08-03 02:51:32 +04:00
goto out ;
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
goto out ;
2011-05-03 20:25:07 +04:00
ext4_ext_mark_initialized ( ex ) ;
ext4_ext_try_to_merge ( inode , path , ex ) ;
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
goto out ;
2007-07-18 05:42:38 +04:00
}
2011-05-03 20:25:07 +04:00
2007-07-18 05:42:38 +04:00
/*
2011-05-03 20:25:07 +04:00
* four cases :
* 1. split the extent into three extents .
* 2. split the extent into two extents , zeroout the first half .
* 3. split the extent into two extents , zeroout the second half .
* 4. split the extent into two extents with out zeroout .
2007-07-18 05:42:38 +04:00
*/
2011-05-03 20:25:07 +04:00
split_map . m_lblk = map - > m_lblk ;
split_map . m_len = map - > m_len ;
if ( allocated > map - > m_len ) {
if ( allocated < = EXT4_EXT_ZERO_LEN & &
( EXT4_EXT_MAY_ZEROOUT & split_flag ) ) {
/* case 3 */
zero_ex . ee_block =
2011-05-16 18:11:09 +04:00
cpu_to_le32 ( map - > m_lblk ) ;
zero_ex . ee_len = cpu_to_le16 ( allocated ) ;
2011-05-03 20:25:07 +04:00
ext4_ext_store_pblock ( & zero_ex ,
ext4_ext_pblock ( ex ) + map - > m_lblk - ee_block ) ;
err = ext4_ext_zeroout ( inode , & zero_ex ) ;
2007-07-18 05:42:38 +04:00
if ( err )
goto out ;
2011-05-03 20:25:07 +04:00
split_map . m_lblk = map - > m_lblk ;
split_map . m_len = allocated ;
} else if ( ( map - > m_lblk - ee_block + map - > m_len <
EXT4_EXT_ZERO_LEN ) & &
( EXT4_EXT_MAY_ZEROOUT & split_flag ) ) {
/* case 2 */
if ( map - > m_lblk ! = ee_block ) {
zero_ex . ee_block = ex - > ee_block ;
zero_ex . ee_len = cpu_to_le16 ( map - > m_lblk -
ee_block ) ;
ext4_ext_store_pblock ( & zero_ex ,
ext4_ext_pblock ( ex ) ) ;
err = ext4_ext_zeroout ( inode , & zero_ex ) ;
if ( err )
goto out ;
}
split_map . m_lblk = ee_block ;
2011-05-16 18:11:09 +04:00
split_map . m_len = map - > m_lblk - ee_block + map - > m_len ;
allocated = map - > m_len ;
2007-07-18 05:42:38 +04:00
}
}
2011-05-03 20:25:07 +04:00
allocated = ext4_split_extent ( handle , inode , path ,
& split_map , split_flag , 0 ) ;
if ( allocated < 0 )
err = allocated ;
2007-07-18 05:42:38 +04:00
out :
return err ? err : allocated ;
}
2009-09-28 23:49:08 +04:00
/*
 * This function is called by ext4_ext_map_blocks() from
 * ext4_get_blocks_dio_write() when DIO writes to an uninitialized extent.
 *
 * Writing to an uninitialized extent may result in splitting the
 * uninitialized extent into multiple initialized/uninitialized extents
 * (up to three).
 * There are three possibilities:
 *   a> There is no split required: Entire extent should be uninitialized
 *   b> Splits in two extents: Write is happening at either end of the extent
 *   c> Splits in three extents: Someone is writing in middle of the extent
 *
 * One or more index blocks may be needed if the extent tree grows after
 * the uninitialized extent split. To prevent ENOSPC from occurring at IO
 * completion, we need to split the uninitialized extent before DIO submits
 * the IO. The uninitialized extent will be split into three uninitialized
 * extents (at most) at this time. After IO completes, the part being filled
 * will be converted to initialized by the end_io callback function via
 * ext4_convert_unwritten_extents().
 *
 * Returns the size of uninitialized extent to be written on success.
 */
static int ext4_split_unwritten_extents(handle_t *handle,
					struct inode *inode,
					struct ext4_map_blocks *map,
					struct ext4_ext_path *path,
					int flags)
{
	struct ext4_extent *ex;
	ext4_lblk_t eof_block, ee_block;
	unsigned int ee_len;
	int depth;
	int split_flag;

	ext_debug(" ext4_split_unwritten_extents: inode %lu, logical "
		  " block %llu, max_blocks %u \n ", inode->i_ino,
		  (unsigned long long)map->m_lblk, map->m_len);

	/* first block past i_size, or past the mapped range if that is larger */
	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
		inode->i_sb->s_blocksize_bits;
	if (eof_block < map->m_lblk + map->m_len)
		eof_block = map->m_lblk + map->m_len;

	depth = ext_depth(inode);
	ex = path[depth].p_ext;
	ee_block = le32_to_cpu(ex->ee_block);
	ee_len = ext4_ext_get_actual_len(ex);

	/* the part covered by @map stays uninitialized until IO completes */
	split_flag = EXT4_EXT_MARK_UNINIT2;
	/*
	 * It is safe to convert extent to initialized via explicit
	 * zeroout only if extent is fully inside i_size or new_size.
	 */
	if (ee_block + ee_len <= eof_block)
		split_flag |= EXT4_EXT_MAY_ZEROOUT;

	return ext4_split_extent(handle, inode, path, map, split_flag,
				 flags | EXT4_GET_BLOCKS_PRE_IO);
}
2011-05-03 19:45:29 +04:00
2010-03-02 21:28:44 +03:00
/*
 * Mark the extent at the end of @path as initialized, try to merge it with
 * its neighbours and dirty the leaf.  Returns 0 or the error reported by
 * ext4_ext_get_access() / ext4_ext_dirty().
 */
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
					      struct inode *inode,
					      struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex = path[depth].p_ext;
	int err;

	ext_debug(" ext4_convert_unwritten_extents_endio: inode %lu, logical "
		  " block %llu, max_blocks %u \n ", inode->i_ino,
		  (unsigned long long)le32_to_cpu(ex->ee_block),
		  ext4_ext_get_actual_len(ex));

	err = ext4_ext_get_access(handle, inode, path + depth);
	if (!err) {
		/* first mark the extent as initialized */
		ext4_ext_mark_initialized(ex);

		/*
		 * note: ext4_ext_correct_indexes() isn't needed here because
		 * borders are not changed
		 */
		ext4_ext_try_to_merge(inode, path, ex);

		/* Mark modified extent as dirty */
		err = ext4_ext_dirty(handle, inode, path + depth);
	}

	ext4_ext_show_leaf(inode, path);
	return err;
}
2009-12-30 07:39:06 +03:00
/*
 * Call unmap_underlying_metadata() on each of the @count consecutive
 * blocks of @bdev starting at @block.
 */
static void unmap_underlying_metadata_blocks(struct block_device *bdev,
					     sector_t block, int count)
{
	while (count-- > 0)
		unmap_underlying_metadata(bdev, block++);
}
2010-10-28 05:23:12 +04:00
/*
* Handle EOFBLOCKS_FL flag , clearing it if necessary
*/
static int check_eofblocks_fl ( handle_t * handle , struct inode * inode ,
2011-01-10 21:03:35 +03:00
ext4_lblk_t lblk ,
2010-10-28 05:23:12 +04:00
struct ext4_ext_path * path ,
unsigned int len )
{
int i , depth ;
struct ext4_extent_header * eh ;
2011-03-23 21:08:27 +03:00
struct ext4_extent * last_ex ;
2010-10-28 05:23:12 +04:00
if ( ! ext4_test_inode_flag ( inode , EXT4_INODE_EOFBLOCKS ) )
return 0 ;
depth = ext_depth ( inode ) ;
eh = path [ depth ] . p_hdr ;
if ( unlikely ( ! eh - > eh_entries ) ) {
EXT4_ERROR_INODE ( inode , " eh->eh_entries == 0 and "
" EOFBLOCKS_FL set " ) ;
return - EIO ;
}
last_ex = EXT_LAST_EXTENT ( eh ) ;
/*
* We should clear the EOFBLOCKS_FL flag if we are writing the
* last block in the last extent in the file . We test this by
* first checking to see if the caller to
* ext4_ext_get_blocks ( ) was interested in the last block ( or
* a block beyond the last block ) in the current extent . If
* this turns out to be false , we can bail out from this
* function immediately .
*/
2011-01-10 21:03:35 +03:00
if ( lblk + len < le32_to_cpu ( last_ex - > ee_block ) +
2010-10-28 05:23:12 +04:00
ext4_ext_get_actual_len ( last_ex ) )
return 0 ;
/*
* If the caller does appear to be planning to write at or
* beyond the end of the current extent , we then test to see
* if the current extent is the last extent in the file , by
* checking to make sure it was reached via the rightmost node
* at each level of the tree .
*/
for ( i = depth - 1 ; i > = 0 ; i - - )
if ( path [ i ] . p_idx ! = EXT_LAST_INDEX ( path [ i ] . p_hdr ) )
return 0 ;
ext4_clear_inode_flag ( inode , EXT4_INODE_EOFBLOCKS ) ;
return ext4_mark_inode_dirty ( handle , inode ) ;
}
2011-09-10 03:04:51 +04:00
/**
 * ext4_find_delalloc_range: find delayed allocated block in the given range.
 *
 * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
 * whether there are any buffers marked for delayed allocation. It returns '1'
 * on the first delalloc'ed buffer head found. If no buffer head in the given
 * range is marked for delalloc, it returns 0.
 * lblk_start should always be <= lblk_end.
 * search_hint_reverse is to indicate that searching in reverse from lblk_end to
 * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
 * block sooner). This is useful when blocks are truncated sequentially from
 * lblk_start towards lblk_end.
 */
static int ext4_find_delalloc_range(struct inode *inode,
				    ext4_lblk_t lblk_start,
				    ext4_lblk_t lblk_end,
				    int search_hint_reverse)
{
	struct address_space *mapping = inode->i_mapping;
	struct buffer_head *first_bh, *cur_bh = NULL;
	struct page *pg;
	ext4_lblk_t cur, pg_lblk;
	pgoff_t pg_index;

	/* reverse search won't work if fs block size is less than page size */
	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
		search_hint_reverse = 0;

	cur = search_hint_reverse ? lblk_end : lblk_start;
	pg_index = cur >> (PAGE_CACHE_SHIFT - inode->i_blkbits);

	while ((cur >= lblk_start) && (cur <= lblk_end)) {
		pg = find_get_page(mapping, pg_index);
		if (!pg || !PageDirty(pg))
			goto nextpage;

		if (PageWriteback(pg)) {
			/*
			 * This might be a race with allocation and writeout.
			 * In this case we just assume that the rest of the
			 * range will eventually be written and there won't be
			 * any delalloc blocks left.
			 * TODO: the above assumption is troublesome, but might
			 * work better in practice. Another option could be to
			 * note somewhere that the cluster is getting written
			 * out and detect that here.
			 */
			page_cache_release(pg);
			return 0;
		}

		if (!page_has_buffers(pg))
			goto nextpage;

		first_bh = page_buffers(pg);
		if (!first_bh)
			goto nextpage;

		cur_bh = first_bh;
		pg_lblk = pg_index << (PAGE_CACHE_SHIFT -
				       inode->i_blkbits);
		do {
			if (unlikely(pg_lblk < lblk_start)) {
				/*
				 * This is possible when fs block size is less
				 * than page size and our cluster starts/ends
				 * in middle of the page. So we need to skip
				 * the initial few blocks till we reach the
				 * 'lblk'
				 */
				pg_lblk++;
			} else if (buffer_delay(cur_bh)) {
				page_cache_release(pg);
				return 1;
			} else if (search_hint_reverse) {
				cur--;
			} else {
				cur++;
			}
		} while ((cur >= lblk_start) && (cur <= lblk_end) &&
			 ((cur_bh = cur_bh->b_this_page) != first_bh));
nextpage:
		if (pg)
			page_cache_release(pg);
		/*
		 * Move to next page. 'cur' will be the first lblk in the next
		 * page.
		 */
		if (search_hint_reverse)
			pg_index--;
		else
			pg_index++;
		cur = pg_index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	}

	return 0;
}
/*
 * Run ext4_find_delalloc_range() over the whole cluster containing @lblk:
 * the range starts at @lblk rounded down to a multiple of s_cluster_ratio
 * and covers s_cluster_ratio blocks.
 */
int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
			       int search_hint_reverse)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t first = lblk & (~(sbi->s_cluster_ratio - 1));
	ext4_lblk_t last = first + sbi->s_cluster_ratio - 1;

	return ext4_find_delalloc_range(inode, first, last,
					search_hint_reverse);
}
/**
 * Determines how many complete clusters (out of those specified by the 'map')
 * are under delalloc and were reserved quota for.
 * This function is called when we are writing out the blocks that were
 * originally written with their allocation delayed, but then the space was
 * allocated using fallocate() before the delayed allocation could be resolved.
 * The cases to look for are:
 * ('=' indicates delayed allocated blocks
 *  '-' indicates non-delayed allocated blocks)
 * (a) partial clusters towards beginning and/or end outside of allocated range
 *     are not delalloc'ed.
 *     Ex:
 *     |----c---=|====c====|====c====|===-c----|
 *              |++++++ allocated ++++++|
 *     ==> 4 complete clusters in above example
 *
 * (b) partial cluster (outside of allocated range) towards either end is
 *     marked for delayed allocation. In this case, we will exclude that
 *     cluster.
 *     Ex:
 *     |----====c========|========c========|
 *          |++++++ allocated ++++++|
 *     ==> 1 complete clusters in above example
 *
 *     Ex:
 *     |================c================|
 *          |++++++ allocated ++++++|
 *     ==> 0 complete clusters in above example
 *
 * The ext4_da_update_reserve_space will be called only if we
 * determine here that there were some "entire" clusters that span
 * this 'allocated' range.
 * In the non-bigalloc case, this function will just end up returning num_blks
 * without ever calling ext4_find_delalloc_range.
 */
static unsigned int
get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
			   unsigned int num_blks)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t first_cluster, last_cluster;
	ext4_lblk_t range_first, range_last, in_cluster;
	unsigned int nr_clusters = 0;

	first_cluster = EXT4_B2C(sbi, lblk_start);
	last_cluster = EXT4_B2C(sbi, lblk_start + num_blks - 1);

	/* max possible clusters for this allocation */
	nr_clusters = last_cluster - first_cluster + 1;

	/*
	 * Left side: blocks of the first cluster that precede lblk_start.
	 * Subtracting the in-cluster offset is the same as masking it off.
	 */
	in_cluster = lblk_start & (sbi->s_cluster_ratio - 1);
	if (in_cluster) {
		range_first = lblk_start - in_cluster;
		range_last = lblk_start - 1;

		if (ext4_find_delalloc_range(inode, range_first,
					     range_last, 0))
			nr_clusters--;
	}

	/* Right side: blocks of the last cluster that follow the range. */
	in_cluster = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
	if (nr_clusters && in_cluster) {
		range_first = lblk_start + num_blks;
		range_last = range_first +
			     (sbi->s_cluster_ratio - in_cluster) - 1;

		if (ext4_find_delalloc_range(inode, range_first,
					     range_last, 0))
			nr_clusters--;
	}

	return nr_clusters;
}
2009-09-28 23:49:08 +04:00
/*
 * ext4_ext_handle_uninitialized_extents() - map a request that hit an
 * uninitialized (unwritten) extent.
 * @handle:    the journal handle
 * @inode:     the file inode
 * @map:       the requested mapping; m_flags, m_pblk and m_len are updated
 * @path:      path to the extent; dropped and freed before returning
 * @flags:     EXT4_GET_BLOCKS_* flags selecting the case handled below
 * @allocated: number of blocks available in the extent from map->m_lblk
 * @newblock:  physical block corresponding to map->m_lblk
 *
 * Cases, in the order they are tested:
 *  - EXT4_GET_BLOCKS_PRE_IO:  split the extent before the IO is submitted and
 *    record that a conversion is needed when the IO completes.
 *  - EXT4_GET_BLOCKS_CONVERT: IO completed, convert the extent to written.
 *  - EXT4_GET_BLOCKS_UNINIT_EXT: repeated fallocate request, just map.
 *  - no EXT4_GET_BLOCKS_CREATE: lookup only, report the range as unwritten.
 *  - otherwise: buffered write at writepage time, convert to initialized.
 *
 * Returns the number of blocks mapped, or a negative error code.
 */
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
			struct ext4_map_blocks *map,
			struct ext4_ext_path *path, int flags,
			unsigned int allocated, ext4_fsblk_t newblock)
{
	int ret = 0;
	int err = 0;
	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;

	ext_debug(" ext4_ext_handle_uninitialized_extents: inode %lu, logical "
		  " block %llu, max_blocks %u, flags %d, allocated %u ",
		  inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
		  flags, allocated);
	ext4_ext_show_leaf(inode, path);

	/* get_block() before submit the IO, split the extent */
	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
		ret = ext4_split_unwritten_extents(handle, inode, map,
						   path, flags);
		/*
		 * If the split failed nothing will be written, so neither
		 * the io_end nor the inode may be flagged for conversion
		 * (and the pending-conversion counter must not be bumped).
		 */
		if (ret <= 0)
			goto out;
		/*
		 * Flag the inode (non aio case) or end_io struct (aio case)
		 * that this IO needs conversion to written when IO is
		 * completed
		 */
		if (io) {
			if (!(io->flag & EXT4_IO_END_UNWRITTEN)) {
				/* set only this bit, keep the other flags */
				io->flag |= EXT4_IO_END_UNWRITTEN;
				atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
			}
		} else
			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
		if (ext4_should_dioread_nolock(inode))
			map->m_flags |= EXT4_MAP_UNINIT;
		goto out;
	}
	/* IO end_io complete, convert the filled extent to written */
	if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
		ret = ext4_convert_unwritten_extents_endio(handle, inode,
							path);
		if (ret >= 0) {
			ext4_update_inode_fsync_trans(handle, inode, 1);
			err = check_eofblocks_fl(handle, inode, map->m_lblk,
						 path, map->m_len);
		} else
			err = ret;
		goto out2;
	}
	/* buffered IO case */
	/*
	 * repeat fallocate creation request
	 * we already have an unwritten extent
	 */
	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
		goto map_out;

	/* buffered READ or buffered write_begin() lookup */
	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
		/*
		 * We have blocks reserved already.  We
		 * return allocated blocks so that delalloc
		 * won't do block reservation for us.  But
		 * the buffer head will be unmapped so that
		 * a read from the block returns 0s.
		 */
		map->m_flags |= EXT4_MAP_UNWRITTEN;
		goto out1;
	}

	/* buffered write, writepage time, convert */
	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
	if (ret >= 0) {
		ext4_update_inode_fsync_trans(handle, inode, 1);
		err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
					 map->m_len);
		if (err < 0)
			goto out2;
	}

out:
	if (ret <= 0) {
		err = ret;
		goto out2;
	} else
		allocated = ret;
	map->m_flags |= EXT4_MAP_NEW;
	/*
	 * if we allocated more blocks than requested
	 * we need to make sure we unmap the extra block
	 * allocated. The actual needed block will get
	 * unmapped later when we find the buffer_head marked
	 * new.
	 */
	if (allocated > map->m_len) {
		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
					newblock + map->m_len,
					allocated - map->m_len);
		allocated = map->m_len;
	}

	/*
	 * If we have done fallocate with the offset that is already
	 * delayed allocated, we would have block reservation
	 * and quota reservation done in the delayed write path.
	 * But fallocate would have already updated quota and block
	 * count for this offset. So cancel these reservation
	 */
	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
		unsigned int reserved_clusters;
		reserved_clusters = get_reserved_cluster_alloc(inode,
				map->m_lblk, map->m_len);
		if (reserved_clusters)
			ext4_da_update_reserve_space(inode,
						     reserved_clusters,
						     0);
	}

map_out:
	map->m_flags |= EXT4_MAP_MAPPED;
out1:
	if (allocated > map->m_len)
		allocated = map->m_len;
	ext4_ext_show_leaf(inode, path);
	map->m_pblk = newblock;
	map->m_len = allocated;
out2:
	if (path) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}
	return err ? err : allocated;
}
2010-10-28 05:23:12 +04:00
2011-09-10 02:52:51 +04:00
/*
* get_implied_cluster_alloc - check to see if the requested
* allocation ( in the map structure ) overlaps with a cluster already
* allocated in an extent .
* @ sbi The ext4 - specific superblock structure
* @ map The requested lblk - > pblk mapping
* @ ex The extent structure which might contain an implied
* cluster allocation
*
* This function is called by ext4_ext_map_blocks ( ) after we failed to
* find blocks that were already in the inode ' s extent tree . Hence ,
* we know that the beginning of the requested region cannot overlap
* the extent from the inode ' s extent tree . There are three cases we
* want to catch . The first is this case :
*
* | - - - cluster # N - - |
* | - - - extent - - - | | - - - - requested region - - - |
* | = = = = = = = = = = |
*
* The second case that we need to test for is this one :
*
* | - - - - - - - - - cluster # N - - - - - - - - - - - - - - - - |
* | - - - requested region - - | | - - - - - - - extent - - - - |
* | = = = = = = = = = = = = = = = = = = = = = = = |
*
* The third case is when the requested region lies between two extents
* within the same cluster :
* | - - - - - - - - - - - - - cluster # N - - - - - - - - - - - - - |
* | - - - - - ex - - - - - | | - - - - ex_right - - - - |
* | - - - - - - requested region - - - - - - |
* | = = = = = = = = = = = = = = = = |
*
* In each of the above cases , we need to set the map - > m_pblk and
* map - > m_len so it corresponds to the return the extent labelled as
* " |====| " from cluster # N , since it is already in use for data in
* cluster EXT4_B2C ( sbi , map - > m_lblk ) . We will then return 1 to
* signal to ext4_ext_map_blocks ( ) that map - > m_pblk should be treated
* as a new " allocated " block region . Otherwise , we will return 0 and
* ext4_ext_map_blocks ( ) will then allocate one or more new clusters
* by calling ext4_mb_new_blocks ( ) .
*/
static int get_implied_cluster_alloc ( struct ext4_sb_info * sbi ,
struct ext4_map_blocks * map ,
struct ext4_extent * ex ,
struct ext4_ext_path * path )
{
ext4_lblk_t c_offset = map - > m_lblk & ( sbi - > s_cluster_ratio - 1 ) ;
ext4_lblk_t ex_cluster_start , ex_cluster_end ;
ext4_lblk_t rr_cluster_start , rr_cluster_end ;
ext4_lblk_t ee_block = le32_to_cpu ( ex - > ee_block ) ;
ext4_fsblk_t ee_start = ext4_ext_pblock ( ex ) ;
unsigned short ee_len = ext4_ext_get_actual_len ( ex ) ;
/* The extent passed in that we are trying to match */
ex_cluster_start = EXT4_B2C ( sbi , ee_block ) ;
ex_cluster_end = EXT4_B2C ( sbi , ee_block + ee_len - 1 ) ;
/* The requested region passed into ext4_map_blocks() */
rr_cluster_start = EXT4_B2C ( sbi , map - > m_lblk ) ;
rr_cluster_end = EXT4_B2C ( sbi , map - > m_lblk + map - > m_len - 1 ) ;
if ( ( rr_cluster_start = = ex_cluster_end ) | |
( rr_cluster_start = = ex_cluster_start ) ) {
if ( rr_cluster_start = = ex_cluster_end )
ee_start + = ee_len - 1 ;
map - > m_pblk = ( ee_start & ~ ( sbi - > s_cluster_ratio - 1 ) ) +
c_offset ;
map - > m_len = min ( map - > m_len ,
( unsigned ) sbi - > s_cluster_ratio - c_offset ) ;
/*
* Check for and handle this case :
*
* | - - - - - - - - - cluster # N - - - - - - - - - - - - - |
* | - - - - - - - extent - - - - |
* | - - - requested region - - - |
* | = = = = = = = = = = = |
*/
if ( map - > m_lblk < ee_block )
map - > m_len = min ( map - > m_len , ee_block - map - > m_lblk ) ;
/*
* Check for the case where there is already another allocated
* block to the right of ' ex ' but before the end of the cluster .
*
* | - - - - - - - - - - - - - cluster # N - - - - - - - - - - - - - |
* | - - - - - ex - - - - - | | - - - - ex_right - - - - |
* | - - - - - - requested region - - - - - - |
* | = = = = = = = = = = = = = = = = |
*/
if ( map - > m_lblk > ee_block ) {
ext4_lblk_t next = ext4_ext_next_allocated_block ( path ) ;
map - > m_len = min ( map - > m_len , next - map - > m_lblk ) ;
}
return 1 ;
}
return 0 ;
}
2008-01-29 07:58:27 +03:00
/*
2008-02-25 23:29:55 +03:00
* Block allocation / map / preallocation routine for extents based files
*
*
2008-01-29 07:58:27 +03:00
* Need to be called with
2008-01-29 07:58:26 +03:00
* down_read ( & EXT4_I ( inode ) - > i_data_sem ) if not allocating file system block
* ( ie , create is zero ) . Otherwise down_write ( & EXT4_I ( inode ) - > i_data_sem )
2008-02-25 23:29:55 +03:00
*
* return > 0 , number of blocks already mapped / allocated
* if create = = 0 and these are pre - allocated blocks
* buffer head is unmapped
* otherwise blocks are mapped
*
* return = 0 , if plain look up failed ( blocks have not been allocated )
* buffer head is unmapped
*
* return < 0 , error case .
2008-01-29 07:58:27 +03:00
*/
2010-05-17 03:00:00 +04:00
int ext4_ext_map_blocks ( handle_t * handle , struct inode * inode ,
struct ext4_map_blocks * map , int flags )
2006-10-11 12:21:03 +04:00
{
/*
 * Overview of the sequence implemented below:
 *  1. Unless EXT4_GET_BLOCKS_PUNCH_OUT_EXT is set, consult the extent
 *     cache via ext4_ext_in_cache ( ) .
 *  2. Look the block up in the extent tree with ext4_ext_find_extent ( ) .
 *  3. If the found extent covers map - > m_lblk : return it , hand an
 *     uninitialized extent to ext4_ext_handle_uninitialized_extents ( ) ,
 *     or ( punch - out request ) split the extent and remove the range .
 *  4. Otherwise , without EXT4_GET_BLOCKS_CREATE , cache the gap and
 *     return 0 .
 *  5. With EXT4_GET_BLOCKS_CREATE : reuse an implied cluster if
 *     get_implied_cluster_alloc ( ) finds one , else allocate through
 *     ext4_mb_new_blocks ( ) ; then insert the new extent and fix up the
 *     delayed - allocation reservations .
 *
 * Exit labels : " out " fills in map from newblock / allocated and sets
 * EXT4_MAP_MAPPED ; " out2 " only releases path , emits the exit trace
 * and computes the return value .
 */
struct ext4_ext_path * path = NULL ;
2011-09-10 02:52:51 +04:00
struct ext4_extent newex , * ex , * ex2 ;
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
2011-03-22 04:38:05 +03:00
ext4_fsblk_t newblock = 0 ;
2011-09-10 02:52:51 +04:00
int free_on_err = 0 , err = 0 , depth , ret ;
unsigned int allocated = 0 , offset = 0 ;
2011-09-10 03:04:51 +04:00
unsigned int allocated_clusters = 0 , reserved_clusters = 0 ;
2011-05-25 15:41:46 +04:00
unsigned int punched_out = 0 ;
unsigned int result = 0 ;
2008-01-29 08:19:52 +03:00
struct ext4_allocation_request ar ;
2009-09-28 23:48:29 +04:00
ext4_io_end_t * io = EXT4_I ( inode ) - > cur_aio_dio ;
2011-09-10 02:52:51 +04:00
ext4_lblk_t cluster_offset ;
2011-05-25 15:41:46 +04:00
struct ext4_map_blocks punch_map ;
2006-10-11 12:21:03 +04:00
2009-09-01 16:44:37 +04:00
ext_debug ( " blocks %u/%u requested for inode %lu \n " ,
2010-05-17 03:00:00 +04:00
map - > m_lblk , map - > m_len , inode - > i_ino ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_ext_map_blocks_enter ( inode , map - > m_lblk , map - > m_len , flags ) ;
2006-10-11 12:21:03 +04:00
/* check in cache */
2011-07-18 07:27:43 +04:00
if ( ! ( flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT ) & &
ext4_ext_in_cache ( inode , map - > m_lblk , & newex ) ) {
2011-01-10 20:13:26 +03:00
/* a cached entry whose physical start is zero describes a gap */
if ( ! newex . ee_start_lo & & ! newex . ee_start_hi ) {
2011-09-10 03:04:51 +04:00
if ( ( sbi - > s_cluster_ratio > 1 ) & &
ext4_find_delalloc_cluster ( inode , map - > m_lblk , 0 ) )
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2009-05-14 08:58:52 +04:00
if ( ( flags & EXT4_GET_BLOCKS_CREATE ) = = 0 ) {
2007-07-18 05:42:38 +04:00
/*
* block isn ' t allocated yet and
* user doesn ' t want to allocate it
*/
2006-10-11 12:21:03 +04:00
goto out2 ;
}
/* we should allocate requested block */
2011-01-10 20:13:26 +03:00
} else {
2006-10-11 12:21:03 +04:00
/* block is already allocated */
2011-09-10 03:04:51 +04:00
if ( sbi - > s_cluster_ratio > 1 )
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2010-05-17 03:00:00 +04:00
newblock = map - > m_lblk
2007-05-24 21:04:54 +04:00
- le32_to_cpu ( newex . ee_block )
2010-10-28 05:30:14 +04:00
+ ext4_ext_pblock ( & newex ) ;
2006-10-11 12:21:07 +04:00
/* number of remaining blocks in the extent */
2008-01-29 07:58:27 +03:00
allocated = ext4_ext_get_actual_len ( & newex ) -
2010-05-17 03:00:00 +04:00
( map - > m_lblk - le32_to_cpu ( newex . ee_block ) ) ;
2006-10-11 12:21:03 +04:00
/* cache hit : path is still NULL here , " out " copes with that via out2 */
goto out ;
}
}
/* find extent for this block */
2010-05-17 03:00:00 +04:00
path = ext4_ext_find_extent ( inode , map - > m_lblk , NULL ) ;
2006-10-11 12:21:03 +04:00
if ( IS_ERR ( path ) ) {
err = PTR_ERR ( path ) ;
/* reset so that out2 does not try to release an error pointer */
path = NULL ;
goto out2 ;
}
depth = ext_depth ( inode ) ;
/*
2006-10-11 12:21:07 +04:00
* consistent leaf must not be empty ;
* this situation is possible , though , _during_ tree modification ;
2006-10-11 12:21:03 +04:00
* this is why assert can ' t be put in ext4_ext_find_extent ( )
*/
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_ext = = NULL & & depth ! = 0 ) ) {
EXT4_ERROR_INODE ( inode , " bad extent address "
2010-05-17 07:00:00 +04:00
" lblock: %lu, depth: %d pblock %lld " ,
( unsigned long ) map - > m_lblk , depth ,
path [ depth ] . p_block ) ;
2009-12-14 17:53:52 +03:00
err = - EIO ;
goto out2 ;
}
2006-10-11 12:21:03 +04:00
2006-12-07 07:41:33 +03:00
/* ex may legitimately be NULL when depth is 0 ( checked above ) */
ex = path [ depth ] . p_ext ;
if ( ex ) {
2008-01-29 07:58:27 +03:00
ext4_lblk_t ee_block = le32_to_cpu ( ex - > ee_block ) ;
2010-10-28 05:30:14 +04:00
ext4_fsblk_t ee_start = ext4_ext_pblock ( ex ) ;
2007-07-18 05:42:41 +04:00
unsigned short ee_len ;
2006-10-11 12:21:06 +04:00
/*
* Uninitialized extents are treated as holes , except that
2007-07-18 05:42:38 +04:00
* we split out initialized portions during a write .
2006-10-11 12:21:06 +04:00
*/
2007-07-18 05:42:41 +04:00
ee_len = ext4_ext_get_actual_len ( ex ) ;
2006-10-11 12:21:07 +04:00
/* if found extent covers block, simply return it */
2010-05-17 03:00:00 +04:00
if ( in_range ( map - > m_lblk , ee_block , ee_len ) ) {
2011-09-10 02:54:51 +04:00
ext4_fsblk_t partial_cluster = 0 ;
2010-05-17 03:00:00 +04:00
newblock = map - > m_lblk - ee_block + ee_start ;
2006-10-11 12:21:07 +04:00
/* number of remaining blocks in the extent */
2010-05-17 03:00:00 +04:00
allocated = ee_len - ( map - > m_lblk - ee_block ) ;
ext_debug ( " %u fit into %u:%d -> %llu \n " , map - > m_lblk ,
ee_block , ee_len , newblock ) ;
2007-07-18 05:42:38 +04:00
2011-05-25 15:41:46 +04:00
if ( ( flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT ) = = 0 ) {
/*
* Do not put uninitialized extent
* in the cache
*/
if ( ! ext4_ext_is_uninitialized ( ex ) ) {
ext4_ext_put_in_cache ( inode , ee_block ,
ee_len , ee_start ) ;
goto out ;
}
ret = ext4_ext_handle_uninitialized_extents (
handle , inode , map , path , flags ,
allocated , newblock ) ;
/*
 * Direct return : this bypasses out2 , so neither the path
 * cleanup nor trace_ext4_ext_map_blocks_exit ( ) below runs
 * here . NOTE ( review ) : presumably the callee releases
 * path itself - confirm against its definition .
 */
return ret ;
2007-07-18 05:42:38 +04:00
}
2011-05-25 15:41:46 +04:00
/*
* Punch out the map length , but only to the
* end of the extent
*/
punched_out = allocated < map - > m_len ?
allocated : map - > m_len ;
/*
* Since extents need to be converted to
* uninitialized , they must fit in an
* uninitialized extent
*/
if ( punched_out > EXT_UNINIT_MAX_LEN )
punched_out = EXT_UNINIT_MAX_LEN ;
punch_map . m_lblk = map - > m_lblk ;
punch_map . m_pblk = newblock ;
punch_map . m_len = punched_out ;
punch_map . m_flags = 0 ;
/* Check to see if the extent needs to be split */
if ( punch_map . m_len ! = ee_len | |
punch_map . m_lblk ! = ee_block ) {
ret = ext4_split_extent ( handle , inode ,
path , & punch_map , 0 ,
EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
EXT4_GET_BLOCKS_PRE_IO ) ;
if ( ret < 0 ) {
err = ret ;
goto out2 ;
}
/*
* find extent for the block at
* the start of the hole
*/
/* the old path is released and looked up again after the split */
ext4_ext_drop_refs ( path ) ;
kfree ( path ) ;
path = ext4_ext_find_extent ( inode ,
map - > m_lblk , NULL ) ;
if ( IS_ERR ( path ) ) {
err = PTR_ERR ( path ) ;
path = NULL ;
goto out2 ;
}
/* refresh every value derived from the old path */
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
ee_len = ext4_ext_get_actual_len ( ex ) ;
ee_block = le32_to_cpu ( ex - > ee_block ) ;
ee_start = ext4_ext_pblock ( ex ) ;
}
ext4_ext_mark_uninitialized ( ex ) ;
2011-07-18 07:17:02 +04:00
ext4_ext_invalidate_cache ( inode ) ;
/* remove the range [ m_lblk , m_lblk + punched_out ) from the leaf */
err = ext4_ext_rm_leaf ( handle , inode , path ,
2011-09-10 02:54:51 +04:00
& partial_cluster , map - > m_lblk ,
map - > m_lblk + punched_out ) ;
2011-07-18 07:17:02 +04:00
if ( ! err & & path - > p_hdr - > eh_entries = = 0 ) {
/*
* Punch hole freed all of this sub tree ,
* so we need to correct eh_depth
*/
err = ext4_ext_get_access ( handle , inode , path ) ;
if ( err = = 0 ) {
ext_inode_hdr ( inode ) - > eh_depth = 0 ;
ext_inode_hdr ( inode ) - > eh_max =
cpu_to_le16 ( ext4_ext_space_root (
inode , 0 ) ) ;
err = ext4_ext_dirty (
handle , inode , path ) ;
}
}
2011-05-25 15:41:46 +04:00
/* punch - out requests return punched_out ( see the end of the function ) */
goto out2 ;
2006-10-11 12:21:03 +04:00
}
}
2011-09-10 03:04:51 +04:00
if ( ( sbi - > s_cluster_ratio > 1 ) & &
ext4_find_delalloc_cluster ( inode , map - > m_lblk , 0 ) )
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* requested block isn ' t allocated yet ;
2006-10-11 12:21:03 +04:00
* we couldn ' t try to create block if create flag is zero
*/
2009-05-14 08:58:52 +04:00
if ( ( flags & EXT4_GET_BLOCKS_CREATE ) = = 0 ) {
2007-07-18 05:42:38 +04:00
/*
* put just found gap into cache to speed up
* subsequent requests
*/
2010-05-17 03:00:00 +04:00
ext4_ext_put_gap_in_cache ( inode , path , map - > m_lblk ) ;
2006-10-11 12:21:03 +04:00
goto out2 ;
}
2011-09-10 02:52:51 +04:00
2006-10-11 12:21:03 +04:00
/*
2008-10-10 17:40:52 +04:00
* Okay , we need to do block allocation .
2006-10-11 12:21:24 +04:00
*/
2011-09-10 03:04:51 +04:00
/* drop the flag set above ; it is set again only if an implied cluster is reused */
map - > m_flags & = ~ EXT4_MAP_FROM_CLUSTER ;
2011-09-10 02:52:51 +04:00
newex . ee_block = cpu_to_le32 ( map - > m_lblk ) ;
cluster_offset = map - > m_lblk & ( sbi - > s_cluster_ratio - 1 ) ;
/*
* If we are doing bigalloc , check to see if the extent returned
* by ext4_ext_find_extent ( ) implies a cluster we can use .
*/
if ( cluster_offset & & ex & &
get_implied_cluster_alloc ( sbi , map , ex , path ) ) {
/* offset and free_on_err are still 0 on this path */
ar . len = allocated = map - > m_len ;
newblock = map - > m_pblk ;
2011-09-10 03:04:51 +04:00
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2011-09-10 02:52:51 +04:00
goto got_allocated_blocks ;
}
2006-10-11 12:21:03 +04:00
2008-01-29 08:19:52 +03:00
/* find neighbour allocated blocks */
2010-05-17 03:00:00 +04:00
ar . lleft = map - > m_lblk ;
2008-01-29 08:19:52 +03:00
err = ext4_ext_search_left ( inode , path , & ar . lleft , & ar . pleft ) ;
if ( err )
goto out2 ;
2010-05-17 03:00:00 +04:00
ar . lright = map - > m_lblk ;
2011-09-10 02:52:51 +04:00
ex2 = NULL ;
err = ext4_ext_search_right ( inode , path , & ar . lright , & ar . pright , & ex2 ) ;
2008-01-29 08:19:52 +03:00
if ( err )
goto out2 ;
2007-05-24 21:04:13 +04:00
2011-09-10 02:52:51 +04:00
/* Check if the extent after searching to the right implies a
* cluster we can use . */
if ( ( sbi - > s_cluster_ratio > 1 ) & & ex2 & &
get_implied_cluster_alloc ( sbi , map , ex2 , path ) ) {
ar . len = allocated = map - > m_len ;
newblock = map - > m_pblk ;
2011-09-10 03:04:51 +04:00
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2011-09-10 02:52:51 +04:00
goto got_allocated_blocks ;
}
2007-07-18 17:02:56 +04:00
/*
* See if request is beyond maximum number of blocks we can have in
* a single extent . For an initialized extent this limit is
* EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
* EXT_UNINIT_MAX_LEN .
*/
2010-05-17 03:00:00 +04:00
if ( map - > m_len > EXT_INIT_MAX_LEN & &
2009-05-14 08:58:52 +04:00
! ( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) )
2010-05-17 03:00:00 +04:00
map - > m_len = EXT_INIT_MAX_LEN ;
else if ( map - > m_len > EXT_UNINIT_MAX_LEN & &
2009-05-14 08:58:52 +04:00
( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) )
2010-05-17 03:00:00 +04:00
map - > m_len = EXT_UNINIT_MAX_LEN ;
2007-07-18 17:02:56 +04:00
2010-05-17 03:00:00 +04:00
/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
newex . ee_len = cpu_to_le16 ( map - > m_len ) ;
2011-09-10 02:52:51 +04:00
err = ext4_ext_check_overlap ( sbi , inode , & newex , path ) ;
2007-05-24 21:04:13 +04:00
/*
 * A non - zero result is not treated as an error here : the length
 * is re - read from newex ( NOTE ( review ) : presumably trimmed by the
 * overlap check - confirm ) , and err is overwritten by
 * ext4_mb_new_blocks ( ) below .
 */
if ( err )
2008-01-29 07:58:27 +03:00
allocated = ext4_ext_get_actual_len ( & newex ) ;
2007-05-24 21:04:13 +04:00
else
2010-05-17 03:00:00 +04:00
allocated = map - > m_len ;
2008-01-29 08:19:52 +03:00
/* allocate new block */
ar . inode = inode ;
2010-05-17 03:00:00 +04:00
ar . goal = ext4_ext_find_goal ( inode , path , map - > m_lblk ) ;
ar . logical = map - > m_lblk ;
2011-09-10 02:52:51 +04:00
/*
* We calculate the offset from the beginning of the cluster
* for the logical block number , since when we allocate a
* physical cluster , the physical block should start at the
* same offset from the beginning of the cluster . This is
* needed so that future calls to get_implied_cluster_alloc ( )
* work correctly .
*/
offset = map - > m_lblk & ( sbi - > s_cluster_ratio - 1 ) ;
/* ar . len is expressed in clusters for the allocator call */
ar . len = EXT4_NUM_B2C ( sbi , offset + allocated ) ;
ar . goal - = offset ;
ar . logical - = offset ;
2008-01-29 08:19:52 +03:00
if ( S_ISREG ( inode - > i_mode ) )
ar . flags = EXT4_MB_HINT_DATA ;
else
/* disable in-core preallocation for non-regular files */
ar . flags = 0 ;
2011-05-25 15:41:54 +04:00
if ( flags & EXT4_GET_BLOCKS_NO_NORMALIZE )
ar . flags | = EXT4_MB_HINT_NOPREALLOC ;
2008-01-29 08:19:52 +03:00
newblock = ext4_mb_new_blocks ( handle , & ar , & err ) ;
2006-10-11 12:21:03 +04:00
if ( ! newblock )
goto out2 ;
2009-09-01 16:44:37 +04:00
ext_debug ( " allocate new block: goal %llu, found %llu/%u \n " ,
2008-11-05 08:14:04 +03:00
ar . goal , newblock , allocated ) ;
2011-09-10 02:52:51 +04:00
/* from here on , a failure must give back the blocks just allocated */
free_on_err = 1 ;
2011-09-10 03:04:51 +04:00
allocated_clusters = ar . len ;
2011-09-10 02:52:51 +04:00
/* convert ar . len back to blocks usable from offset , capped at allocated */
ar . len = EXT4_C2B ( sbi , ar . len ) - offset ;
if ( ar . len > allocated )
ar . len = allocated ;
2006-10-11 12:21:03 +04:00
2011-09-10 02:52:51 +04:00
got_allocated_blocks :
2006-10-11 12:21:03 +04:00
/* try to insert new extent into found leaf and return */
2011-09-10 02:52:51 +04:00
ext4_ext_store_pblock ( & newex , newblock + offset ) ;
2008-01-29 08:19:52 +03:00
newex . ee_len = cpu_to_le16 ( ar . len ) ;
2009-09-28 23:48:29 +04:00
/* Mark uninitialized */
if ( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) {
2007-07-18 05:42:41 +04:00
ext4_ext_mark_uninitialized ( & newex ) ;
2009-09-28 23:48:29 +04:00
/*
2010-03-05 00:14:02 +03:00
* io_end structure was created for every IO write to an
2011-03-31 05:57:33 +04:00
* uninitialized extent . To avoid unnecessary conversion ,
2010-03-05 00:14:02 +03:00
* here we flag the IO that really needs the conversion .
2009-11-10 18:48:04 +03:00
* For non async direct IO case , flag the inode state
2011-03-31 05:57:33 +04:00
* that we need to perform conversion when IO is done .
2009-09-28 23:48:29 +04:00
*/
2010-03-05 00:14:02 +03:00
if ( ( flags & EXT4_GET_BLOCKS_PRE_IO ) ) {
ext4: serialize unaligned asynchronous DIO
ext4 has a data corruption case when doing non-block-aligned
asynchronous direct IO into a sparse file, as demonstrated
by xfstest 240.
The root cause is that while ext4 preallocates space in the
hole, mappings of that space still look "new" and
dio_zero_block() will zero out the unwritten portions. When
more than one AIO thread is going, they both find this "new"
block and race to zero out their portion; this is uncoordinated
and causes data corruption.
Dave Chinner fixed this for xfs by simply serializing all
unaligned asynchronous direct IO. I've done the same here.
The difference is that we only wait on conversions, not all IO.
This is a very big hammer, and I'm not very pleased with
stuffing this into ext4_file_write(). But since ext4 is
DIO_LOCKING, we need to serialize it at this high level.
I tried to move this into ext4_ext_direct_IO, but by then
we have the i_mutex already, and we will wait on the
work queue to do conversions - which must also take the
i_mutex. So that won't work.
This was originally exposed by qemu-kvm installing to
a raw disk image with a normal sector-63 alignment. I've
tested a backport of this patch with qemu, and it does
avoid the corruption. It is also quite a lot slower
(14 min for package installs, vs. 8 min for well-aligned)
but I'll take slow correctness over fast corruption any day.
Mingming suggested that we can track outstanding
conversions, and wait on those so that non-sparse
files won't be affected, and I've implemented that here;
unaligned AIO to nonsparse files won't take a perf hit.
[tytso@mit.edu: Keep the mutex as a hashed array instead
of bloating the ext4 inode]
[tytso@mit.edu: Fix up namespace issues so that global
variables are protected with an "ext4_" prefix.]
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
2011-02-12 16:17:34 +03:00
if ( io & & ! ( io - > flag & EXT4_IO_END_UNWRITTEN ) ) {
2010-10-28 05:30:10 +04:00
io - > flag = EXT4_IO_END_UNWRITTEN ;
ext4: serialize unaligned asynchronous DIO
ext4 has a data corruption case when doing non-block-aligned
asynchronous direct IO into a sparse file, as demonstrated
by xfstest 240.
The root cause is that while ext4 preallocates space in the
hole, mappings of that space still look "new" and
dio_zero_block() will zero out the unwritten portions. When
more than one AIO thread is going, they both find this "new"
block and race to zero out their portion; this is uncoordinated
and causes data corruption.
Dave Chinner fixed this for xfs by simply serializing all
unaligned asynchronous direct IO. I've done the same here.
The difference is that we only wait on conversions, not all IO.
This is a very big hammer, and I'm not very pleased with
stuffing this into ext4_file_write(). But since ext4 is
DIO_LOCKING, we need to serialize it at this high level.
I tried to move this into ext4_ext_direct_IO, but by then
we have the i_mutex already, and we will wait on the
work queue to do conversions - which must also take the
i_mutex. So that won't work.
This was originally exposed by qemu-kvm installing to
a raw disk image with a normal sector-63 alignment. I've
tested a backport of this patch with qemu, and it does
avoid the corruption. It is also quite a lot slower
(14 min for package installs, vs. 8 min for well-aligned)
but I'll take slow correctness over fast corruption any day.
Mingming suggested that we can track outstanding
conversions, and wait on those so that non-sparse
files won't be affected, and I've implemented that here;
unaligned AIO to nonsparse files won't take a perf hit.
[tytso@mit.edu: Keep the mutex as a hashed array instead
of bloating the ext4 inode]
[tytso@mit.edu: Fix up namespace issues so that global
variables are protected with an "ext4_" prefix.]
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
2011-02-12 16:17:34 +03:00
atomic_inc ( & EXT4_I ( inode ) - > i_aiodio_unwritten ) ;
} else
2010-01-24 22:34:07 +03:00
ext4_set_inode_state ( inode ,
EXT4_STATE_DIO_UNWRITTEN ) ;
2009-11-10 18:48:04 +03:00
}
2010-03-05 00:14:02 +03:00
if ( ext4_should_dioread_nolock ( inode ) )
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_UNINIT ;
2009-09-28 23:48:29 +04:00
}
2010-02-24 17:52:53 +03:00
2011-01-10 21:03:35 +03:00
err = check_eofblocks_fl ( handle , inode , map - > m_lblk , path , ar . len ) ;
2011-07-11 04:07:25 +04:00
if ( ! err )
err = ext4_ext_insert_extent ( handle , inode , path ,
& newex , flags ) ;
2011-09-10 02:52:51 +04:00
if ( err & & free_on_err ) {
ext4: fix i_blocks/quota accounting when extent insertion fails
The current implementation of ext4_free_blocks() always calls
dquot_free_block This looks quite sensible in the most cases: blocks
to be freed are associated with inode and were accounted in quota and
i_blocks some time ago.
However, there is a case when blocks to free were not accounted by the
time calling ext4_free_blocks() yet:
1. delalloc is on, write_begin pre-allocated some space in quota
2. write-back happens, ext4 allocates some blocks in ext4_ext_map_blocks()
3. then ext4_ext_map_blocks() gets an error (e.g. ENOSPC) from
ext4_ext_insert_extent() and calls ext4_free_blocks().
In this scenario, ext4_free_blocks() calls dquot_free_block() who, in
turn, decrements i_blocks for blocks which were not accounted yet (due
to delalloc) After clean umount, e2fsck reports something like:
> Inode 21, i_blocks is 5080, should be 5128. Fix<y>?
because i_blocks was erroneously decremented as explained above.
The patch fixes the problem by passing the new flag
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE to ext4_free_blocks(), to request
that the dquot_free_block() call be skipped.
Signed-off-by: Maxim Patlasov <maxim.patlasov@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
2011-07-11 03:37:48 +04:00
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0 ;
2007-05-24 21:04:25 +04:00
/* free data blocks we just allocated */
2008-01-29 08:19:52 +03:00
/* not a good idea to call discard here directly,
* but otherwise we ' d need to call it every free ( ) */
2008-10-10 17:40:52 +04:00
ext4_discard_preallocations ( inode ) ;
2011-02-22 05:01:42 +03:00
ext4_free_blocks ( handle , inode , NULL , ext4_ext_pblock ( & newex ) ,
ext4: fix i_blocks/quota accounting when extent insertion fails
The current implementation of ext4_free_blocks() always calls
dquot_free_block This looks quite sensible in the most cases: blocks
to be freed are associated with inode and were accounted in quota and
i_blocks some time ago.
However, there is a case when blocks to free were not accounted by the
time calling ext4_free_blocks() yet:
1. delalloc is on, write_begin pre-allocated some space in quota
2. write-back happens, ext4 allocates some blocks in ext4_ext_map_blocks()
3. then ext4_ext_map_blocks() gets an error (e.g. ENOSPC) from
ext4_ext_insert_extent() and calls ext4_free_blocks().
In this scenario, ext4_free_blocks() calls dquot_free_block() who, in
turn, decrements i_blocks for blocks which were not accounted yet (due
to delalloc) After clean umount, e2fsck reports something like:
> Inode 21, i_blocks is 5080, should be 5128. Fix<y>?
because i_blocks was erroneously decremented as explained above.
The patch fixes the problem by passing the new flag
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE to ext4_free_blocks(), to request
that the dquot_free_block() call be skipped.
Signed-off-by: Maxim Patlasov <maxim.patlasov@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
2011-07-11 03:37:48 +04:00
ext4_ext_get_actual_len ( & newex ) , fb_flags ) ;
2006-10-11 12:21:03 +04:00
goto out2 ;
2007-05-24 21:04:25 +04:00
}
2006-10-11 12:21:03 +04:00
/* previous routine could use block we allocated */
2010-10-28 05:30:14 +04:00
newblock = ext4_ext_pblock ( & newex ) ;
2008-01-29 07:58:27 +03:00
allocated = ext4_ext_get_actual_len ( & newex ) ;
2010-05-17 03:00:00 +04:00
if ( allocated > map - > m_len )
allocated = map - > m_len ;
map - > m_flags | = EXT4_MAP_NEW ;
2006-10-11 12:21:03 +04:00
2010-01-25 12:00:31 +03:00
/*
* Update reserved blocks / metadata blocks after successful
* block allocation which had been deferred till now .
*/
2011-09-10 03:04:51 +04:00
if ( flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ) {
/*
* Check how many clusters we had reserved this allocted range .
*/
reserved_clusters = get_reserved_cluster_alloc ( inode ,
map - > m_lblk , allocated ) ;
if ( map - > m_flags & EXT4_MAP_FROM_CLUSTER ) {
if ( reserved_clusters ) {
/*
* We have clusters reserved for this range .
* But since we are not doing actual allocation
* and are simply using blocks from previously
* allocated cluster , we should release the
* reservation and not claim quota .
*/
ext4_da_update_reserve_space ( inode ,
reserved_clusters , 0 ) ;
}
} else {
BUG_ON ( allocated_clusters < reserved_clusters ) ;
/* We will claim quota for all newly allocated blocks.*/
ext4_da_update_reserve_space ( inode , allocated_clusters ,
1 ) ;
if ( reserved_clusters < allocated_clusters ) {
int reservation = allocated_clusters -
reserved_clusters ;
/*
* It seems we claimed few clusters outside of
* the range of this allocation . We should give
* it back to the reservation pool . This can
* happen in the following case :
*
* * Suppose s_cluster_ratio is 4 ( i . e . , each
* cluster has 4 blocks . Thus , the clusters
* are [ 0 - 3 ] , [ 4 - 7 ] , [ 8 - 11 ] . . .
* * First comes delayed allocation write for
* logical blocks 10 & 11. Since there were no
* previous delayed allocated blocks in the
* range [ 8 - 11 ] , we would reserve 1 cluster
* for this write .
* * Next comes write for logical blocks 3 to 8.
* In this case , we will reserve 2 clusters
* ( for [ 0 - 3 ] and [ 4 - 7 ] ; and not for [ 8 - 11 ] as
* that range has a delayed allocated blocks .
* Thus total reserved clusters now becomes 3.
* * Now , during the delayed allocation writeout
* time , we will first write blocks [ 3 - 8 ] and
* allocate 3 clusters for writing these
* blocks . Also , we would claim all these
* three clusters above .
* * Now when we come here to writeout the
* blocks [ 10 - 11 ] , we would expect to claim
* the reservation of 1 cluster we had made
* ( and we would claim it since there are no
* more delayed allocated blocks in the range
* [ 8 - 11 ] . But our reserved cluster count had
* already gone to 0.
*
* Thus , at the step 4 above when we determine
* that there are still some unwritten delayed
* allocated blocks outside of our current
* block range , we should increment the
* reserved clusters count so that when the
* remaining blocks finally gets written , we
* could claim them .
*/
while ( reservation ) {
ext4_da_reserve_space ( inode ,
map - > m_lblk ) ;
reservation - - ;
}
}
}
}
2010-01-25 12:00:31 +03:00
2009-12-09 07:51:10 +03:00
/*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an uninitialized extent .
*/
if ( ( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) = = 0 ) {
2011-01-10 20:13:26 +03:00
ext4_ext_put_in_cache ( inode , map - > m_lblk , allocated , newblock ) ;
2009-12-09 07:51:10 +03:00
ext4_update_inode_fsync_trans ( handle , inode , 1 ) ;
} else
ext4_update_inode_fsync_trans ( handle , inode , 0 ) ;
2006-10-11 12:21:03 +04:00
out :
2010-05-17 03:00:00 +04:00
if ( allocated > map - > m_len )
allocated = map - > m_len ;
2006-10-11 12:21:03 +04:00
ext4_ext_show_leaf ( inode , path ) ;
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_MAPPED ;
map - > m_pblk = newblock ;
map - > m_len = allocated ;
2006-10-11 12:21:03 +04:00
out2 :
if ( path ) {
ext4_ext_drop_refs ( path ) ;
kfree ( path ) ;
}
2011-03-22 04:38:05 +03:00
trace_ext4_ext_map_blocks_exit ( inode , map - > m_lblk ,
newblock , map - > m_len , err ? err : allocated ) ;
2011-05-25 15:41:46 +04:00
result = ( flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT ) ?
punched_out : allocated ;
return err ? err : result ;
2006-10-11 12:21:03 +04:00
}
2008-07-12 03:27:31 +04:00
void ext4_ext_truncate ( struct inode * inode )
2006-10-11 12:21:03 +04:00
{
struct address_space * mapping = inode - > i_mapping ;
struct super_block * sb = inode - > i_sb ;
2008-01-29 07:58:27 +03:00
ext4_lblk_t last_block ;
2006-10-11 12:21:03 +04:00
handle_t * handle ;
2011-09-07 05:49:44 +04:00
loff_t page_len ;
2006-10-11 12:21:03 +04:00
int err = 0 ;
2011-01-10 20:47:05 +03:00
/*
* finish any pending end_io work so we won ' t run the risk of
* converting any truncated blocks to initialized later
*/
ext4_flush_completed_IO ( inode ) ;
2006-10-11 12:21:03 +04:00
/*
* probably first extent we ' re gonna free will be last in block
*/
2008-08-20 06:16:03 +04:00
err = ext4_writepage_trans_blocks ( inode ) ;
2006-10-11 12:21:03 +04:00
handle = ext4_journal_start ( inode , err ) ;
2008-07-12 03:27:31 +04:00
if ( IS_ERR ( handle ) )
2006-10-11 12:21:03 +04:00
return ;
2011-09-07 05:49:44 +04:00
if ( inode - > i_size % PAGE_CACHE_SIZE ! = 0 ) {
page_len = PAGE_CACHE_SIZE -
( inode - > i_size & ( PAGE_CACHE_SIZE - 1 ) ) ;
err = ext4_discard_partial_page_buffers ( handle ,
mapping , inode - > i_size , page_len , 0 ) ;
if ( err )
goto out_stop ;
}
2006-10-11 12:21:03 +04:00
2008-07-12 03:27:31 +04:00
if ( ext4_orphan_add ( handle , inode ) )
goto out_stop ;
2008-01-29 07:58:26 +03:00
down_write ( & EXT4_I ( inode ) - > i_data_sem ) ;
2006-10-11 12:21:03 +04:00
ext4_ext_invalidate_cache ( inode ) ;
2008-10-10 17:40:52 +04:00
ext4_discard_preallocations ( inode ) ;
2008-01-29 08:19:52 +03:00
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* TODO : optimization is possible here .
* Probably we need not scan at all ,
* because page truncation is enough .
2006-10-11 12:21:03 +04:00
*/
/* we have to know where to truncate from in crash case */
EXT4_I ( inode ) - > i_disksize = inode - > i_size ;
ext4_mark_inode_dirty ( handle , inode ) ;
last_block = ( inode - > i_size + sb - > s_blocksize - 1 )
> > EXT4_BLOCK_SIZE_BITS ( sb ) ;
2011-07-18 07:21:03 +04:00
err = ext4_ext_remove_space ( inode , last_block ) ;
2006-10-11 12:21:03 +04:00
/* In a multi-transaction truncate, we only make the final
2007-07-18 05:42:38 +04:00
* transaction synchronous .
*/
2006-10-11 12:21:03 +04:00
if ( IS_SYNC ( inode ) )
2009-01-07 08:06:22 +03:00
ext4_handle_sync ( handle ) ;
2006-10-11 12:21:03 +04:00
2008-07-12 03:27:31 +04:00
up_write ( & EXT4_I ( inode ) - > i_data_sem ) ;
2011-05-23 05:33:00 +04:00
out_stop :
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* If this was a simple ftruncate ( ) and the file will remain alive ,
2006-10-11 12:21:03 +04:00
* then we need to clear up the orphan record which we created above .
* However , if this was a real unlink then we were called by
* ext4_delete_inode ( ) , and we allow that function to clean up the
* orphan info for us .
*/
if ( inode - > i_nlink )
ext4_orphan_del ( handle , inode ) ;
2008-04-30 06:00:41 +04:00
inode - > i_mtime = inode - > i_ctime = ext4_current_time ( inode ) ;
ext4_mark_inode_dirty ( handle , inode ) ;
2006-10-11 12:21:03 +04:00
ext4_journal_stop ( handle ) ;
}
2008-04-29 16:11:12 +04:00
/*
 * ext4_falloc_update_inode - bring inode fields up to date after one
 * preallocation step.
 *
 * @inode:        inode that was preallocated into
 * @mode:         fallocate mode; only FALLOC_FL_KEEP_SIZE is looked at
 * @new_size:     byte offset just past the range allocated so far
 * @update_ctime: when non-zero, i_ctime is set to the current fs time
 *                (unless it already equals it)
 *
 * Without FALLOC_FL_KEEP_SIZE, i_size and i_disksize are raised to
 * @new_size if they are smaller.  With it, the sizes are left alone and
 * the inode is flagged EXT4_INODE_EOFBLOCKS when @new_size exceeds i_size.
 */
static void ext4_falloc_update_inode(struct inode *inode,
				int mode, loff_t new_size, int update_ctime)
{
	if (update_ctime) {
		struct timespec now = current_fs_time(inode->i_sb);

		if (!timespec_equal(&inode->i_ctime, &now))
			inode->i_ctime = now;
	}

	if (mode & FALLOC_FL_KEEP_SIZE) {
		/*
		 * Mark that we allocate beyond EOF so the subsequent truncate
		 * can proceed even if the new size is the same as i_size.
		 */
		if (new_size > i_size_read(inode))
			ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
		return;
	}

	/*
	 * Update only when preallocation was requested beyond
	 * the file size.
	 */
	if (new_size > i_size_read(inode))
		i_size_write(inode, new_size);
	if (new_size > EXT4_I(inode)->i_disksize)
		ext4_update_i_disksize(inode, new_size);
}
2007-07-18 05:42:41 +04:00
/*
2011-01-14 15:07:43 +03:00
* preallocate space for a file . This implements ext4 ' s fallocate file
2007-07-18 05:42:41 +04:00
* operation , which gets called from sys_fallocate system call .
* For block - mapped files , posix_fallocate should fall back to the method
* of writing zeroes to the required new blocks ( the same behavior which is
* expected for file systems which do not support fallocate ( ) system call ) .
*/
2011-01-14 15:07:43 +03:00
long ext4_fallocate ( struct file * file , int mode , loff_t offset , loff_t len )
2007-07-18 05:42:41 +04:00
{
2011-01-14 15:07:43 +03:00
struct inode * inode = file - > f_path . dentry - > d_inode ;
2007-07-18 05:42:41 +04:00
handle_t * handle ;
2008-04-29 16:11:12 +04:00
loff_t new_size ;
2008-11-05 08:14:04 +03:00
unsigned int max_blocks ;
2007-07-18 05:42:41 +04:00
int ret = 0 ;
int ret2 = 0 ;
int retries = 0 ;
2010-05-17 04:00:00 +04:00
struct ext4_map_blocks map ;
2007-07-18 05:42:41 +04:00
unsigned int credits , blkbits = inode - > i_blkbits ;
/*
* currently supporting ( pre ) allocate mode for extent - based
* files _only_
*/
2010-05-17 06:00:00 +04:00
if ( ! ( ext4_test_inode_flag ( inode , EXT4_INODE_EXTENTS ) ) )
2007-07-18 05:42:41 +04:00
return - EOPNOTSUPP ;
2011-05-25 15:41:50 +04:00
/* Return error if mode is not supported */
if ( mode & ~ ( FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE ) )
return - EOPNOTSUPP ;
if ( mode & FALLOC_FL_PUNCH_HOLE )
return ext4_punch_hole ( file , offset , len ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_fallocate_enter ( inode , offset , len , mode ) ;
2010-05-17 04:00:00 +04:00
map . m_lblk = offset > > blkbits ;
2008-04-29 16:11:12 +04:00
/*
* We can ' t just convert len to max_blocks because
* If blocksize = 4096 offset = 3072 and len = 2048
*/
2007-07-18 05:42:41 +04:00
max_blocks = ( EXT4_BLOCK_ALIGN ( len + offset , blkbits ) > > blkbits )
2010-05-17 04:00:00 +04:00
- map . m_lblk ;
2007-07-18 05:42:41 +04:00
/*
2008-08-20 06:16:03 +04:00
* credits to insert 1 extent into extent tree
2007-07-18 05:42:41 +04:00
*/
2008-08-20 06:16:03 +04:00
credits = ext4_chunk_trans_blocks ( inode , max_blocks ) ;
2008-02-15 20:47:21 +03:00
mutex_lock ( & inode - > i_mutex ) ;
2010-05-16 22:00:00 +04:00
ret = inode_newsize_ok ( inode , ( len + offset ) ) ;
if ( ret ) {
mutex_unlock ( & inode - > i_mutex ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_fallocate_exit ( inode , offset , max_blocks , ret ) ;
2010-05-16 22:00:00 +04:00
return ret ;
}
2007-07-18 05:42:41 +04:00
retry :
while ( ret > = 0 & & ret < max_blocks ) {
2010-05-17 04:00:00 +04:00
map . m_lblk = map . m_lblk + ret ;
map . m_len = max_blocks = max_blocks - ret ;
2007-07-18 05:42:41 +04:00
handle = ext4_journal_start ( inode , credits ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
break ;
}
2010-05-17 04:00:00 +04:00
ret = ext4_map_blocks ( handle , inode , & map ,
2011-05-25 15:41:54 +04:00
EXT4_GET_BLOCKS_CREATE_UNINIT_EXT |
EXT4_GET_BLOCKS_NO_NORMALIZE ) ;
2008-01-29 07:58:27 +03:00
if ( ret < = 0 ) {
2008-02-25 23:41:35 +03:00
# ifdef EXT4FS_DEBUG
WARN_ON ( ret < = 0 ) ;
2010-05-17 03:00:00 +04:00
printk ( KERN_ERR " %s: ext4_ext_map_blocks "
2008-02-25 23:41:35 +03:00
" returned error inode#%lu, block=%u, "
2009-01-27 03:26:26 +03:00
" max_blocks=%u " , __func__ ,
2010-10-28 05:30:15 +04:00
inode - > i_ino , map . m_lblk , max_blocks ) ;
2008-02-25 23:41:35 +03:00
# endif
2007-07-18 05:42:41 +04:00
ext4_mark_inode_dirty ( handle , inode ) ;
ret2 = ext4_journal_stop ( handle ) ;
break ;
}
2010-05-17 04:00:00 +04:00
if ( ( map . m_lblk + ret ) > = ( EXT4_BLOCK_ALIGN ( offset + len ,
2008-04-29 16:11:12 +04:00
blkbits ) > > blkbits ) )
new_size = offset + len ;
else
2011-07-28 06:11:20 +04:00
new_size = ( ( loff_t ) map . m_lblk + ret ) < < blkbits ;
2007-07-18 05:42:41 +04:00
2008-04-29 16:11:12 +04:00
ext4_falloc_update_inode ( inode , mode , new_size ,
2010-05-17 04:00:00 +04:00
( map . m_flags & EXT4_MAP_NEW ) ) ;
2007-07-18 05:42:41 +04:00
ext4_mark_inode_dirty ( handle , inode ) ;
ret2 = ext4_journal_stop ( handle ) ;
if ( ret2 )
break ;
}
2008-04-29 16:11:12 +04:00
if ( ret = = - ENOSPC & &
ext4_should_retry_alloc ( inode - > i_sb , & retries ) ) {
ret = 0 ;
2007-07-18 05:42:41 +04:00
goto retry ;
}
2008-02-15 20:47:21 +03:00
mutex_unlock ( & inode - > i_mutex ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_fallocate_exit ( inode , offset , max_blocks ,
ret > 0 ? ret2 : ret ) ;
2007-07-18 05:42:41 +04:00
return ret > 0 ? ret2 : ret ;
}
2008-10-07 08:46:36 +04:00
2009-09-28 23:49:08 +04:00
/*
* This function convert a range of blocks to written extents
* The caller of this function will pass the start offset and the size .
* all unwritten extents within this range will be converted to
* written extents .
*
* This function is called from the direct IO end io call back
* function , to convert the fallocated extents after IO is completed .
2009-11-10 18:48:08 +03:00
* Returns 0 on success .
2009-09-28 23:49:08 +04:00
*/
int ext4_convert_unwritten_extents ( struct inode * inode , loff_t offset ,
2010-02-05 07:58:38 +03:00
ssize_t len )
2009-09-28 23:49:08 +04:00
{
handle_t * handle ;
unsigned int max_blocks ;
int ret = 0 ;
int ret2 = 0 ;
2010-05-17 04:00:00 +04:00
struct ext4_map_blocks map ;
2009-09-28 23:49:08 +04:00
unsigned int credits , blkbits = inode - > i_blkbits ;
2010-05-17 04:00:00 +04:00
map . m_lblk = offset > > blkbits ;
2009-09-28 23:49:08 +04:00
/*
* We can ' t just convert len to max_blocks because
* If blocksize = 4096 offset = 3072 and len = 2048
*/
2010-05-17 04:00:00 +04:00
max_blocks = ( ( EXT4_BLOCK_ALIGN ( len + offset , blkbits ) > > blkbits ) -
map . m_lblk ) ;
2009-09-28 23:49:08 +04:00
/*
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks ( inode , max_blocks ) ;
while ( ret > = 0 & & ret < max_blocks ) {
2010-05-17 04:00:00 +04:00
map . m_lblk + = ret ;
map . m_len = ( max_blocks - = ret ) ;
2009-09-28 23:49:08 +04:00
handle = ext4_journal_start ( inode , credits ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
break ;
}
2010-05-17 04:00:00 +04:00
ret = ext4_map_blocks ( handle , inode , & map ,
2010-03-02 21:28:44 +03:00
EXT4_GET_BLOCKS_IO_CONVERT_EXT ) ;
2009-09-28 23:49:08 +04:00
if ( ret < = 0 ) {
WARN_ON ( ret < = 0 ) ;
2010-05-17 03:00:00 +04:00
printk ( KERN_ERR " %s: ext4_ext_map_blocks "
2009-09-28 23:49:08 +04:00
" returned error inode#%lu, block=%u, "
" max_blocks=%u " , __func__ ,
2010-05-17 04:00:00 +04:00
inode - > i_ino , map . m_lblk , map . m_len ) ;
2009-09-28 23:49:08 +04:00
}
ext4_mark_inode_dirty ( handle , inode ) ;
ret2 = ext4_journal_stop ( handle ) ;
if ( ret < = 0 | | ret2 )
break ;
}
return ret > 0 ? ret2 : ret ;
}
2011-02-28 01:25:47 +03:00
2008-10-07 08:46:36 +04:00
/*
 * Callback function called for each extent to gather FIEMAP information.
 *
 * @inode: inode whose mapping is being reported
 * @next:  compared against EXT_MAX_BLOCKS; when equal the reported extent
 *         is flagged FIEMAP_EXTENT_LAST
 * @newex: the extent (or gap, when ec_start is 0) handed in by the walker;
 *         ec_block and ec_len may be rewritten here when a delayed extent
 *         is found inside a gap
 * @ex:    on-disk extent; NULL is tolerated, otherwise it is only checked
 *         for the uninitialized state
 * @data:  the struct fiemap_extent_info that receives the result
 *
 * Returns EXT_CONTINUE to keep going (also used when the range is just a
 * hole), EXT_BREAK when fiemap_fill_next_extent() returns 1, -ENOMEM when
 * the page-pointer array cannot be allocated, or the negative value
 * returned by fiemap_fill_next_extent().
 */
2011-06-06 08:06:52 +04:00
static int ext4_ext_fiemap_cb ( struct inode * inode , ext4_lblk_t next ,
2008-10-07 08:46:36 +04:00
struct ext4_ext_cache * newex , struct ext4_extent * ex ,
void * data )
{
__u64 logical ;
__u64 physical ;
__u64 length ;
__u32 flags = 0 ;
2011-02-28 01:25:47 +03:00
int ret = 0 ;
struct fiemap_extent_info * fieinfo = data ;
unsigned char blksize_bits ;
2008-10-07 08:46:36 +04:00
2011-02-28 01:25:47 +03:00
blksize_bits = inode - > i_sb - > s_blocksize_bits ;
/* Byte offset of the first block described by newex. */
logical = ( __u64 ) newex - > ec_block < < blksize_bits ;
2008-10-07 08:46:36 +04:00
2011-01-10 20:13:26 +03:00
if ( newex - > ec_start = = 0 ) {
2011-02-28 01:25:47 +03:00
/*
 * No extent in extent-tree contains block @newex->ec_start,
 * then the block may stay in 1) a hole or 2) delayed-extent.
 *
 * Holes or delayed-extents are processed as follows.
 * 1. lookup dirty pages with specified range in pagecache.
 *    If no page is got, then there is no delayed-extent and
 *    return with EXT_CONTINUE.
 * 2. find the 1st mapped buffer,
 * 3. check if the mapped buffer is both in the request range
 *    and a delayed buffer. If not, there is no delayed-extent,
 *    then return.
 * 4. a delayed-extent is found, the extent will be collected.
 */
/* Logical block number of the buffer currently being examined. */
ext4_lblk_t end = 0 ;
pgoff_t last_offset ;
pgoff_t offset ;
pgoff_t index ;
2011-05-24 19:36:58 +04:00
pgoff_t start_index = 0 ;
2011-02-28 01:25:47 +03:00
struct page * * pages = NULL ;
2008-10-07 08:46:36 +04:00
struct buffer_head * bh = NULL ;
2011-02-28 01:25:47 +03:00
struct buffer_head * head = NULL ;
/* The lookup array is PAGE_SIZE bytes, i.e. this many page pointers. */
unsigned int nr_pages = PAGE_SIZE / sizeof ( struct page * ) ;
pages = kmalloc ( PAGE_SIZE , GFP_KERNEL ) ;
if ( pages = = NULL )
return - ENOMEM ;
2008-10-07 08:46:36 +04:00
offset = logical > > PAGE_SHIFT ;
2011-02-28 01:25:47 +03:00
repeat :
/*
 * Remember where this lookup starts; on a repeated pass it is used to
 * check that the first page returned directly follows the previous batch.
 * NOTE(review): presumably find_get_pages_tag() advances offset past the
 * pages it returns - confirm against its definition.
 */
last_offset = offset ;
head = NULL ;
/* ret is the number of dirty pages stored in pages[]. */
ret = find_get_pages_tag ( inode - > i_mapping , & offset ,
PAGECACHE_TAG_DIRTY , nr_pages , pages ) ;
if ( ! ( flags & FIEMAP_EXTENT_DELALLOC ) ) {
/* First time, try to find a mapped buffer. */
if ( ret = = 0 ) {
/*
 * Shared exit for "nothing to report": drop the references on every
 * page in pages[], free the array and let the walk continue.
 */
out :
for ( index = 0 ; index < ret ; index + + )
page_cache_release ( pages [ index ] ) ;
/* just a hole. */
kfree ( pages ) ;
return EXT_CONTINUE ;
}
2011-05-24 19:36:58 +04:00
index = 0 ;
2008-10-07 08:46:36 +04:00
2011-05-24 19:36:58 +04:00
next_page :
2011-02-28 01:25:47 +03:00
/* Try to find the 1st mapped buffer. */
/* end starts at the first logical block covered by this page. */
2011-05-24 19:36:58 +04:00
end = ( ( __u64 ) pages [ index ] - > index < < PAGE_SHIFT ) > >
2011-02-28 01:25:47 +03:00
blksize_bits ;
2011-05-24 19:36:58 +04:00
if ( ! page_has_buffers ( pages [ index ] ) )
2011-02-28 01:25:47 +03:00
goto out ;
2011-05-24 19:36:58 +04:00
head = page_buffers ( pages [ index ] ) ;
2011-02-28 01:25:47 +03:00
if ( ! head )
goto out ;
2008-10-07 08:46:36 +04:00
/* index now names the page after the one being scanned. */
2011-05-24 19:36:58 +04:00
index + + ;
2011-02-28 01:25:47 +03:00
bh = head ;
do {
2011-05-24 19:36:58 +04:00
if ( end > = newex - > ec_block +
newex - > ec_len )
/* The buffer is out of the request range. */
goto out ;
if ( buffer_mapped ( bh ) & &
end > = newex - > ec_block ) {
/* Page that holds the first mapped buffer (index was already bumped). */
start_index = index - 1 ;
2011-02-28 01:25:47 +03:00
/* get the 1st mapped buffer. */
goto found_mapped_buffer ;
}
2011-05-24 19:36:58 +04:00
2011-02-28 01:25:47 +03:00
bh = bh - > b_this_page ;
end + + ;
} while ( bh ! = head ) ;
2008-10-07 08:46:36 +04:00
2011-05-24 19:36:58 +04:00
/*
 * No mapped buffer in the range found in this page,
 * We need to look up next page.
 */
if ( index > = ret ) {
/*
 * There is no page left, but we need to limit
 * newex->ec_len.
 */
newex - > ec_len = end - newex - > ec_block ;
goto out ;
}
goto next_page ;
2008-10-07 08:46:36 +04:00
} else {
2011-02-28 01:25:47 +03:00
/*
 * Find contiguous delayed buffers.  This is a repeated pass: only
 * continue if the first page returned is the one the lookup started at;
 * otherwise head stays NULL and so does bh.
 */
if ( ret > 0 & & pages [ 0 ] - > index = = last_offset )
head = page_buffers ( pages [ 0 ] ) ;
bh = head ;
2011-05-24 19:36:58 +04:00
index = 1 ;
start_index = 0 ;
2008-10-07 08:46:36 +04:00
}
2011-02-28 01:25:47 +03:00
found_mapped_buffer :
if ( bh ! = NULL & & buffer_delay ( bh ) ) {
/* 1st or contiguous delayed buffer found. */
if ( ! ( flags & FIEMAP_EXTENT_DELALLOC ) ) {
/*
 * 1st delayed buffer found, record
 * the start of extent.
 */
flags | = FIEMAP_EXTENT_DELALLOC ;
newex - > ec_block = end ;
logical = ( __u64 ) end < < blksize_bits ;
}
/* Find contiguous delayed buffers. */
do {
if ( ! buffer_delay ( bh ) )
goto found_delayed_extent ;
bh = bh - > b_this_page ;
end + + ;
} while ( bh ! = head ) ;
2011-05-24 19:36:58 +04:00
/* The current page was all delayed from bh on: walk the following pages. */
for ( ; index < ret ; index + + ) {
2011-02-28 01:25:47 +03:00
if ( ! page_has_buffers ( pages [ index ] ) ) {
bh = NULL ;
break ;
}
head = page_buffers ( pages [ index ] ) ;
if ( ! head ) {
bh = NULL ;
break ;
}
2011-05-24 19:36:58 +04:00
/* The page must directly follow the previous one in the file. */
2011-02-28 01:25:47 +03:00
if ( pages [ index ] - > index ! =
2011-05-24 19:36:58 +04:00
pages [ start_index ] - > index + index
- start_index ) {
2011-02-28 01:25:47 +03:00
/* Blocks are not contiguous. */
bh = NULL ;
break ;
}
bh = head ;
do {
if ( ! buffer_delay ( bh ) )
/* Delayed-extent ends. */
goto found_delayed_extent ;
bh = bh - > b_this_page ;
end + + ;
} while ( bh ! = head ) ;
}
} else if ( ! ( flags & FIEMAP_EXTENT_DELALLOC ) )
/* a hole found. */
goto out ;
found_delayed_extent :
/* Length of the delayed extent so far, capped at EXT_INIT_MAX_LEN. */
newex - > ec_len = min ( end - newex - > ec_block ,
( ext4_lblk_t ) EXT_INIT_MAX_LEN ) ;
/*
 * The batch was full, the last buffer seen is still delayed and the cap
 * has not been reached: the extent may continue, so fetch more pages.
 */
if ( ret = = nr_pages & & bh ! = NULL & &
newex - > ec_len < EXT_INIT_MAX_LEN & &
buffer_delay ( bh ) ) {
/* Have not collected an extent and continue. */
for ( index = 0 ; index < ret ; index + + )
page_cache_release ( pages [ index ] ) ;
goto repeat ;
2008-10-07 08:46:36 +04:00
}
2011-02-28 01:25:47 +03:00
/* Done with the page cache: drop the page references and the array. */
for ( index = 0 ; index < ret ; index + + )
page_cache_release ( pages [ index ] ) ;
kfree ( pages ) ;
2008-10-07 08:46:36 +04:00
}
/* Convert block numbers/counts to bytes for the fiemap record. */
physical = ( __u64 ) newex - > ec_start < < blksize_bits ;
length = ( __u64 ) newex - > ec_len < < blksize_bits ;
if ( ex & & ext4_ext_is_uninitialized ( ex ) )
flags | = FIEMAP_EXTENT_UNWRITTEN ;
2011-06-06 08:06:52 +04:00
if ( next = = EXT_MAX_BLOCKS )
2008-10-07 08:46:36 +04:00
flags | = FIEMAP_EXTENT_LAST ;
2011-02-28 01:25:47 +03:00
ret = fiemap_fill_next_extent ( fieinfo , logical , physical ,
2008-10-07 08:46:36 +04:00
length , flags ) ;
2011-02-28 01:25:47 +03:00
/* Negative: pass the error up.  1: stop the walk.  Otherwise go on. */
if ( ret < 0 )
return ret ;
if ( ret = = 1 )
2008-10-07 08:46:36 +04:00
return EXT_BREAK ;
return EXT_CONTINUE ;
}
/*
 * fiemap flags we can handle specified here: this mask is what
 * ext4_fiemap() passes to fiemap_check_flags(); a request that fails
 * that check is answered with -EBADR.
 */
# define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
2008-11-22 23:04:59 +03:00
static int ext4_xattr_fiemap ( struct inode * inode ,
struct fiemap_extent_info * fieinfo )
2008-10-07 08:46:36 +04:00
{
__u64 physical = 0 ;
__u64 length ;
__u32 flags = FIEMAP_EXTENT_LAST ;
int blockbits = inode - > i_sb - > s_blocksize_bits ;
int error = 0 ;
/* in-inode? */
2010-01-24 22:34:07 +03:00
if ( ext4_test_inode_state ( inode , EXT4_STATE_XATTR ) ) {
2008-10-07 08:46:36 +04:00
struct ext4_iloc iloc ;
int offset ; /* offset of xattr in inode */
error = ext4_get_inode_loc ( inode , & iloc ) ;
if ( error )
return error ;
physical = iloc . bh - > b_blocknr < < blockbits ;
offset = EXT4_GOOD_OLD_INODE_SIZE +
EXT4_I ( inode ) - > i_extra_isize ;
physical + = offset ;
length = EXT4_SB ( inode - > i_sb ) - > s_inode_size - offset ;
flags | = FIEMAP_EXTENT_DATA_INLINE ;
2010-04-04 01:44:16 +04:00
brelse ( iloc . bh ) ;
2008-10-07 08:46:36 +04:00
} else { /* external block */
physical = EXT4_I ( inode ) - > i_file_acl < < blockbits ;
length = inode - > i_sb - > s_blocksize ;
}
if ( physical )
error = fiemap_fill_next_extent ( fieinfo , 0 , physical ,
length , flags ) ;
return ( error < 0 ? error : 0 ) ;
}
2011-05-25 15:41:50 +04:00
/*
 * ext4_ext_punch_hole
 *
 * Punches a hole of "length" bytes in a file starting
 * at byte "offset"
 *
 * @file:   The file to punch a hole in
 * @offset: The starting byte offset of the hole
 * @length: The length of the hole
 *
 * Returns 0 on success (also when the hole starts at or beyond i_size and
 * nothing has to be done) or a negative error code.
 */
int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct address_space *mapping = inode->i_mapping;
	struct ext4_ext_cache cache_ex;
	struct ext4_map_blocks map;
	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
	loff_t first_page, last_page, page_len;
	loff_t first_page_offset, last_page_offset;
	handle_t *handle;
	int ret, credits, blocks_released, err = 0;

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		return 0;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
		   offset;
	}

	/*
	 * Only whole blocks/pages inside the hole are removed: the start is
	 * rounded up and the end rounded down.
	 */
	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	last_page = (offset + length) >> PAGE_CACHE_SHIFT;

	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);
		if (err)
			return err;
	}

	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
		truncate_inode_pages_range(mapping, first_page_offset,
					   last_page_offset - 1);
	}

	/* finish any pending end_io work */
	ext4_flush_completed_IO(inode);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext4_orphan_add(handle, inode);
	if (err)
		goto out;

	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * pages at the start and tail of the hole, and unmap the buffer
	 * heads for the block aligned regions of the page that were
	 * completely zeroed.
	 */
	if (first_page > last_page) {
		/*
		 * If the file space being truncated is contained within a page
		 * just zero out and unmap the middle of that page
		 */
		err = ext4_discard_partial_page_buffers(handle,
			mapping, offset, length, 0);
		if (err)
			goto out;
	} else {
		/*
		 * zero out and unmap the partial page that contains
		 * the start of the hole
		 */
		page_len = first_page_offset - offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
						   offset, page_len, 0);
			if (err)
				goto out;
		}

		/*
		 * zero out and unmap the partial page that contains
		 * the end of the hole
		 */
		page_len = offset + length - last_page_offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
					last_page_offset, page_len, 0);
			if (err)
				goto out;
		}
	}

	/*
	 * If i_size is contained in the last page, we need to
	 * unmap and zero the partial page after i_size
	 */
	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
	    inode->i_size % PAGE_CACHE_SIZE != 0) {
		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle,
				mapping, inode->i_size, page_len, 0);
			if (err)
				goto out;
		}
	}

	/* If there are no blocks to remove, return now */
	if (first_block >= last_block)
		goto out;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_ext_invalidate_cache(inode);
	ext4_discard_preallocations(inode);

	/*
	 * Loop over all the blocks and identify blocks
	 * that need to be punched out
	 */
	blocks_released = 0;
	for (iblock = first_block; iblock < last_block; iblock += num_blocks) {
		max_blocks = last_block - iblock;
		num_blocks = 1;

		memset(&map, 0, sizeof(map));
		map.m_lblk = iblock;
		map.m_len = max_blocks;

		ret = ext4_ext_map_blocks(handle, inode, &map,
			EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
		if (ret < 0) {
			/* Map blocks error */
			err = ret;
			break;
		}

		if (ret > 0) {
			/* ret blocks were punched out; skip over them. */
			blocks_released += ret;
			num_blocks = ret;
		} else {
			/*
			 * If map blocks could not find the block,
			 * then it is in a hole.  If the hole was
			 * not already cached, then map blocks should
			 * put it in the cache.  So we can get the hole
			 * out of the cache
			 */
			memset(&cache_ex, 0, sizeof(cache_ex));
			if (!ext4_ext_check_cache(inode, iblock, &cache_ex) ||
			    cache_ex.ec_start) {
				/* The block could not be identified */
				err = -EIO;
				break;
			}

			/* The hole is cached: jump to its end. */
			num_blocks = cache_ex.ec_block +
				cache_ex.ec_len - iblock;
		}

		if (num_blocks == 0) {
			/* This condition should never happen */
			ext_debug(" Block lookup failed ");
			err = -EIO;
			break;
		}
	}

	if (blocks_released > 0) {
		ext4_ext_invalidate_cache(inode);
		ext4_discard_preallocations(inode);
	}

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	up_write(&EXT4_I(inode)->i_data_sem);

out:
	ext4_orphan_del(handle, inode);
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
	return err;
}
2008-10-07 08:46:36 +04:00
/*
 * ext4_fiemap - fiemap entry point for ext4 inodes.
 *
 * @inode:   inode being queried
 * @fieinfo: fiemap request/result descriptor
 * @start:   first byte of the range of interest
 * @len:     length of the range in bytes
 *
 * Inodes not in extents format are handed to generic_block_fiemap().
 * Requests carrying flags outside EXT4_FIEMAP_FLAGS get -EBADR.  With
 * FIEMAP_FLAG_XATTR the xattr location is reported; otherwise the extent
 * tree is walked over the requested block range.
 */
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		__u64 start, __u64 len)
{
	ext4_lblk_t start_blk;
	ext4_lblk_t len_blks;
	__u64 last_blk;

	/* fallback to generic here if not in extents fmt */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return generic_block_fiemap(inode, fieinfo, start, len,
			ext4_get_block);

	if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
		return -EBADR;

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
		return ext4_xattr_fiemap(inode, fieinfo);

	/* Turn the byte range into a block range, clamped to the maximum. */
	start_blk = start >> inode->i_sb->s_blocksize_bits;
	last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
	if (last_blk >= EXT_MAX_BLOCKS)
		last_blk = EXT_MAX_BLOCKS - 1;
	len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;

	/*
	 * Walk the extent tree gathering extent information.
	 * ext4_ext_fiemap_cb will push extents back to user.
	 */
	return ext4_ext_walk_space(inode, start_blk, len_blks,
				   ext4_ext_fiemap_cb, fieinfo);
}