/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 *   Copyright (c) 2005, Bull S.A.
 *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 *   - ext4*_error() should be used in some situations
 *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 *   - smart tree reduction
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"

#include <trace/events/ext4.h>

static int ext4_split_extent(handle_t *handle,
				struct inode *inode,
				struct ext4_ext_path *path,
				struct ext4_map_blocks *map,
				int split_flag,
				int flags);

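/*
 * Make sure 'handle' has at least 'needed' credits: try to extend the
 * journal handle, and if that fails restart the transaction.  A
 * successful restart is reported as -EAGAIN so the caller can retry
 * with the refreshed handle.
 */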
static int ext4_ext_truncate_extend_restart(handle_t *handle,
					    struct inode *inode,
					    int needed)
{
	int err;

	if (!ext4_handle_valid(handle))
		return 0;
	if (handle->h_buffer_credits > needed)
		return 0;
	err = ext4_journal_extend(handle, needed);
	if (err <= 0)
		return err;
	err = ext4_truncate_restart_trans(handle, inode, needed);
	if (err == 0)
		err = -EAGAIN;

	return err;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	if (path->p_bh) {
		/* path points to block */
		return ext4_journal_get_write_access(handle, path->p_bh);
	}
	/* path points to leaf/index in inode body */
	/* we use in-core data, no need to protect them */
	return 0;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 *  - EIO
 */
#define ext4_ext_dirty(handle, inode, path) \
		__ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
static int __ext4_ext_dirty(const char *where, unsigned int line,
			    handle_t *handle, struct inode *inode,
			    struct ext4_ext_path *path)
{
	int err;
	if (path->p_bh) {
		/* path points to block */
		err = __ext4_handle_dirty_metadata(where, line, handle,
						   inode, path->p_bh);
	} else {
		/* path points to leaf/index in inode body */
		err = ext4_mark_inode_dirty(handle, inode);
	}
	return err;
}

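/*
 * ext4_ext_find_goal:
 * return a preferred physical block for allocating @block, derived
 * from the extent path when possible, else from the inode's block group.
 */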
static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
			      struct ext4_ext_path *path,
			      ext4_lblk_t block)
{
	if (path) {
		int depth = path->p_depth;
		struct ext4_extent *ex;

		/*
		 * Try to predict block placement assuming that we are
		 * filling in a file which will eventually be
		 * non-sparse --- i.e., in the case of libbfd writing
		 * an ELF object sections out-of-order but in a way
		 * that eventually results in a contiguous object or
		 * executable file, or some database extending a table
		 * space file.  However, this is actually somewhat
		 * non-ideal if we are writing a sparse file such as
		 * qemu or KVM writing a raw image file that is going
		 * to stay fairly sparse, since it will end up
		 * fragmenting the file system's free space.  Maybe we
		 * should have some heuristics or some way to allow
		 * userspace to pass a hint to file system,
		 * especially if the latter case turns out to be
		 * common.
		 */
		ex = path[depth].p_ext;
		if (ex) {
			ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
			ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);

			if (block > ext_block)
				return ext_pblk + (block - ext_block);
			else
				return ext_pblk - (ext_block - block);
		}

		/* it looks like index is empty;
		 * try to find starting block from index itself */
		if (path[depth].p_bh)
			return path[depth].p_bh->b_blocknr;
	}

	/* OK. use inode's group */
	return ext4_inode_to_goal_block(inode);
}

/*
 * Allocation for a metadata block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
			struct ext4_ext_path *path,
			struct ext4_extent *ex, int *err, unsigned int flags)
{
	ext4_fsblk_t goal, newblock;

	goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
					NULL, err);
	return newblock;
}

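/*
 * Capacity helpers: how many extent/index entries fit in a full tree
 * block and in the in-inode root.  Under AGGRESSIVE_TEST the sizes are
 * clamped to tiny values so that tree growing and splitting paths get
 * exercised; @check != 0 skips the clamp (used when validating on-disk
 * headers).
 */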
static inline int ext4_ext_space_block(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 6)
		size = 6;
#endif
	return size;
}

static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{
	int size;

	size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
			/ sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 5)
		size = 5;
#endif
	return size;
}

static inline int ext4_ext_space_root(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 3)
		size = 3;
#endif
	return size;
}

static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{
	int size;

	size = sizeof(EXT4_I(inode)->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
	if (!check && size > 4)
		size = 4;
#endif
	return size;
}

/*
 * Calculate the number of metadata blocks needed
 * to allocate @blocks
 * Worst case is one block per extent
 */
int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int idxs;

	idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
		/ sizeof(struct ext4_extent_idx));

	/*
	 * If the new delayed allocation block is contiguous with the
	 * previous da block, it can share index blocks with the
	 * previous block, so we only need to allocate a new index
	 * block every idxs leaf blocks.  At idxs**2 blocks, we need
	 * an additional index block, and at idxs**3 blocks, yet
	 * another index block.
	 */
	if (ei->i_da_metadata_calc_len &&
	    ei->i_da_metadata_calc_last_lblock+1 == lblock) {
		int num = 0;

		if ((ei->i_da_metadata_calc_len % idxs) == 0)
			num++;
		if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
			num++;
		if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
			num++;
			ei->i_da_metadata_calc_len = 0;
		} else
			ei->i_da_metadata_calc_len++;
		ei->i_da_metadata_calc_last_lblock++;
		return num;
	}

	/*
	 * In the worst case we need a new set of index blocks at
	 * every level of the inode's extent tree.
	 */
	ei->i_da_metadata_calc_len = 1;
	ei->i_da_metadata_calc_last_lblock = lblock;
	return ext_depth(inode) + 1;
}

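/*
 * Return the maximum number of entries allowed in a node at @depth,
 * distinguishing the in-inode root from on-disk tree blocks.
 */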
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	int max;

	if (depth == ext_depth(inode)) {
		if (depth == 0)
			max = ext4_ext_space_root(inode, 1);
		else
			max = ext4_ext_space_root_idx(inode, 1);
	} else {
		if (depth == 0)
			max = ext4_ext_space_block(inode, 1);
		else
			max = ext4_ext_space_block_idx(inode, 1);
	}

	return max;
}

static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
	ext4_fsblk_t block = ext4_ext_pblock(ext);
	int len = ext4_ext_get_actual_len(ext);

	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}

static int ext4_valid_extent_idx(struct inode *inode,
				struct ext4_extent_idx *ext_idx)
{
	ext4_fsblk_t block = ext4_idx_pblock(ext_idx);

	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
}

static int ext4_valid_extent_entries(struct inode *inode,
				struct ext4_extent_header *eh,
				int depth)
{
	unsigned short entries;
	if (eh->eh_entries == 0)
		return 1;

	entries = le16_to_cpu(eh->eh_entries);

	if (depth == 0) {
		/* leaf entries */
		struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
		while (entries) {
			if (!ext4_valid_extent(inode, ext))
				return 0;
			ext++;
			entries--;
		}
	} else {
		struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
		while (entries) {
			if (!ext4_valid_extent_idx(inode, ext_idx))
				return 0;
			ext_idx++;
			entries--;
		}
	}
	return 1;
}

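/*
 * Validate an extent tree node header and its entries before the rest
 * of the code trusts it; corruption is reported via ext4_error_inode()
 * and -EIO is returned.
 */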
static int __ext4_ext_check(const char *function, unsigned int line,
			    struct inode *inode, struct ext4_extent_header *eh,
			    int depth)
{
	const char *error_msg;
	int max = 0;

	if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
		error_msg = "invalid magic";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
		error_msg = "unexpected eh_depth";
		goto corrupted;
	}
	if (unlikely(eh->eh_max == 0)) {
		error_msg = "invalid eh_max";
		goto corrupted;
	}
	max = ext4_ext_max_entries(inode, depth);
	if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
		error_msg = "too large eh_max";
		goto corrupted;
	}
	if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
		error_msg = "invalid eh_entries";
		goto corrupted;
	}
	if (!ext4_valid_extent_entries(inode, eh, depth)) {
		error_msg = "invalid extent entries";
		goto corrupted;
	}
	return 0;

corrupted:
	ext4_error_inode(inode, function, line, 0,
			"bad header/extent: %s - magic %x, "
			"entries %u, max %u(%u), depth %u(%u)",
			error_msg, le16_to_cpu(eh->eh_magic),
			le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
			max, le16_to_cpu(eh->eh_depth), depth);

	return -EIO;
}

#define ext4_ext_check(inode, eh, depth)	\
	__ext4_ext_check(__func__, __LINE__, inode, eh, depth)

int ext4_ext_check_inode(struct inode *inode)
{
	return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
}

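/*
 * Debugging helpers: dump the lookup path, the extents in a leaf, and
 * the entries being moved during a split.  All of these are no-ops
 * unless EXT_DEBUG is defined.
 */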
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
	int k, l = path->p_depth;

	ext_debug("path:");
	for (k = 0; k <= l; k++, path++) {
		if (path->p_idx) {
			ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
				  ext4_idx_pblock(path->p_idx));
		} else if (path->p_ext) {
			ext_debug(" %d:[%d]%d:%llu ",
				  le32_to_cpu(path->p_ext->ee_block),
				  ext4_ext_is_uninitialized(path->p_ext),
				  ext4_ext_get_actual_len(path->p_ext),
				  ext4_ext_pblock(path->p_ext));
		} else
			ext_debug("  []");
	}
	ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int i;

	if (!path)
		return;

	eh = path[depth].p_hdr;
	ex = EXT_FIRST_EXTENT(eh);

	ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
			  ext4_ext_is_uninitialized(ex),
			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
	}
	ext_debug("\n");
}

static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
			ext4_fsblk_t newblock, int level)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;

	if (depth != level) {
		struct ext4_extent_idx *idx;
		idx = path[level].p_idx;
		while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
			ext_debug("%d: move %d:%llu in new index %llu\n", level,
					le32_to_cpu(idx->ei_block),
					ext4_idx_pblock(idx),
					newblock);
			idx++;
		}

		return;
	}

	ex = path[depth].p_ext;
	while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
				le32_to_cpu(ex->ee_block),
				ext4_ext_pblock(ex),
				ext4_ext_is_uninitialized(ex),
				ext4_ext_get_actual_len(ex),
				newblock);
		ex++;
	}
}

#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif

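/*
 * Release the buffer heads referenced by each level of @path.
 */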
void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
	int depth = path->p_depth;
	int i;

	for (i = 0; i <= depth; i++, path++)
		if (path->p_bh) {
			brelse(path->p_bh);
			path->p_bh = NULL;
		}
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
			struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent_idx *r, *l, *m;

	ext_debug("binsearch for %u(idx):  ", block);

	l = EXT_FIRST_INDEX(eh) + 1;
	r = EXT_LAST_INDEX(eh);
	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ei_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
				m, le32_to_cpu(m->ei_block),
				r, le32_to_cpu(r->ei_block));
	}

	path->p_idx = l - 1;
	ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
		  ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent_idx *chix, *ix;
		int k;

		chix = ix = EXT_FIRST_INDEX(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
			if (k != 0 &&
			    le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
				printk(KERN_DEBUG "k=%d, ix=0x%p, "
				       "first=0x%p\n", k,
				       ix, EXT_FIRST_INDEX(eh));
				printk(KERN_DEBUG "%u <= %u\n",
				       le32_to_cpu(ix->ei_block),
				       le32_to_cpu(ix[-1].ei_block));
			}
			BUG_ON(k && le32_to_cpu(ix->ei_block)
					   <= le32_to_cpu(ix[-1].ei_block));
			if (block < le32_to_cpu(ix->ei_block))
				break;
			chix = ix;
		}
		BUG_ON(chix != path->p_idx);
	}
#endif
}

/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
		struct ext4_ext_path *path, ext4_lblk_t block)
{
	struct ext4_extent_header *eh = path->p_hdr;
	struct ext4_extent *r, *l, *m;

	if (eh->eh_entries == 0) {
		/*
		 * this leaf is empty:
		 * we get such a leaf in split/add case
		 */
		return;
	}

	ext_debug("binsearch for %u:  ", block);

	l = EXT_FIRST_EXTENT(eh) + 1;
	r = EXT_LAST_EXTENT(eh);

	while (l <= r) {
		m = l + (r - l) / 2;
		if (block < le32_to_cpu(m->ee_block))
			r = m - 1;
		else
			l = m + 1;
		ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
				m, le32_to_cpu(m->ee_block),
				r, le32_to_cpu(r->ee_block));
	}

	path->p_ext = l - 1;
	ext_debug("  -> %d:%llu:[%d]%d ",
			le32_to_cpu(path->p_ext->ee_block),
			ext4_ext_pblock(path->p_ext),
			ext4_ext_is_uninitialized(path->p_ext),
			ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
	{
		struct ext4_extent *chex, *ex;
		int k;

		chex = ex = EXT_FIRST_EXTENT(eh);
		for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
			BUG_ON(k && le32_to_cpu(ex->ee_block)
					  <= le32_to_cpu(ex[-1].ee_block));
			if (block < le32_to_cpu(ex->ee_block))
				break;
			chex = ex;
		}
		BUG_ON(chex != path->p_ext);
	}
#endif
}

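/*
 * Initialize an empty extent tree rooted in the inode body.
 */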
int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *eh;

	eh = ext_inode_hdr(inode);
	eh->eh_depth = 0;
	eh->eh_entries = 0;
	eh->eh_magic = EXT4_EXT_MAGIC;
	eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
	ext4_mark_inode_dirty(handle, inode);
	ext4_ext_invalidate_cache(inode);
	return 0;
}

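/*
 * Walk the tree from the root down to the leaf covering @block, filling
 * in one ext4_ext_path element per level.  A path array is allocated if
 * @path is NULL; on error the references are dropped and ERR_PTR is
 * returned.
 */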
struct ext4_ext_path *
ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
					struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	struct buffer_head *bh;
	short int depth, i, ppos = 0, alloc = 0;

	eh = ext_inode_hdr(inode);
	depth = ext_depth(inode);

	/* account possible depth increase */
	if (!path) {
		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
				GFP_NOFS);
		if (!path)
			return ERR_PTR(-ENOMEM);
		alloc = 1;
	}
	path[0].p_hdr = eh;
	path[0].p_bh = NULL;

	i = depth;
	/* walk through the tree */
	while (i) {
		int need_to_validate = 0;

		ext_debug("depth %d: num %d, max %d\n",
			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

		ext4_ext_binsearch_idx(inode, path + ppos, block);
		path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
		path[ppos].p_depth = i;
		path[ppos].p_ext = NULL;

		bh = sb_getblk(inode->i_sb, path[ppos].p_block);
		if (unlikely(!bh))
			goto err;
		if (!bh_uptodate_or_lock(bh)) {
			trace_ext4_ext_load_extent(inode, block,
						path[ppos].p_block);
			if (bh_submit_read(bh) < 0) {
				put_bh(bh);
				goto err;
			}
			/* validate the extent entries */
			need_to_validate = 1;
		}
		eh = ext_block_hdr(bh);
		ppos++;
		if (unlikely(ppos > depth)) {
			put_bh(bh);
			EXT4_ERROR_INODE(inode,
					 "ppos %d > depth %d", ppos, depth);
			goto err;
		}
		path[ppos].p_bh = bh;
		path[ppos].p_hdr = eh;
		i--;

		if (need_to_validate && ext4_ext_check(inode, eh, i))
			goto err;
	}

	path[ppos].p_depth = i;
	path[ppos].p_ext = NULL;
	path[ppos].p_idx = NULL;

	/* find extent */
	ext4_ext_binsearch(inode, path + ppos, block);
	/* if not an empty leaf */
	if (path[ppos].p_ext)
		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);

	ext4_ext_show_path(inode, path);

	return path;

err:
	ext4_ext_drop_refs(path);
	if (alloc)
		kfree(path);
	return ERR_PTR(-EIO);
}

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
				 struct ext4_ext_path *curp,
				 int logical, ext4_fsblk_t ptr)
{
	struct ext4_extent_idx *ix;
	int len, err;

	err = ext4_ext_get_access(handle, inode, curp);
	if (err)
		return err;

	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d == ei_block %d!",
				 logical, le32_to_cpu(curp->p_idx->ei_block));
		return -EIO;
	}

	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
			     >= le16_to_cpu(curp->p_hdr->eh_max))) {
		EXT4_ERROR_INODE(inode,
				 "eh_entries %d >= eh_max %d!",
				 le16_to_cpu(curp->p_hdr->eh_entries),
				 le16_to_cpu(curp->p_hdr->eh_max));
		return -EIO;
	}

	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
		/* insert after */
		ext_debug("insert new index %d after: %llu\n", logical, ptr);
		ix = curp->p_idx + 1;
	} else {
		/* insert before */
		ext_debug("insert new index %d before: %llu\n", logical, ptr);
		ix = curp->p_idx;
	}

	len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
	BUG_ON(len < 0);
	if (len > 0) {
		ext_debug("insert new index %d: "
				"move %d indices from 0x%p to 0x%p\n",
				logical, len, ix, ix + 1);
		memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
	}

	if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
		return -EIO;
	}

	ix->ei_block = cpu_to_le32(logical);
	ext4_idx_store_pblock(ix, ptr);
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);

	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
		return -EIO;
	}

	err = ext4_ext_dirty(handle, inode, curp);
	ext4_std_error(inode->i_sb, err);

	return err;
}

/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
static int ext4_ext_split(handle_t *handle, struct inode *inode,
			  unsigned int flags,
			  struct ext4_ext_path *path,
			  struct ext4_extent *newext, int at)
{
	struct buffer_head *bh = NULL;
	int depth = ext_depth(inode);
	struct ext4_extent_header *neh;
	struct ext4_extent_idx *fidx;
	int i = at, k, m, a;
	ext4_fsblk_t newblock, oldblock;
	__le32 border;
	ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
	int err = 0;

	/* make decision: where to split? */
	/* FIXME: now decision is simplest: at current extent */

	/* if current leaf will be split, then we should use
	 * border from split point */
	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
		return -EIO;
	}
	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
		border = path[depth].p_ext[1].ee_block;
		ext_debug("leaf will be split."
				" next leaf starts at %d\n",
				  le32_to_cpu(border));
	} else {
		border = newext->ee_block;
		ext_debug("leaf will be added."
				" next leaf starts at %d\n",
				le32_to_cpu(border));
	}

	/*
	 * If error occurs, then we break processing
	 * and mark filesystem read-only. index won't
	 * be inserted and tree will be in consistent
	 * state. Next mount will repair buffers too.
	 */

	/*
	 * Get array to track all allocated blocks.
	 * We need this to handle errors and free blocks
	 * upon them.
	 */
	ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
	if (!ablocks)
		return -ENOMEM;

	/* allocate all needed blocks */
	ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
	for (a = 0; a < depth - at; a++) {
		newblock = ext4_ext_new_meta_block(handle, inode, path,
						   newext, &err, flags);
		if (newblock == 0)
			goto cleanup;
		ablocks[a] = newblock;
	}

	/* initialize new leaf */
	newblock = ablocks[--a];
	if (unlikely(newblock == 0)) {
		EXT4_ERROR_INODE(inode, "newblock == 0!");
		err = -EIO;
		goto cleanup;
	}
	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		goto cleanup;
	}
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err)
		goto cleanup;

	neh = ext_block_hdr(bh);
	neh->eh_entries = 0;
	neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	neh->eh_depth = 0;

	/* move remainder of path[depth] to the new leaf */
	if (unlikely(path[depth].p_hdr->eh_entries !=
		     path[depth].p_hdr->eh_max)) {
		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
				 path[depth].p_hdr->eh_entries,
				 path[depth].p_hdr->eh_max);
		err = -EIO;
		goto cleanup;
	}
	/* start copy from next extent */
	m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
	ext4_ext_show_move(inode, path, newblock, depth);
	if (m) {
		struct ext4_extent *ex;
		ex = EXT_FIRST_EXTENT(neh);
		memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
		le16_add_cpu(&neh->eh_entries, m);
	}

	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto cleanup;
	brelse(bh);
	bh = NULL;

	/* correct old leaf */
	if (m) {
		err = ext4_ext_get_access(handle, inode, path + depth);
		if (err)
			goto cleanup;
		le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
		err = ext4_ext_dirty(handle, inode, path + depth);
		if (err)
			goto cleanup;
	}

	/* create intermediate indexes */
	k = depth - at - 1;
	if (unlikely(k < 0)) {
		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
		err = -EIO;
		goto cleanup;
	}
	if (k)
		ext_debug("create %d intermediate indices\n", k);
	/* insert new index into current index block */
	/* current depth stored in i var */
	i = depth - 1;
	while (k--) {
		oldblock = newblock;
		newblock = ablocks[--a];
		bh = sb_getblk(inode->i_sb, newblock);
		if (!bh) {
			err = -EIO;
			goto cleanup;
		}
		lock_buffer(bh);

		err = ext4_journal_get_create_access(handle, bh);
		if (err)
			goto cleanup;

		neh = ext_block_hdr(bh);
		neh->eh_entries = cpu_to_le16(1);
		neh->eh_magic = EXT4_EXT_MAGIC;
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
		neh->eh_depth = cpu_to_le16(depth - i);
		fidx = EXT_FIRST_INDEX(neh);
		fidx->ei_block = border;
		ext4_idx_store_pblock(fidx, oldblock);

		ext_debug("int.index at %d (block %llu): %u -> %llu\n",
				i, newblock, le32_to_cpu(border), oldblock);

		/* move remainder of path[i] to the new index block */
		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
					EXT_LAST_INDEX(path[i].p_hdr))) {
			EXT4_ERROR_INODE(inode,
					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
					 le32_to_cpu(path[i].p_ext->ee_block));
			err = -EIO;
			goto cleanup;
		}
		/* start copy indexes */
		m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
				EXT_MAX_INDEX(path[i].p_hdr));
		ext4_ext_show_move(inode, path, newblock, i);
		if (m) {
			memmove(++fidx, path[i].p_idx,
				sizeof(struct ext4_extent_idx) * m);
			le16_add_cpu(&neh->eh_entries, m);
		}
		set_buffer_uptodate(bh);
		unlock_buffer(bh);

		err = ext4_handle_dirty_metadata(handle, inode, bh);
		if (err)
			goto cleanup;
		brelse(bh);
		bh = NULL;

		/* correct old index */
		if (m) {
			err = ext4_ext_get_access(handle, inode, path + i);
			if (err)
				goto cleanup;
			le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
			err = ext4_ext_dirty(handle, inode, path + i);
			if (err)
				goto cleanup;
		}

		i--;
	}

	/* insert new index */
	err = ext4_ext_insert_index(handle, inode, path + at,
				    le32_to_cpu(border), newblock);

cleanup:
	if (bh) {
		if (buffer_locked(bh))
			unlock_buffer(bh);
		brelse(bh);
	}

	if (err) {
		/* free all allocated blocks in error case */
		for (i = 0; i < depth; i++) {
			if (!ablocks[i])
				continue;
			ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
					 EXT4_FREE_BLOCKS_METADATA);
		}
	}
	kfree(ablocks);

	return err;
}

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
				 unsigned int flags,
				 struct ext4_extent *newext)
{
	struct ext4_extent_header *neh;
	struct buffer_head *bh;
	ext4_fsblk_t newblock;
	int err = 0;

	newblock = ext4_ext_new_meta_block(handle, inode, NULL,
		newext, &err, flags);
	if (newblock == 0)
		return err;

	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
		ext4_std_error(inode->i_sb, err);
		return err;
	}
	lock_buffer(bh);

	err = ext4_journal_get_create_access(handle, bh);
	if (err) {
		unlock_buffer(bh);
		goto out;
	}

	/* move top-level index/leaf into new block */
	memmove(bh->b_data, EXT4_I(inode)->i_data,
		sizeof(EXT4_I(inode)->i_data));

	/* set size of new block */
	neh = ext_block_hdr(bh);
	/* old root could have indexes or leaves
	 * so calculate eh_max right way */
	if (ext_depth(inode))
		neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
	else
		neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
	neh->eh_magic = EXT4_EXT_MAGIC;
	set_buffer_uptodate(bh);
	unlock_buffer(bh);

	err = ext4_handle_dirty_metadata(handle, inode, bh);
	if (err)
		goto out;

	/* Update top-level index: num,max,pointer */
	neh = ext_inode_hdr(inode);
	neh->eh_entries = cpu_to_le16(1);
	ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
	if (neh->eh_depth == 0) {
		/* Root extent block becomes index block */
		neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
		EXT_FIRST_INDEX(neh)->ei_block =
			EXT_FIRST_EXTENT(neh)->ee_block;
	}
	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

	neh->eh_depth = cpu_to_le16(neh->eh_depth + 1);
	ext4_mark_inode_dirty(handle, inode);
out:
	brelse(bh);

	return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
 */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
				    unsigned int flags,
				    struct ext4_ext_path *path,
				    struct ext4_extent *newext)
{
	struct ext4_ext_path *curp;
	int depth, i, err = 0;

repeat:
	i = depth = ext_depth(inode);

	/* walk up to the tree and look for free index entry */
	curp = path + depth;
	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
		i--;
		curp--;
	}

	/* we use already allocated block for index block,
	 * so subsequent data blocks should be contiguous */
	if (EXT_HAS_FREE_INDEX(curp)) {
		/* if we found index with free entry, then use that
		 * entry: create all needed subtree and add new leaf */
		err = ext4_ext_split(handle, inode, flags, path, newext, i);
		if (err)
			goto out;

		/* refill path */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    path);
		if (IS_ERR(path))
			err = PTR_ERR(path);
	} else {
		/* tree is full, time to grow in depth */
		err = ext4_ext_grow_indepth(handle, inode, flags, newext);
		if (err)
			goto out;

		/* refill path */
		ext4_ext_drop_refs(path);
		path = ext4_ext_find_extent(inode,
				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
				    path);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			goto out;
		}

		/*
		 * only first (depth 0 -> 1) produces free space;
		 * in all other cases we have to split the grown tree
		 */
		depth = ext_depth(inode);
		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
			/* now we need to split */
			goto repeat;
		}
	}

out:
	return err;
}

/*
 * search the closest allocated block to the left for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the smallest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_left(struct inode *inode,
				struct ext4_ext_path *path,
				ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	int depth, ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EIO;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually extent in the path covers blocks smaller
	 * than *logical, but it can be that extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
					 *logical, le32_to_cpu(ex->ee_block));
			return -EIO;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
				  ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
		le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block) : 0,
				  depth);
				return -EIO;
			}
		}
		return 0;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EIO;
	}

	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
	*phys = ext4_ext_pblock(ex) + ee_len - 1;
	return 0;
}

/*
 * search the closest allocated block to the right for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the largest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
static int ext4_ext_search_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 ext4_lblk_t *logical, ext4_fsblk_t *phys,
				 struct ext4_extent **ret_ex)
{
	struct buffer_head *bh = NULL;
	struct ext4_extent_header *eh;
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	ext4_fsblk_t block;
	int depth;	/* Note, NOT eh_depth; depth from top of tree */
	int ee_len;

	if (unlikely(path == NULL)) {
		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
		return -EIO;
	}
	depth = path->p_depth;
	*phys = 0;

	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually extent in the path covers blocks smaller
	 * than *logical, but it can be that extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
			EXT4_ERROR_INODE(inode,
					 "first_extent(path[%d].p_hdr) != ex",
					 depth);
			return -EIO;
		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
				EXT4_ERROR_INODE(inode,
						 "ix != EXT_FIRST_INDEX *logical %d!",
						 *logical);
				return -EIO;
			}
		}
		goto found_extent;
	}

	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
		EXT4_ERROR_INODE(inode,
				 "logical %d < ee_block %d + ee_len %d!",
				 *logical, le32_to_cpu(ex->ee_block), ee_len);
		return -EIO;
	}

	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
		/* next allocated block in this leaf */
		ex++;
		goto found_extent;
	}

	/* go up and search for index to the right */
	while (--depth >= 0) {
		ix = path[depth].p_idx;
		if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
			goto got_index;
	}

	/* we've gone up to the root and found no index to the right */
	return 0;

got_index:
	/* we've found index to the right, let's
	 * follow it and find the closest allocated
	 * block to the right */
	ix++;
	block = ext4_idx_pblock(ix);
	while (++depth < path->p_depth) {
		bh = sb_bread(inode->i_sb, block);
		if (bh == NULL)
			return -EIO;
		eh = ext_block_hdr(bh);
		/* subtract from p_depth to get proper eh_depth */
		if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
			put_bh(bh);
			return -EIO;
		}
		ix = EXT_FIRST_INDEX(eh);
		block = ext4_idx_pblock(ix);
		put_bh(bh);
	}

	bh = sb_bread(inode->i_sb, block);
	if (bh == NULL)
		return -EIO;
	eh = ext_block_hdr(bh);
	if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
		put_bh(bh);
		return -EIO;
	}
	ex = EXT_FIRST_EXTENT(eh);
found_extent:
	*logical = le32_to_cpu(ex->ee_block);
	*phys = ext4_ext_pblock(ex);
	*ret_ex = ex;
	if (bh)
		put_bh(bh);
	return 0;
}

/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
 * NOTE: it considers block number from index entry as
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
static ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	if (depth == 0 && path->p_ext == NULL)
		return EXT_MAX_BLOCKS;

	while (depth >= 0) {
		if (depth == path->p_depth) {
			/* leaf */
			if (path[depth].p_ext &&
				path[depth].p_ext !=
					EXT_LAST_EXTENT(path[depth].p_hdr))
				return le32_to_cpu(path[depth].p_ext[1].ee_block);
		} else {
			/* index */
			if (path[depth].p_idx !=
					EXT_LAST_INDEX(path[depth].p_hdr))
				return le32_to_cpu(path[depth].p_idx[1].ei_block);
		}
		depth--;
	}

	return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_next_leaf_block:
 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
{
	int depth;

	BUG_ON(path == NULL);
	depth = path->p_depth;

	/* zero-tree has no leaf blocks at all */
	if (depth == 0)
		return EXT_MAX_BLOCKS;

	/* go to index block */
	depth--;

	while (depth >= 0) {
		if (path[depth].p_idx !=
				EXT_LAST_INDEX(path[depth].p_hdr))
			return (ext4_lblk_t)
				le32_to_cpu(path[depth].p_idx[1].ei_block);
		depth--;
	}

	return EXT_MAX_BLOCKS;
}

/*
 * ext4_ext_correct_indexes:
 * if leaf gets modified and modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct tree in all cases?
 */
static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
				struct ext4_ext_path *path)
{
	struct ext4_extent_header *eh;
	int depth = ext_depth(inode);
	struct ext4_extent *ex;
	__le32 border;
	int k, err = 0;

	eh = path[depth].p_hdr;
	ex = path[depth].p_ext;

	if (unlikely(ex == NULL || eh == NULL)) {
		EXT4_ERROR_INODE(inode,
				 "ex %p == NULL or eh %p == NULL", ex, eh);
		return -EIO;
	}

	if (depth == 0) {
		/* there is no tree at all */
		return 0;
	}

	if (ex != EXT_FIRST_EXTENT(eh)) {
		/* we correct tree if first leaf got modified only */
		return 0;
	}

	/*
	 * TODO: we need correction if border is smaller than current one
	 */
	k = depth - 1;
	border = path[depth].p_ext->ee_block;
	err = ext4_ext_get_access(handle, inode, path + k);
	if (err)
		return err;
	path[k].p_idx->ei_block = border;
	err = ext4_ext_dirty(handle, inode, path + k);
	if (err)
		return err;

	while (k--) {
		/* change all left-side indexes */
		if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
			break;
		err = ext4_ext_get_access(handle, inode, path + k);
		if (err)
			break;
		path[k].p_idx->ei_block = border;
		err = ext4_ext_dirty(handle, inode, path + k);
		if (err)
			break;
	}

	return err;
}

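/*
 * Return 1 if @ex1 and @ex2 are logically and physically contiguous and
 * small enough to be merged into a single extent, 0 otherwise.
 */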
int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
				struct ext4_extent *ex2)
{
	unsigned short ext1_ee_len, ext2_ee_len, max_len;

	/*
	 * Make sure that either both extents are uninitialized, or
	 * both are _not_.
	 */
	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
		return 0;

	if (ext4_ext_is_uninitialized(ex1))
		max_len = EXT_UNINIT_MAX_LEN;
	else
		max_len = EXT_INIT_MAX_LEN;

	ext1_ee_len = ext4_ext_get_actual_len(ex1);
	ext2_ee_len = ext4_ext_get_actual_len(ex2);

	if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
			le32_to_cpu(ex2->ee_block))
		return 0;

	/*
	 * To allow future support for preallocated extents to be added
	 * as an RO_COMPAT feature, refuse to merge two extents if
	 * this can result in the top bit of ee_len being set.
	 */
	if (ext1_ee_len + ext2_ee_len > max_len)
		return 0;
#ifdef AGGRESSIVE_TEST
	if (ext1_ee_len >= 4)
		return 0;
#endif

	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
		return 1;
	return 0;
}

/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
static int ext4_ext_try_to_merge_right(struct inode *inode,
				 struct ext4_ext_path *path,
				 struct ext4_extent *ex)
{
	struct ext4_extent_header *eh;
	unsigned int depth, len;
	int merge_done = 0;
	int uninitialized = 0;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	while (ex < EXT_LAST_EXTENT(eh)) {
		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
			break;
		/* merge with next extent! */
		if (ext4_ext_is_uninitialized(ex))
			uninitialized = 1;
		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
				+ ext4_ext_get_actual_len(ex + 1));
		if (uninitialized)
			ext4_ext_mark_uninitialized(ex);

		if (ex + 1 < EXT_LAST_EXTENT(eh)) {
			len = (EXT_LAST_EXTENT(eh) - ex - 1)
				* sizeof(struct ext4_extent);
			memmove(ex + 1, ex + 2, len);
		}
		le16_add_cpu(&eh->eh_entries, -1);
		merge_done = 1;
		WARN_ON(eh->eh_entries == 0);
		if (!eh->eh_entries)
			EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
	}

	return merge_done;
}

/*
 * This function tries to merge the @ex extent to neighbours in the tree.
 * return 1 if merge left else 0.
 */
static int ext4_ext_try_to_merge(struct inode *inode,
				  struct ext4_ext_path *path,
				  struct ext4_extent *ex) {
	struct ext4_extent_header *eh;
	unsigned int depth;
	int merge_done = 0;
	int ret = 0;

	depth = ext_depth(inode);
	BUG_ON(path[depth].p_hdr == NULL);
	eh = path[depth].p_hdr;

	if (ex > EXT_FIRST_EXTENT(eh))
		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);

	if (!merge_done)
		ret = ext4_ext_try_to_merge_right(inode, path, ex);

	return ret;
}

2007-05-24 21:04:13 +04:00
/*
* check if a portion of the " newext " extent overlaps with an
* existing extent .
*
* If there is an overlap discovered , it updates the length of the newext
* such that there will be no overlap , and then returns 1.
* If there is no overlap found , it returns 0.
*/
2011-09-10 02:52:51 +04:00
static unsigned int ext4_ext_check_overlap ( struct ext4_sb_info * sbi ,
struct inode * inode ,
2010-10-28 05:30:14 +04:00
struct ext4_extent * newext ,
struct ext4_ext_path * path )
2007-05-24 21:04:13 +04:00
{
2008-01-29 07:58:27 +03:00
ext4_lblk_t b1 , b2 ;
2007-05-24 21:04:13 +04:00
unsigned int depth , len1 ;
unsigned int ret = 0 ;
b1 = le32_to_cpu ( newext - > ee_block ) ;
2007-07-18 05:42:41 +04:00
len1 = ext4_ext_get_actual_len ( newext ) ;
2007-05-24 21:04:13 +04:00
depth = ext_depth ( inode ) ;
if ( ! path [ depth ] . p_ext )
goto out ;
b2 = le32_to_cpu ( path [ depth ] . p_ext - > ee_block ) ;
2011-09-10 02:52:51 +04:00
b2 & = ~ ( sbi - > s_cluster_ratio - 1 ) ;
2007-05-24 21:04:13 +04:00
/*
* get the next allocated block if the extent in the path
2008-07-27 00:15:44 +04:00
* is before the requested block ( s )
2007-05-24 21:04:13 +04:00
*/
if ( b2 < b1 ) {
b2 = ext4_ext_next_allocated_block ( path ) ;
2011-06-06 08:05:17 +04:00
if ( b2 = = EXT_MAX_BLOCKS )
2007-05-24 21:04:13 +04:00
goto out ;
2011-09-10 02:52:51 +04:00
b2 & = ~ ( sbi - > s_cluster_ratio - 1 ) ;
2007-05-24 21:04:13 +04:00
}
2008-01-29 07:58:27 +03:00
/* check for wrap through zero on extent logical start block*/
2007-05-24 21:04:13 +04:00
if ( b1 + len1 < b1 ) {
2011-06-06 08:05:17 +04:00
len1 = EXT_MAX_BLOCKS - b1 ;
2007-05-24 21:04:13 +04:00
newext - > ee_len = cpu_to_le16 ( len1 ) ;
ret = 1 ;
}
/* check for overlap */
if ( b1 + len1 > b2 ) {
newext - > ee_len = cpu_to_le16 ( b2 - b1 ) ;
ret = 1 ;
}
out :
return ret ;
}
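/*
 * Worked example (hypothetical values, cluster ratio 1 so the masking
 * of b2 is a no-op): if newext covers b1 = 100 with len1 = 50 and the
 * extent in the path starts at b2 = 120, then b1 + len1 = 150 > b2, so
 * the overlap branch trims the request to b2 - b1 = 20 blocks and
 * returns 1.  The wrap check above handles the corner case where
 * b1 + len1 overflows the 32-bit logical space by first clamping len1
 * to EXT_MAX_BLOCKS - b1.
 */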
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_insert_extent :
* tries to merge the requested extent into the existing extent or
* inserts requested extent as new one into the tree ,
* creating new leaf in the no - space case .
2006-10-11 12:21:03 +04:00
*/
int ext4_ext_insert_extent ( handle_t * handle , struct inode * inode ,
struct ext4_ext_path * path ,
2009-09-28 23:49:08 +04:00
struct ext4_extent * newext , int flag )
2006-10-11 12:21:03 +04:00
{
2008-09-09 06:25:24 +04:00
struct ext4_extent_header * eh ;
2006-10-11 12:21:03 +04:00
struct ext4_extent * ex , * fex ;
struct ext4_extent * nearex ; /* nearest extent */
struct ext4_ext_path * npath = NULL ;
2008-01-29 07:58:27 +03:00
int depth , len , err ;
ext4_lblk_t next ;
2007-07-18 05:42:41 +04:00
unsigned uninitialized = 0 ;
2011-05-25 15:41:26 +04:00
int flags = 0 ;
2006-10-11 12:21:03 +04:00
2010-03-02 19:46:09 +03:00
if ( unlikely ( ext4_ext_get_actual_len ( newext ) = = 0 ) ) {
EXT4_ERROR_INODE ( inode , " ext4_ext_get_actual_len(newext) == 0 " ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_hdr = = NULL ) ) {
EXT4_ERROR_INODE ( inode , " path[%d].p_hdr == NULL " , depth ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
/* try to insert block into found extent and return */
2010-03-05 00:14:02 +03:00
if ( ex & & ! ( flag & EXT4_GET_BLOCKS_PRE_IO )
2009-09-28 23:49:08 +04:00
& & ext4_can_extents_be_merged ( inode , ex , newext ) ) {
2011-11-02 02:56:41 +04:00
ext_debug ( " append [%d]%d block to %u:[%d]%d (from %llu) \n " ,
2010-10-28 05:30:14 +04:00
ext4_ext_is_uninitialized ( newext ) ,
ext4_ext_get_actual_len ( newext ) ,
le32_to_cpu ( ex - > ee_block ) ,
ext4_ext_is_uninitialized ( ex ) ,
ext4_ext_get_actual_len ( ex ) ,
ext4_ext_pblock ( ex ) ) ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
2007-07-18 05:42:41 +04:00
/*
* ext4_can_extents_be_merged should have checked that either
* both extents are uninitialized , or both aren ' t . Thus we
* need to check only one of them here .
*/
if ( ext4_ext_is_uninitialized ( ex ) )
uninitialized = 1 ;
ex - > ee_len = cpu_to_le16 ( ext4_ext_get_actual_len ( ex )
+ ext4_ext_get_actual_len ( newext ) ) ;
if ( uninitialized )
ext4_ext_mark_uninitialized ( ex ) ;
2006-10-11 12:21:03 +04:00
eh = path [ depth ] . p_hdr ;
nearex = ex ;
goto merge ;
}
depth = ext_depth ( inode ) ;
eh = path [ depth ] . p_hdr ;
if ( le16_to_cpu ( eh - > eh_entries ) < le16_to_cpu ( eh - > eh_max ) )
goto has_space ;
/* probably next leaf has space for us? */
fex = EXT_LAST_EXTENT ( eh ) ;
2011-07-12 02:24:01 +04:00
next = EXT_MAX_BLOCKS ;
if ( le32_to_cpu ( newext - > ee_block ) > le32_to_cpu ( fex - > ee_block ) )
2011-07-24 05:49:07 +04:00
next = ext4_ext_next_leaf_block ( path ) ;
2011-07-12 02:24:01 +04:00
if ( next ! = EXT_MAX_BLOCKS ) {
2011-11-02 02:56:41 +04:00
ext_debug ( " next leaf block - %u \n " , next ) ;
2006-10-11 12:21:03 +04:00
BUG_ON ( npath ! = NULL ) ;
npath = ext4_ext_find_extent ( inode , next , NULL ) ;
if ( IS_ERR ( npath ) )
return PTR_ERR ( npath ) ;
BUG_ON ( npath - > p_depth ! = path - > p_depth ) ;
eh = npath [ depth ] . p_hdr ;
if ( le16_to_cpu ( eh - > eh_entries ) < le16_to_cpu ( eh - > eh_max ) ) {
2011-03-31 05:57:33 +04:00
ext_debug ( " next leaf isn't full(%d) \n " ,
2006-10-11 12:21:03 +04:00
le16_to_cpu ( eh - > eh_entries ) ) ;
path = npath ;
2011-07-11 19:43:59 +04:00
goto has_space ;
2006-10-11 12:21:03 +04:00
}
ext_debug ( " next leaf has no free space(%d,%d) \n " ,
le16_to_cpu ( eh - > eh_entries ) , le16_to_cpu ( eh - > eh_max ) ) ;
}
/*
2006-10-11 12:21:07 +04:00
* There is no free space in the found leaf .
* We ' re gonna add a new leaf in the tree .
2006-10-11 12:21:03 +04:00
*/
2011-05-25 15:41:26 +04:00
if ( flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT )
flags = EXT4_MB_USE_ROOT_BLOCKS ;
err = ext4_ext_create_new_leaf ( handle , inode , flags , path , newext ) ;
2006-10-11 12:21:03 +04:00
if ( err )
goto cleanup ;
depth = ext_depth ( inode ) ;
eh = path [ depth ] . p_hdr ;
has_space :
nearex = path [ depth ] . p_ext ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
2006-10-11 12:21:03 +04:00
goto cleanup ;
if ( ! nearex ) {
/* there is no extent in this leaf, create first one */
2011-11-02 02:56:41 +04:00
ext_debug ( " first extent in the leaf: %u:%llu:[%d]%d \n " ,
2007-05-24 21:04:54 +04:00
le32_to_cpu ( newext - > ee_block ) ,
2010-10-28 05:30:14 +04:00
ext4_ext_pblock ( newext ) ,
2009-09-18 21:34:55 +04:00
ext4_ext_is_uninitialized ( newext ) ,
2007-07-18 05:42:41 +04:00
ext4_ext_get_actual_len ( newext ) ) ;
2011-10-27 19:52:18 +04:00
nearex = EXT_FIRST_EXTENT ( eh ) ;
} else {
if ( le32_to_cpu ( newext - > ee_block )
2007-05-24 21:04:54 +04:00
> le32_to_cpu ( nearex - > ee_block ) ) {
2011-10-27 19:52:18 +04:00
/* Insert after */
2011-11-02 02:56:41 +04:00
ext_debug ( " insert %u:%llu:[%d]%d before: "
" nearest %p \n " ,
2011-10-27 19:52:18 +04:00
le32_to_cpu ( newext - > ee_block ) ,
ext4_ext_pblock ( newext ) ,
ext4_ext_is_uninitialized ( newext ) ,
ext4_ext_get_actual_len ( newext ) ,
nearex ) ;
nearex + + ;
} else {
/* Insert before */
BUG_ON ( newext - > ee_block = = nearex - > ee_block ) ;
2011-11-02 02:56:41 +04:00
ext_debug ( " insert %u:%llu:[%d]%d after: "
" nearest %p \n " ,
2007-05-24 21:04:54 +04:00
le32_to_cpu ( newext - > ee_block ) ,
2010-10-28 05:30:14 +04:00
ext4_ext_pblock ( newext ) ,
2009-09-18 21:34:55 +04:00
ext4_ext_is_uninitialized ( newext ) ,
2007-07-18 05:42:41 +04:00
ext4_ext_get_actual_len ( newext ) ,
2011-10-27 19:52:18 +04:00
nearex ) ;
}
len = EXT_LAST_EXTENT ( eh ) - nearex + 1 ;
if ( len > 0 ) {
2011-11-02 02:56:41 +04:00
ext_debug ( " insert %u:%llu:[%d]%d: "
2011-10-27 19:52:18 +04:00
" move %d extents from 0x%p to 0x%p \n " ,
le32_to_cpu ( newext - > ee_block ) ,
ext4_ext_pblock ( newext ) ,
ext4_ext_is_uninitialized ( newext ) ,
ext4_ext_get_actual_len ( newext ) ,
len , nearex , nearex + 1 ) ;
memmove ( nearex + 1 , nearex ,
len * sizeof ( struct ext4_extent ) ) ;
2006-10-11 12:21:03 +04:00
}
}
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & eh - > eh_entries , 1 ) ;
2011-10-27 19:52:18 +04:00
path [ depth ] . p_ext = nearex ;
2006-10-11 12:21:03 +04:00
nearex - > ee_block = newext - > ee_block ;
2010-10-28 05:30:14 +04:00
ext4_ext_store_pblock ( nearex , ext4_ext_pblock ( newext ) ) ;
2006-10-11 12:21:03 +04:00
nearex - > ee_len = newext - > ee_len ;
merge :
/* try to merge extents to the right */
2010-03-05 00:14:02 +03:00
if ( ! ( flag & EXT4_GET_BLOCKS_PRE_IO ) )
2009-09-28 23:49:08 +04:00
ext4_ext_try_to_merge ( inode , path , nearex ) ;
2006-10-11 12:21:03 +04:00
/* try to merge extents to the left */
/* time to correct all indexes above */
err = ext4_ext_correct_indexes ( handle , inode , path ) ;
if ( err )
goto cleanup ;
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
cleanup :
if ( npath ) {
ext4_ext_drop_refs ( npath ) ;
kfree ( npath ) ;
}
ext4_ext_invalidate_cache ( inode ) ;
return err ;
}
2010-10-28 05:30:14 +04:00
static int ext4_ext_walk_space ( struct inode * inode , ext4_lblk_t block ,
ext4_lblk_t num , ext_prepare_callback func ,
void * cbdata )
2008-10-07 08:46:36 +04:00
{
struct ext4_ext_path * path = NULL ;
struct ext4_ext_cache cbex ;
struct ext4_extent * ex ;
ext4_lblk_t next , start = 0 , end = 0 ;
ext4_lblk_t last = block + num ;
int depth , exists , err = 0 ;
BUG_ON ( func = = NULL ) ;
BUG_ON ( inode = = NULL ) ;
2011-06-06 08:05:17 +04:00
while ( block < last & & block ! = EXT_MAX_BLOCKS ) {
2008-10-07 08:46:36 +04:00
num = last - block ;
/* find extent for this block */
2009-12-10 05:30:02 +03:00
down_read ( & EXT4_I ( inode ) - > i_data_sem ) ;
2008-10-07 08:46:36 +04:00
path = ext4_ext_find_extent ( inode , block , path ) ;
2009-12-10 05:30:02 +03:00
up_read ( & EXT4_I ( inode ) - > i_data_sem ) ;
2008-10-07 08:46:36 +04:00
if ( IS_ERR ( path ) ) {
err = PTR_ERR ( path ) ;
path = NULL ;
break ;
}
depth = ext_depth ( inode ) ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_hdr = = NULL ) ) {
EXT4_ERROR_INODE ( inode , " path[%d].p_hdr == NULL " , depth ) ;
err = - EIO ;
break ;
}
2008-10-07 08:46:36 +04:00
ex = path [ depth ] . p_ext ;
next = ext4_ext_next_allocated_block ( path ) ;
exists = 0 ;
if ( ! ex ) {
/* there is no extent yet, so try to allocate
* all requested space */
start = block ;
end = block + num ;
} else if ( le32_to_cpu ( ex - > ee_block ) > block ) {
/* need to allocate space before found extent */
start = block ;
end = le32_to_cpu ( ex - > ee_block ) ;
if ( block + num < end )
end = block + num ;
} else if ( block > = le32_to_cpu ( ex - > ee_block )
+ ext4_ext_get_actual_len ( ex ) ) {
/* need to allocate space after found extent */
start = block ;
end = block + num ;
if ( end > = next )
end = next ;
} else if ( block > = le32_to_cpu ( ex - > ee_block ) ) {
/*
* some part of requested space is covered
* by found extent
*/
start = block ;
end = le32_to_cpu ( ex - > ee_block )
+ ext4_ext_get_actual_len ( ex ) ;
if ( block + num < end )
end = block + num ;
exists = 1 ;
} else {
BUG ( ) ;
}
BUG_ON ( end < = start ) ;
if ( ! exists ) {
cbex . ec_block = start ;
cbex . ec_len = end - start ;
cbex . ec_start = 0 ;
} else {
cbex . ec_block = le32_to_cpu ( ex - > ee_block ) ;
cbex . ec_len = ext4_ext_get_actual_len ( ex ) ;
2010-10-28 05:30:14 +04:00
cbex . ec_start = ext4_ext_pblock ( ex ) ;
2008-10-07 08:46:36 +04:00
}
2010-03-02 19:46:09 +03:00
if ( unlikely ( cbex . ec_len = = 0 ) ) {
EXT4_ERROR_INODE ( inode , " cbex.ec_len == 0 " ) ;
err = - EIO ;
break ;
}
2011-06-06 08:06:52 +04:00
err = func ( inode , next , & cbex , ex , cbdata ) ;
2008-10-07 08:46:36 +04:00
ext4_ext_drop_refs ( path ) ;
if ( err < 0 )
break ;
if ( err = = EXT_REPEAT )
continue ;
else if ( err = = EXT_BREAK ) {
err = 0 ;
break ;
}
if ( ext_depth ( inode ) ! = depth ) {
/* depth was changed. we have to realloc path */
kfree ( path ) ;
path = NULL ;
}
block = cbex . ec_block + cbex . ec_len ;
}
if ( path ) {
ext4_ext_drop_refs ( path ) ;
kfree ( path ) ;
}
return err ;
}
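/*
 * A minimal sketch (hypothetical, not built) of a walk callback for
 * ext4_ext_walk_space(), assuming the ext_prepare_callback signature
 * implied by the func(inode, next, &cbex, ex, cbdata) call above.  It
 * counts the mapped (non-hole) regions seen during the walk; holes
 * are reported with cbex->ec_start == 0.
 */
#if 0
static int count_mapped_cb(struct inode *inode, ext4_lblk_t next,
			   struct ext4_ext_cache *cbex,
			   struct ext4_extent *ex, void *cbdata)
{
	unsigned long *nr_mapped = cbdata;

	if (cbex->ec_start != 0)
		(*nr_mapped)++;
	return 0;	/* anything but EXT_BREAK/EXT_REPEAT continues */
}
#endif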
2006-12-07 07:41:36 +03:00
static void
2008-01-29 07:58:27 +03:00
ext4_ext_put_in_cache ( struct inode * inode , ext4_lblk_t block ,
2011-01-10 20:13:26 +03:00
__u32 len , ext4_fsblk_t start )
2006-10-11 12:21:03 +04:00
{
struct ext4_ext_cache * cex ;
BUG_ON ( len = = 0 ) ;
2009-05-15 17:07:28 +04:00
spin_lock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
2011-09-10 03:18:51 +04:00
trace_ext4_ext_put_in_cache ( inode , block , len , start ) ;
2006-10-11 12:21:03 +04:00
cex = & EXT4_I ( inode ) - > i_cached_extent ;
cex - > ec_block = block ;
cex - > ec_len = len ;
cex - > ec_start = start ;
2009-05-15 17:07:28 +04:00
spin_unlock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
2006-10-11 12:21:03 +04:00
}
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_put_gap_in_cache :
* calculate boundaries of the gap that the requested block fits into
2006-10-11 12:21:03 +04:00
* and cache this gap
*/
2006-12-07 07:41:36 +03:00
static void
2006-10-11 12:21:03 +04:00
ext4_ext_put_gap_in_cache ( struct inode * inode , struct ext4_ext_path * path ,
2008-01-29 07:58:27 +03:00
ext4_lblk_t block )
2006-10-11 12:21:03 +04:00
{
int depth = ext_depth ( inode ) ;
2008-01-29 07:58:27 +03:00
unsigned long len ;
ext4_lblk_t lblock ;
2006-10-11 12:21:03 +04:00
struct ext4_extent * ex ;
ex = path [ depth ] . p_ext ;
if ( ex = = NULL ) {
/* there is no extent yet, so gap is [0;-] */
lblock = 0 ;
2011-06-06 08:05:17 +04:00
len = EXT_MAX_BLOCKS ;
2006-10-11 12:21:03 +04:00
ext_debug ( " cache gap(whole file): " ) ;
} else if ( block < le32_to_cpu ( ex - > ee_block ) ) {
lblock = block ;
len = le32_to_cpu ( ex - > ee_block ) - block ;
2008-01-29 07:58:27 +03:00
ext_debug ( " cache gap(before): %u [%u:%u] " ,
block ,
le32_to_cpu ( ex - > ee_block ) ,
ext4_ext_get_actual_len ( ex ) ) ;
2006-10-11 12:21:03 +04:00
} else if ( block > = le32_to_cpu ( ex - > ee_block )
2007-07-18 05:42:41 +04:00
+ ext4_ext_get_actual_len ( ex ) ) {
2008-01-29 07:58:27 +03:00
ext4_lblk_t next ;
2007-05-24 21:04:54 +04:00
lblock = le32_to_cpu ( ex - > ee_block )
2007-07-18 05:42:41 +04:00
+ ext4_ext_get_actual_len ( ex ) ;
2008-01-29 07:58:27 +03:00
next = ext4_ext_next_allocated_block ( path ) ;
2008-01-29 07:58:27 +03:00
ext_debug ( " cache gap(after): [%u:%u] %u " ,
le32_to_cpu ( ex - > ee_block ) ,
ext4_ext_get_actual_len ( ex ) ,
block ) ;
2008-01-29 07:58:27 +03:00
BUG_ON ( next = = lblock ) ;
len = next - lblock ;
2006-10-11 12:21:03 +04:00
} else {
lblock = len = 0 ;
BUG ( ) ;
}
2008-01-29 07:58:27 +03:00
ext_debug ( " -> %u:%lu \n " , lblock , len ) ;
2011-01-10 20:13:26 +03:00
ext4_ext_put_in_cache ( inode , lblock , len , 0 ) ;
2006-10-11 12:21:03 +04:00
}
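/*
 * Worked example (hypothetical layout): with a single extent mapping
 * logical blocks [50, 60), a lookup for block 20 takes the "before"
 * branch and caches the gap as lblock = 20, len = 30; a lookup for
 * block 70 takes the "after" branch with lblock = 60 and len running
 * up to the next allocated block (EXT_MAX_BLOCKS - 60 if there is
 * none).
 */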
2011-01-10 20:13:26 +03:00
/*
2011-07-24 05:53:25 +04:00
* ext4_ext_check_cache ( )
2011-05-25 15:41:50 +04:00
* Checks to see if the given block is in the cache .
* If it is , the cached extent is stored in the given
* cache extent pointer . If the cached extent is a hole ,
* this routine should be used instead of
* ext4_ext_in_cache if the calling function needs to
* know the size of the hole .
*
* @ inode : The file ' s inode
* @ block : The block to look for in the cache
* @ ex : Pointer where the cached extent will be stored
* if it contains block
*
2011-01-10 20:13:26 +03:00
* Return 0 if cache is invalid ; 1 if the cache is valid
*/
2011-05-25 15:41:50 +04:00
static int ext4_ext_check_cache ( struct inode * inode , ext4_lblk_t block ,
struct ext4_ext_cache * ex ) {
2006-10-11 12:21:03 +04:00
struct ext4_ext_cache * cex ;
2011-05-23 05:24:16 +04:00
struct ext4_sb_info * sbi ;
2011-01-10 20:13:26 +03:00
int ret = 0 ;
2006-10-11 12:21:03 +04:00
2010-05-17 15:00:00 +04:00
/*
2009-05-15 17:07:28 +04:00
* We borrow i_block_reservation_lock to protect i_cached_extent
*/
spin_lock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
2006-10-11 12:21:03 +04:00
cex = & EXT4_I ( inode ) - > i_cached_extent ;
2011-05-23 05:24:16 +04:00
sbi = EXT4_SB ( inode - > i_sb ) ;
2006-10-11 12:21:03 +04:00
/* has cache valid data? */
2011-01-10 20:13:26 +03:00
if ( cex - > ec_len = = 0 )
2009-05-15 17:07:28 +04:00
goto errout ;
2006-10-11 12:21:03 +04:00
2010-03-04 07:55:01 +03:00
if ( in_range ( block , cex - > ec_block , cex - > ec_len ) ) {
2011-05-25 15:41:50 +04:00
memcpy ( ex , cex , sizeof ( struct ext4_ext_cache ) ) ;
2008-01-29 07:58:27 +03:00
ext_debug ( " %u cached by %u:%u:%llu \n " ,
block ,
cex - > ec_block , cex - > ec_len , cex - > ec_start ) ;
2011-01-10 20:13:26 +03:00
ret = 1 ;
2006-10-11 12:21:03 +04:00
}
2009-05-15 17:07:28 +04:00
errout :
2011-05-23 05:24:16 +04:00
if ( ! ret )
sbi - > extent_cache_misses + + ;
else
sbi - > extent_cache_hits + + ;
2011-09-10 03:18:51 +04:00
trace_ext4_ext_in_cache ( inode , block , ret ) ;
2009-05-15 17:07:28 +04:00
spin_unlock ( & EXT4_I ( inode ) - > i_block_reservation_lock ) ;
return ret ;
2006-10-11 12:21:03 +04:00
}
2011-05-25 15:41:50 +04:00
/*
* ext4_ext_in_cache ( )
* Checks to see if the given block is in the cache .
* If it is , the cached extent is stored in the given
* extent pointer .
*
* @ inode : The file ' s inode
* @ block : The block to look for in the cache
* @ ex : Pointer where the cached extent will be stored
* if it contains block
*
* Return 0 if cache is invalid ; 1 if the cache is valid
*/
static int
ext4_ext_in_cache ( struct inode * inode , ext4_lblk_t block ,
struct ext4_extent * ex )
{
struct ext4_ext_cache cex ;
int ret = 0 ;
if ( ext4_ext_check_cache ( inode , block , & cex ) ) {
ex - > ee_block = cpu_to_le32 ( cex . ec_block ) ;
ext4_ext_store_pblock ( ex , cex . ec_start ) ;
ex - > ee_len = cpu_to_le16 ( cex . ec_len ) ;
ret = 1 ;
}
return ret ;
}
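/*
 * Usage sketch (hypothetical caller): the single-slot cache is meant
 * to be consulted before paying for a tree walk, e.g.
 *
 *	struct ext4_extent ex;
 *
 *	if (ext4_ext_in_cache(inode, lblk, &ex)) {
 *		use ext4_ext_pblock(&ex) and
 *		ext4_ext_get_actual_len(&ex) without a lookup;
 *	}
 *
 * Callers that must recognize cached holes (ec_start == 0) and learn
 * their size should use ext4_ext_check_cache() directly, as noted in
 * its header comment.
 */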
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_rm_idx :
* removes index from the index block .
2006-10-11 12:21:03 +04:00
*/
2008-01-29 07:58:27 +03:00
static int ext4_ext_rm_idx ( handle_t * handle , struct inode * inode ,
2006-10-11 12:21:03 +04:00
struct ext4_ext_path * path )
{
int err ;
2006-10-11 12:21:05 +04:00
ext4_fsblk_t leaf ;
2006-10-11 12:21:03 +04:00
/* free index block */
path - - ;
2010-10-28 05:30:14 +04:00
leaf = ext4_idx_pblock ( path - > p_idx ) ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( path - > p_hdr - > eh_entries = = 0 ) ) {
EXT4_ERROR_INODE ( inode , " path->p_hdr->eh_entries == 0 " ) ;
return - EIO ;
}
2006-12-07 07:41:33 +03:00
err = ext4_ext_get_access ( handle , inode , path ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
2011-07-28 05:29:33 +04:00
if ( path - > p_idx ! = EXT_LAST_INDEX ( path - > p_hdr ) ) {
int len = EXT_LAST_INDEX ( path - > p_hdr ) - path - > p_idx ;
len * = sizeof ( struct ext4_extent_idx ) ;
memmove ( path - > p_idx , path - > p_idx + 1 , len ) ;
}
2008-04-17 18:38:59 +04:00
le16_add_cpu ( & path - > p_hdr - > eh_entries , - 1 ) ;
2006-12-07 07:41:33 +03:00
err = ext4_ext_dirty ( handle , inode , path ) ;
if ( err )
2006-10-11 12:21:03 +04:00
return err ;
2006-10-11 12:21:11 +04:00
ext_debug ( " index is empty, remove it, free block %llu \n " , leaf ) ;
2011-09-10 03:18:51 +04:00
trace_ext4_ext_rm_idx ( inode , leaf ) ;
2011-02-22 05:01:42 +03:00
ext4_free_blocks ( handle , inode , NULL , leaf , 1 ,
2009-11-23 15:17:05 +03:00
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET ) ;
2006-10-11 12:21:03 +04:00
return err ;
}
/*
2008-08-20 06:16:05 +04:00
* ext4_ext_calc_credits_for_single_extent :
* This routine returns the max . credits needed to insert an extent
* into the extent tree .
* When passing the actual path , the caller should calculate credits
* under i_data_sem .
2006-10-11 12:21:03 +04:00
*/
2008-08-20 06:15:58 +04:00
int ext4_ext_calc_credits_for_single_extent ( struct inode * inode , int nrblocks ,
2006-10-11 12:21:03 +04:00
struct ext4_ext_path * path )
{
if ( path ) {
2008-08-20 06:16:05 +04:00
int depth = ext_depth ( inode ) ;
2008-08-20 06:16:03 +04:00
int ret = 0 ;
2008-08-20 06:16:05 +04:00
2006-10-11 12:21:03 +04:00
/* probably there is space in leaf? */
if ( le16_to_cpu ( path [ depth ] . p_hdr - > eh_entries )
2008-08-20 06:16:05 +04:00
< le16_to_cpu ( path [ depth ] . p_hdr - > eh_max ) ) {
2006-10-11 12:21:03 +04:00
2008-08-20 06:16:05 +04:00
/*
* There is some space in the leaf , no
* need to account for leaf block credit
*
* bitmaps and block group descriptor blocks
2011-10-08 23:53:49 +04:00
* and other metadata blocks still need to be
2008-08-20 06:16:05 +04:00
* accounted .
*/
2008-08-20 06:15:58 +04:00
/* 1 bitmap, 1 block group descriptor */
2008-08-20 06:16:05 +04:00
ret = 2 + EXT4_META_TRANS_BLOCKS ( inode - > i_sb ) ;
2009-07-06 07:12:04 +04:00
return ret ;
2008-08-20 06:16:05 +04:00
}
}
2006-10-11 12:21:03 +04:00
2008-08-20 06:15:58 +04:00
return ext4_chunk_trans_blocks ( inode , nrblocks ) ;
2008-08-20 06:16:05 +04:00
}
2006-10-11 12:21:03 +04:00
2008-08-20 06:16:05 +04:00
/*
* How many index / leaf blocks need to change / allocate to modify nrblocks ?
*
* if nrblocks fit in a single extent ( chunk flag is 1 ) , then
* in the worst case , each tree level index / leaf needs to be changed
* if the tree splits due to inserting a new extent , then the old tree
* index / leaf need to be updated too
*
* If the nrblocks are discontiguous , they could cause
* the whole tree to split more than once , but this is really rare .
*/
2008-08-20 06:15:58 +04:00
int ext4_ext_index_trans_blocks ( struct inode * inode , int nrblocks , int chunk )
2008-08-20 06:16:05 +04:00
{
int index ;
int depth = ext_depth ( inode ) ;
2006-10-11 12:21:03 +04:00
2008-08-20 06:16:05 +04:00
if ( chunk )
index = depth * 2 ;
else
index = depth * 3 ;
2006-10-11 12:21:03 +04:00
2008-08-20 06:16:05 +04:00
return index ;
2006-10-11 12:21:03 +04:00
}
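/*
 * Example (hypothetical tree of depth 2): a request that fits in one
 * extent (chunk = 1) is charged 2 credits per level, i.e. 4 index/leaf
 * blocks; a discontiguous request (chunk = 0) is charged 3 per level,
 * i.e. 6, to allow for more than one split path.
 */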
static int ext4_remove_blocks ( handle_t * handle , struct inode * inode ,
2011-09-10 02:54:51 +04:00
struct ext4_extent * ex ,
ext4_fsblk_t * partial_cluster ,
ext4_lblk_t from , ext4_lblk_t to )
2006-10-11 12:21:03 +04:00
{
2011-09-10 02:54:51 +04:00
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
2007-07-18 05:42:41 +04:00
unsigned short ee_len = ext4_ext_get_actual_len ( ex ) ;
2011-09-10 02:54:51 +04:00
ext4_fsblk_t pblk ;
2009-11-23 15:17:05 +03:00
int flags = EXT4_FREE_BLOCKS_FORGET ;
2006-10-11 12:21:03 +04:00
2008-01-29 08:19:52 +03:00
if ( S_ISDIR ( inode - > i_mode ) | | S_ISLNK ( inode - > i_mode ) )
2009-11-23 15:17:05 +03:00
flags | = EXT4_FREE_BLOCKS_METADATA ;
2011-09-10 02:54:51 +04:00
/*
* For bigalloc file systems , we never free a partial cluster
* at the beginning of the extent . Instead , we make a note
* that we tried freeing the cluster , and check to see if we
* need to free it on a subsequent call to ext4_remove_blocks ,
* or at the end of the ext4_truncate ( ) operation .
*/
flags | = EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER ;
2011-09-10 03:18:51 +04:00
trace_ext4_remove_blocks ( inode , ex , from , to , * partial_cluster ) ;
2011-09-10 02:54:51 +04:00
/*
* If we have a partial cluster , and it ' s different from the
* cluster of the last block , we need to explicitly free the
* partial cluster here .
*/
pblk = ext4_ext_pblock ( ex ) + ee_len - 1 ;
if ( * partial_cluster & & ( EXT4_B2C ( sbi , pblk ) ! = * partial_cluster ) ) {
ext4_free_blocks ( handle , inode , NULL ,
EXT4_C2B ( sbi , * partial_cluster ) ,
sbi - > s_cluster_ratio , flags ) ;
* partial_cluster = 0 ;
}
2006-10-11 12:21:03 +04:00
# ifdef EXTENTS_STATS
{
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
spin_lock ( & sbi - > s_ext_stats_lock ) ;
sbi - > s_ext_blocks + = ee_len ;
sbi - > s_ext_extents + + ;
if ( ee_len < sbi - > s_ext_min )
sbi - > s_ext_min = ee_len ;
if ( ee_len > sbi - > s_ext_max )
sbi - > s_ext_max = ee_len ;
if ( ext_depth ( inode ) > sbi - > s_depth_max )
sbi - > s_depth_max = ext_depth ( inode ) ;
spin_unlock ( & sbi - > s_ext_stats_lock ) ;
}
# endif
if ( from > = le32_to_cpu ( ex - > ee_block )
2007-07-18 05:42:41 +04:00
& & to = = le32_to_cpu ( ex - > ee_block ) + ee_len - 1 ) {
2006-10-11 12:21:03 +04:00
/* tail removal */
2008-01-29 07:58:27 +03:00
ext4_lblk_t num ;
2007-07-18 05:42:41 +04:00
num = le32_to_cpu ( ex - > ee_block ) + ee_len - from ;
2011-09-10 02:54:51 +04:00
pblk = ext4_ext_pblock ( ex ) + ee_len - num ;
ext_debug ( " free last %u blocks starting %llu \n " , num , pblk ) ;
ext4_free_blocks ( handle , inode , NULL , pblk , num , flags ) ;
/*
* If the block range to be freed didn ' t start at the
* beginning of a cluster , and we removed the entire
* extent , save the partial cluster here , since we
* might need to delete if we determine that the
* truncate operation has removed all of the blocks in
* the cluster .
*/
if ( pblk & ( sbi - > s_cluster_ratio - 1 ) & &
( ee_len = = num ) )
* partial_cluster = EXT4_B2C ( sbi , pblk ) ;
else
* partial_cluster = 0 ;
2006-10-11 12:21:03 +04:00
} else if ( from = = le32_to_cpu ( ex - > ee_block )
2007-07-18 05:42:41 +04:00
& & to < = le32_to_cpu ( ex - > ee_block ) + ee_len - 1 ) {
2011-05-25 15:41:43 +04:00
/* head removal */
ext4_lblk_t num ;
ext4_fsblk_t start ;
num = to - from ;
start = ext4_ext_pblock ( ex ) ;
ext_debug ( " free first %u blocks starting %llu \n " , num , start ) ;
2011-10-18 19:01:51 +04:00
ext4_free_blocks ( handle , inode , NULL , start , num , flags ) ;
2011-05-25 15:41:43 +04:00
2006-10-11 12:21:03 +04:00
} else {
2008-01-29 07:58:27 +03:00
printk ( KERN_INFO " strange request: removal(2) "
" %u-%u from %u:%u \n " ,
from , to , le32_to_cpu ( ex - > ee_block ) , ee_len ) ;
2006-10-11 12:21:03 +04:00
}
return 0 ;
}
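/*
 * Bigalloc example (hypothetical geometry, s_cluster_ratio = 16): a
 * tail removal whose first freed block is pblk = 1000 does not start
 * on a cluster boundary (1000 & 15 == 8), so if the whole extent went
 * away, *partial_cluster is set to EXT4_B2C(sbi, 1000) = 62.  A later
 * call that frees blocks ending in a different cluster will then
 * release cluster 62 explicitly, per the comment above.
 */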
2011-05-25 15:41:43 +04:00
/*
* ext4_ext_rm_leaf ( ) Removes the extents associated with the
* blocks appearing between " start " and " end " , and splits the extents
* if " start " and " end " appear in the same extent
*
* @ handle : The journal handle
* @ inode : The files inode
* @ path : The path to the leaf
* @ start : The first block to remove
* @ end : The last block to remove
*/
2006-10-11 12:21:03 +04:00
static int
ext4_ext_rm_leaf ( handle_t * handle , struct inode * inode ,
2011-09-10 02:54:51 +04:00
struct ext4_ext_path * path , ext4_fsblk_t * partial_cluster ,
ext4_lblk_t start , ext4_lblk_t end )
2006-10-11 12:21:03 +04:00
{
2011-09-10 02:54:51 +04:00
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
2006-10-11 12:21:03 +04:00
int err = 0 , correct_index = 0 ;
int depth = ext_depth ( inode ) , credits ;
struct ext4_extent_header * eh ;
2011-10-25 13:35:05 +04:00
ext4_lblk_t a , b ;
2008-01-29 07:58:27 +03:00
unsigned num ;
ext4_lblk_t ex_ee_block ;
2006-10-11 12:21:03 +04:00
unsigned short ex_ee_len ;
2007-07-18 05:42:41 +04:00
unsigned uninitialized = 0 ;
2006-10-11 12:21:03 +04:00
struct ext4_extent * ex ;
2007-07-18 17:19:09 +04:00
/* the header must be checked already in ext4_ext_remove_space() */
2008-01-29 07:58:27 +03:00
ext_debug ( " truncate since %u in leaf \n " , start ) ;
2006-10-11 12:21:03 +04:00
if ( ! path [ depth ] . p_hdr )
path [ depth ] . p_hdr = ext_block_hdr ( path [ depth ] . p_bh ) ;
eh = path [ depth ] . p_hdr ;
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_hdr = = NULL ) ) {
EXT4_ERROR_INODE ( inode , " path[%d].p_hdr == NULL " , depth ) ;
return - EIO ;
}
2006-10-11 12:21:03 +04:00
/* find where to start removing */
ex = EXT_LAST_EXTENT ( eh ) ;
ex_ee_block = le32_to_cpu ( ex - > ee_block ) ;
2007-07-18 05:42:41 +04:00
ex_ee_len = ext4_ext_get_actual_len ( ex ) ;
2006-10-11 12:21:03 +04:00
2011-09-10 03:18:51 +04:00
trace_ext4_ext_rm_leaf ( inode , start , ex , * partial_cluster ) ;
2006-10-11 12:21:03 +04:00
while ( ex > = EXT_FIRST_EXTENT ( eh ) & &
ex_ee_block + ex_ee_len > start ) {
2009-06-10 22:22:55 +04:00
if ( ext4_ext_is_uninitialized ( ex ) )
uninitialized = 1 ;
else
uninitialized = 0 ;
2009-09-18 21:34:55 +04:00
ext_debug ( " remove ext %u:[%d]%d \n " , ex_ee_block ,
uninitialized , ex_ee_len ) ;
2006-10-11 12:21:03 +04:00
path [ depth ] . p_ext = ex ;
a = ex_ee_block > start ? ex_ee_block : start ;
2011-05-25 15:41:43 +04:00
b = ex_ee_block + ex_ee_len - 1 < end ?
ex_ee_block + ex_ee_len - 1 : end ;
2006-10-11 12:21:03 +04:00
ext_debug ( " border %u:%u \n " , a , b ) ;
2011-05-25 15:41:43 +04:00
/* If this extent is beyond the end of the hole, skip it */
if ( end < = ex_ee_block ) {
ex - - ;
ex_ee_block = le32_to_cpu ( ex - > ee_block ) ;
ex_ee_len = ext4_ext_get_actual_len ( ex ) ;
continue ;
2011-10-25 13:35:05 +04:00
} else if ( b ! = ex_ee_block + ex_ee_len - 1 ) {
EXT4_ERROR_INODE ( inode , " bad truncate %u:%u \n " ,
start , end ) ;
err = - EIO ;
goto out ;
2006-10-11 12:21:03 +04:00
} else if ( a ! = ex_ee_block ) {
/* remove tail of the extent */
2011-10-25 13:35:05 +04:00
num = a - ex_ee_block ;
2006-10-11 12:21:03 +04:00
} else {
/* remove whole extent: excellent! */
num = 0 ;
}
2008-08-02 05:59:19 +04:00
/*
* 3 for leaf , sb , and inode plus 2 ( bmap and group
* descriptor ) for each block group ; assume two block
* groups plus ex_ee_len / blocks_per_block_group for
* the worst case
*/
credits = 7 + 2 * ( ex_ee_len / EXT4_BLOCKS_PER_GROUP ( inode - > i_sb ) ) ;
2006-10-11 12:21:03 +04:00
if ( ex = = EXT_FIRST_EXTENT ( eh ) ) {
correct_index = 1 ;
credits + = ( ext_depth ( inode ) ) + 1 ;
}
2009-12-09 06:42:15 +03:00
credits + = EXT4_MAXQUOTAS_TRANS_BLOCKS ( inode - > i_sb ) ;
2006-10-11 12:21:03 +04:00
2009-08-18 06:17:20 +04:00
err = ext4_ext_truncate_extend_restart ( handle , inode , credits ) ;
2008-07-12 03:27:31 +04:00
if ( err )
2006-10-11 12:21:03 +04:00
goto out ;
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
goto out ;
2011-09-10 02:54:51 +04:00
err = ext4_remove_blocks ( handle , inode , ex , partial_cluster ,
a , b ) ;
2006-10-11 12:21:03 +04:00
if ( err )
goto out ;
2011-10-25 13:35:05 +04:00
if ( num = = 0 )
2006-10-11 12:21:07 +04:00
/* this extent is removed; mark slot entirely unused */
2006-10-11 12:21:05 +04:00
ext4_ext_store_pblock ( ex , 0 ) ;
2006-10-11 12:21:03 +04:00
ex - > ee_len = cpu_to_le16 ( num ) ;
2007-07-18 17:02:56 +04:00
/*
* Do not mark uninitialized if all the blocks in the
* extent have been removed .
*/
if ( uninitialized & & num )
2007-07-18 05:42:41 +04:00
ext4_ext_mark_uninitialized ( ex ) ;
2011-05-25 15:41:43 +04:00
/*
* If the extent was completely released ,
* we need to remove it from the leaf
*/
if ( num = = 0 ) {
2011-06-06 08:05:17 +04:00
if ( end ! = EXT_MAX_BLOCKS - 1 ) {
2011-05-25 15:41:43 +04:00
/*
* For hole punching , we need to scoot all the
* extents up when an extent is removed so that
* we don ' t have blank extents in the middle
*/
memmove ( ex , ex + 1 , ( EXT_LAST_EXTENT ( eh ) - ex ) *
sizeof ( struct ext4_extent ) ) ;
/* Now get rid of the one at the end */
memset ( EXT_LAST_EXTENT ( eh ) , 0 ,
sizeof ( struct ext4_extent ) ) ;
}
le16_add_cpu ( & eh - > eh_entries , - 1 ) ;
2011-09-10 02:54:51 +04:00
} else
* partial_cluster = 0 ;
2011-05-25 15:41:43 +04:00
2011-10-25 13:35:05 +04:00
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
if ( err )
goto out ;
2006-10-11 12:21:11 +04:00
ext_debug ( " new extent: %u:%u:%llu \n " , block , num ,
2010-10-28 05:30:14 +04:00
ext4_ext_pblock ( ex ) ) ;
2006-10-11 12:21:03 +04:00
ex - - ;
ex_ee_block = le32_to_cpu ( ex - > ee_block ) ;
2007-07-18 05:42:41 +04:00
ex_ee_len = ext4_ext_get_actual_len ( ex ) ;
2006-10-11 12:21:03 +04:00
}
if ( correct_index & & eh - > eh_entries )
err = ext4_ext_correct_indexes ( handle , inode , path ) ;
2011-09-10 02:54:51 +04:00
/*
* If there is still an entry in the leaf node , check to see if
* it references the partial cluster . This is the only place
* where it could ; if it doesn ' t , we can free the cluster .
*/
if ( * partial_cluster & & ex > = EXT_FIRST_EXTENT ( eh ) & &
( EXT4_B2C ( sbi , ext4_ext_pblock ( ex ) + ex_ee_len - 1 ) ! =
* partial_cluster ) ) {
int flags = EXT4_FREE_BLOCKS_FORGET ;
if ( S_ISDIR ( inode - > i_mode ) | | S_ISLNK ( inode - > i_mode ) )
flags | = EXT4_FREE_BLOCKS_METADATA ;
ext4_free_blocks ( handle , inode , NULL ,
EXT4_C2B ( sbi , * partial_cluster ) ,
sbi - > s_cluster_ratio , flags ) ;
* partial_cluster = 0 ;
}
2006-10-11 12:21:03 +04:00
/* if this leaf is free, then we should
* remove it from index block above */
if ( err = = 0 & & eh - > eh_entries = = 0 & & path [ depth ] . p_bh ! = NULL )
err = ext4_ext_rm_idx ( handle , inode , path + depth ) ;
out :
return err ;
}
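/*
 * Border example (hypothetical extent): truncating from start = 105
 * over an extent covering logical blocks [100, 110) yields a = 105 and
 * b = 109 (end is EXT_MAX_BLOCKS - 1 on the truncate path), so only
 * the tail [105, 110) is freed and the extent is shortened to
 * num = a - ex_ee_block = 5 blocks.
 */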
/*
2006-10-11 12:21:07 +04:00
* ext4_ext_more_to_rm :
* returns 1 if current index has to be freed ( even partial )
2006-10-11 12:21:03 +04:00
*/
2006-12-07 07:41:36 +03:00
static int
2006-10-11 12:21:03 +04:00
ext4_ext_more_to_rm ( struct ext4_ext_path * path )
{
BUG_ON ( path - > p_idx = = NULL ) ;
if ( path - > p_idx < EXT_FIRST_INDEX ( path - > p_hdr ) )
return 0 ;
/*
2006-10-11 12:21:07 +04:00
* if truncate on deeper level happened , it wasn ' t partial ,
2006-10-11 12:21:03 +04:00
* so we have to consider current index for truncation
*/
if ( le16_to_cpu ( path - > p_hdr - > eh_entries ) = = path - > p_block )
return 0 ;
return 1 ;
}
2011-07-18 07:21:03 +04:00
static int ext4_ext_remove_space ( struct inode * inode , ext4_lblk_t start )
2006-10-11 12:21:03 +04:00
{
struct super_block * sb = inode - > i_sb ;
int depth = ext_depth ( inode ) ;
struct ext4_ext_path * path ;
2011-09-10 02:54:51 +04:00
ext4_fsblk_t partial_cluster = 0 ;
2006-10-11 12:21:03 +04:00
handle_t * handle ;
2010-05-17 09:00:00 +04:00
int i , err ;
2006-10-11 12:21:03 +04:00
2008-01-29 07:58:27 +03:00
ext_debug ( " truncate since %u \n " , start ) ;
2006-10-11 12:21:03 +04:00
/* probably first extent we're gonna free will be last in block */
handle = ext4_journal_start ( inode , depth + 1 ) ;
if ( IS_ERR ( handle ) )
return PTR_ERR ( handle ) ;
2010-05-17 09:00:00 +04:00
again :
2006-10-11 12:21:03 +04:00
ext4_ext_invalidate_cache ( inode ) ;
2011-09-10 03:18:51 +04:00
trace_ext4_ext_remove_space ( inode , start , depth ) ;
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* We start scanning from right side , freeing all the blocks
* after i_size and walking into the tree depth - wise .
2006-10-11 12:21:03 +04:00
*/
2010-05-17 09:00:00 +04:00
depth = ext_depth ( inode ) ;
2008-04-30 06:02:02 +04:00
path = kzalloc ( sizeof ( struct ext4_ext_path ) * ( depth + 1 ) , GFP_NOFS ) ;
2006-10-11 12:21:03 +04:00
if ( path = = NULL ) {
ext4_journal_stop ( handle ) ;
return - ENOMEM ;
}
2010-05-17 09:00:00 +04:00
path [ 0 ] . p_depth = depth ;
2006-10-11 12:21:03 +04:00
path [ 0 ] . p_hdr = ext_inode_hdr ( inode ) ;
2009-03-12 16:51:20 +03:00
if ( ext4_ext_check ( inode , path [ 0 ] . p_hdr , depth ) ) {
2006-10-11 12:21:03 +04:00
err = - EIO ;
goto out ;
}
2010-05-17 09:00:00 +04:00
i = err = 0 ;
2006-10-11 12:21:03 +04:00
while ( i > = 0 & & err = = 0 ) {
if ( i = = depth ) {
/* this is leaf block */
2011-05-25 15:41:43 +04:00
err = ext4_ext_rm_leaf ( handle , inode , path ,
2011-09-10 02:54:51 +04:00
& partial_cluster , start ,
EXT_MAX_BLOCKS - 1 ) ;
2006-10-11 12:21:07 +04:00
/* root level has p_bh == NULL, brelse() eats this */
2006-10-11 12:21:03 +04:00
brelse ( path [ i ] . p_bh ) ;
path [ i ] . p_bh = NULL ;
i - - ;
continue ;
}
/* this is index block */
if ( ! path [ i ] . p_hdr ) {
ext_debug ( " initialize header \n " ) ;
path [ i ] . p_hdr = ext_block_hdr ( path [ i ] . p_bh ) ;
}
if ( ! path [ i ] . p_idx ) {
2006-10-11 12:21:07 +04:00
/* this level hasn't been touched yet */
2006-10-11 12:21:03 +04:00
path [ i ] . p_idx = EXT_LAST_INDEX ( path [ i ] . p_hdr ) ;
path [ i ] . p_block = le16_to_cpu ( path [ i ] . p_hdr - > eh_entries ) + 1 ;
ext_debug ( " init index ptr: hdr 0x%p, num %d \n " ,
path [ i ] . p_hdr ,
le16_to_cpu ( path [ i ] . p_hdr - > eh_entries ) ) ;
} else {
2006-10-11 12:21:07 +04:00
/* we were already here, see at next index */
2006-10-11 12:21:03 +04:00
path [ i ] . p_idx - - ;
}
ext_debug ( " level %d - index, first 0x%p, cur 0x%p \n " ,
i , EXT_FIRST_INDEX ( path [ i ] . p_hdr ) ,
path [ i ] . p_idx ) ;
if ( ext4_ext_more_to_rm ( path + i ) ) {
2007-07-18 17:19:09 +04:00
struct buffer_head * bh ;
2006-10-11 12:21:03 +04:00
/* go to the next level */
2006-10-11 12:21:11 +04:00
ext_debug ( " move to level %d (block %llu) \n " ,
2010-10-28 05:30:14 +04:00
i + 1 , ext4_idx_pblock ( path [ i ] . p_idx ) ) ;
2006-10-11 12:21:03 +04:00
memset ( path + i + 1 , 0 , sizeof ( * path ) ) ;
2010-10-28 05:30:14 +04:00
bh = sb_bread ( sb , ext4_idx_pblock ( path [ i ] . p_idx ) ) ;
2007-07-18 17:19:09 +04:00
if ( ! bh ) {
2006-10-11 12:21:03 +04:00
/* should we reset i_size? */
err = - EIO ;
break ;
}
2007-07-18 17:19:09 +04:00
if ( WARN_ON ( i + 1 > depth ) ) {
err = - EIO ;
break ;
}
2009-03-12 16:51:20 +03:00
if ( ext4_ext_check ( inode , ext_block_hdr ( bh ) ,
2007-07-18 17:19:09 +04:00
depth - i - 1 ) ) {
err = - EIO ;
break ;
}
path [ i + 1 ] . p_bh = bh ;
2006-10-11 12:21:03 +04:00
2006-10-11 12:21:07 +04:00
/* save actual number of indexes since this
* number is changed at the next iteration */
2006-10-11 12:21:03 +04:00
path [ i ] . p_block = le16_to_cpu ( path [ i ] . p_hdr - > eh_entries ) ;
i + + ;
} else {
2006-10-11 12:21:07 +04:00
/* we finished processing this index, go up */
2006-10-11 12:21:03 +04:00
if ( path [ i ] . p_hdr - > eh_entries = = 0 & & i > 0 ) {
2006-10-11 12:21:07 +04:00
/* index is empty, remove it;
2006-10-11 12:21:03 +04:00
* handle must be already prepared by the
* truncation of the leaf ( see ext4_ext_rm_leaf ( ) ) */
err = ext4_ext_rm_idx ( handle , inode , path + i ) ;
}
2006-10-11 12:21:07 +04:00
/* root level has p_bh == NULL, brelse() eats this */
2006-10-11 12:21:03 +04:00
brelse ( path [ i ] . p_bh ) ;
path [ i ] . p_bh = NULL ;
i - - ;
ext_debug ( " return to level %d \n " , i ) ;
}
}
2011-09-10 03:18:51 +04:00
trace_ext4_ext_remove_space_done ( inode , start , depth , partial_cluster ,
path - > p_hdr - > eh_entries ) ;
2011-09-10 03:04:51 +04:00
/* If we still have something in the partial cluster and we have removed
* even the first extent , then we should free the blocks in the partial
* cluster as well . */
if ( partial_cluster & & path - > p_hdr - > eh_entries = = 0 ) {
int flags = EXT4_FREE_BLOCKS_FORGET ;
if ( S_ISDIR ( inode - > i_mode ) | | S_ISLNK ( inode - > i_mode ) )
flags | = EXT4_FREE_BLOCKS_METADATA ;
ext4_free_blocks ( handle , inode , NULL ,
EXT4_C2B ( EXT4_SB ( sb ) , partial_cluster ) ,
EXT4_SB ( sb ) - > s_cluster_ratio , flags ) ;
partial_cluster = 0 ;
}
2006-10-11 12:21:03 +04:00
/* TODO: flexible tree reduction should be here */
if ( path - > p_hdr - > eh_entries = = 0 ) {
/*
2006-10-11 12:21:07 +04:00
* truncate to zero freed all the tree ,
* so we need to correct eh_depth
2006-10-11 12:21:03 +04:00
*/
err = ext4_ext_get_access ( handle , inode , path ) ;
if ( err = = 0 ) {
ext_inode_hdr ( inode ) - > eh_depth = 0 ;
ext_inode_hdr ( inode ) - > eh_max =
2009-08-28 18:40:33 +04:00
cpu_to_le16 ( ext4_ext_space_root ( inode , 0 ) ) ;
2006-10-11 12:21:03 +04:00
err = ext4_ext_dirty ( handle , inode , path ) ;
}
}
out :
ext4_ext_drop_refs ( path ) ;
kfree ( path ) ;
2010-05-17 09:00:00 +04:00
if ( err = = - EAGAIN )
goto again ;
2006-10-11 12:21:03 +04:00
ext4_journal_stop ( handle ) ;
return err ;
}
/*
* called at mount time
*/
void ext4_ext_init ( struct super_block * sb )
{
/*
* possible initialization would be here
*/
2009-01-06 22:53:16 +03:00
if ( EXT4_HAS_INCOMPAT_FEATURE ( sb , EXT4_FEATURE_INCOMPAT_EXTENTS ) ) {
2009-09-29 23:51:30 +04:00
# if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
2008-09-09 07:00:52 +04:00
printk ( KERN_INFO " EXT4-fs: file extents enabled " ) ;
2007-02-17 21:20:16 +03:00
# ifdef AGGRESSIVE_TEST
printk ( " , aggressive tests " ) ;
2006-10-11 12:21:03 +04:00
# endif
# ifdef CHECK_BINSEARCH
printk ( " , check binsearch " ) ;
# endif
# ifdef EXTENTS_STATS
printk ( " , stats " ) ;
# endif
printk ( " \n " ) ;
2009-09-29 23:51:30 +04:00
# endif
2006-10-11 12:21:03 +04:00
# ifdef EXTENTS_STATS
spin_lock_init ( & EXT4_SB ( sb ) - > s_ext_stats_lock ) ;
EXT4_SB ( sb ) - > s_ext_min = 1 < < 30 ;
EXT4_SB ( sb ) - > s_ext_max = 0 ;
# endif
}
}
/*
* called at umount time
*/
void ext4_ext_release ( struct super_block * sb )
{
2009-01-06 22:53:16 +03:00
if ( ! EXT4_HAS_INCOMPAT_FEATURE ( sb , EXT4_FEATURE_INCOMPAT_EXTENTS ) )
2006-10-11 12:21:03 +04:00
return ;
# ifdef EXTENTS_STATS
if ( EXT4_SB ( sb ) - > s_ext_blocks & & EXT4_SB ( sb ) - > s_ext_extents ) {
struct ext4_sb_info * sbi = EXT4_SB ( sb ) ;
printk ( KERN_ERR " EXT4-fs: %lu blocks in %lu extents (%lu ave) \n " ,
sbi - > s_ext_blocks , sbi - > s_ext_extents ,
sbi - > s_ext_blocks / sbi - > s_ext_extents ) ;
printk ( KERN_ERR " EXT4-fs: extents: %lu min, %lu max, max depth %lu \n " ,
sbi - > s_ext_min , sbi - > s_ext_max , sbi - > s_depth_max ) ;
}
# endif
}
2008-04-29 16:11:12 +04:00
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout ( struct inode * inode , struct ext4_extent * ex )
{
2010-10-28 05:30:06 +04:00
ext4_fsblk_t ee_pblock ;
unsigned int ee_len ;
2010-05-12 08:00:00 +04:00
int ret ;
2008-04-29 16:11:12 +04:00
ee_len = ext4_ext_get_actual_len ( ex ) ;
2010-10-28 05:30:14 +04:00
ee_pblock = ext4_ext_pblock ( ex ) ;
2010-05-12 08:00:00 +04:00
2010-10-28 07:44:47 +04:00
ret = sb_issue_zeroout ( inode - > i_sb , ee_pblock , ee_len , GFP_NOFS ) ;
2010-10-28 05:30:06 +04:00
if ( ret > 0 )
ret = 0 ;
2008-04-29 16:11:12 +04:00
2010-10-28 05:30:06 +04:00
return ret ;
2008-04-29 16:11:12 +04:00
}
2011-05-03 20:23:07 +04:00
/*
* used by extent splitting .
*/
# define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
due to ENOSPC */
# define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
# define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
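/*
 * Example combination (as used by the split helpers below): splitting
 * an uninitialized extent that lies fully inside i_size typically
 * passes EXT4_EXT_MARK_UNINIT1 | EXT4_EXT_MARK_UNINIT2 together with
 * EXT4_EXT_MAY_ZEROOUT, so that an ENOSPC during the split can fall
 * back to zeroing the original extent instead of failing.
 */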
/*
* ext4_split_extent_at ( ) splits an extent at given block .
*
* @ handle : the journal handle
* @ inode : the file inode
* @ path : the path to the extent
* @ split : the logical block where the extent is split .
* @ split_flags : indicates if the extent may be zeroed out if the split fails ,
* and the states ( init or uninit ) of the new extents .
* @ flags : flags used to insert the new extent into the extent tree .
*
*
* Splits extent [ a , b ] into two extents [ a , @ split ) and [ @ split , b ] , states
* of which are determined by split_flag .
*
* There are two cases :
* a > the extent is split into two extents .
* b > split is not needed , and just mark the extent .
*
* return 0 on success .
*/
static int ext4_split_extent_at ( handle_t * handle ,
struct inode * inode ,
struct ext4_ext_path * path ,
ext4_lblk_t split ,
int split_flag ,
int flags )
{
ext4_fsblk_t newblock ;
ext4_lblk_t ee_block ;
struct ext4_extent * ex , newex , orig_ex ;
struct ext4_extent * ex2 = NULL ;
unsigned int ee_len , depth ;
int err = 0 ;
ext_debug ( " ext4_split_extents_at: inode %lu, logical "
" block %llu \n " , inode - > i_ino , ( unsigned long long ) split ) ;
ext4_ext_show_leaf ( inode , path ) ;
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
ee_block = le32_to_cpu ( ex - > ee_block ) ;
ee_len = ext4_ext_get_actual_len ( ex ) ;
newblock = split - ee_block + ext4_ext_pblock ( ex ) ;
BUG_ON ( split < ee_block | | split > = ( ee_block + ee_len ) ) ;
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
goto out ;
if ( split = = ee_block ) {
/*
* case b : block @ split is the block that the extent begins with
* then we just change the state of the extent , and splitting
* is not needed .
*/
if ( split_flag & EXT4_EXT_MARK_UNINIT2 )
ext4_ext_mark_uninitialized ( ex ) ;
else
ext4_ext_mark_initialized ( ex ) ;
if ( ! ( flags & EXT4_GET_BLOCKS_PRE_IO ) )
ext4_ext_try_to_merge ( inode , path , ex ) ;
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
goto out ;
}
/* case a */
memcpy ( & orig_ex , ex , sizeof ( orig_ex ) ) ;
ex - > ee_len = cpu_to_le16 ( split - ee_block ) ;
if ( split_flag & EXT4_EXT_MARK_UNINIT1 )
ext4_ext_mark_uninitialized ( ex ) ;
/*
* path may lead to new leaf , not to original leaf any more
* after ext4_ext_insert_extent ( ) returns ,
*/
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
if ( err )
goto fix_extent_len ;
ex2 = & newex ;
ex2 - > ee_block = cpu_to_le32 ( split ) ;
ex2 - > ee_len = cpu_to_le16 ( ee_len - ( split - ee_block ) ) ;
ext4_ext_store_pblock ( ex2 , newblock ) ;
if ( split_flag & EXT4_EXT_MARK_UNINIT2 )
ext4_ext_mark_uninitialized ( ex2 ) ;
err = ext4_ext_insert_extent ( handle , inode , path , & newex , flags ) ;
if ( err = = - ENOSPC & & ( EXT4_EXT_MAY_ZEROOUT & split_flag ) ) {
err = ext4_ext_zeroout ( inode , & orig_ex ) ;
if ( err )
goto fix_extent_len ;
/* update the extent length and mark as initialized */
ex - > ee_len = cpu_to_le16 ( ee_len ) ;
ext4_ext_try_to_merge ( inode , path , ex ) ;
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
goto out ;
} else if ( err )
goto fix_extent_len ;
out :
ext4_ext_show_leaf ( inode , path ) ;
return err ;
fix_extent_len :
ex - > ee_len = orig_ex . ee_len ;
ext4_ext_dirty ( handle , inode , path + depth ) ;
return err ;
}
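/*
 * Worked example (hypothetical extent): splitting
 * ex = { ee_block = 100, ee_len = 10, pblk = 500 } at split = 104 is
 * "case a": ex is trimmed to 4 blocks and a new extent
 * { 104, 6, 504 } is inserted after it.  If split == 100 ("case b"),
 * no insertion happens and only the init/uninit state of ex changes.
 */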
/*
* ext4_split_extent ( ) splits an extent and marks the extent which is covered
* by @ map as split_flag indicates
*
* It may result in splitting the extent into multiple extents ( up to three )
* There are three possibilities :
* a > There is no split required
* b > Splits in two extents : Split is happening at either end of the extent
* c > Splits in three extents : Someone is splitting in the middle of the extent
*
*/
static int ext4_split_extent ( handle_t * handle ,
struct inode * inode ,
struct ext4_ext_path * path ,
struct ext4_map_blocks * map ,
int split_flag ,
int flags )
{
ext4_lblk_t ee_block ;
struct ext4_extent * ex ;
unsigned int ee_len , depth ;
int err = 0 ;
int uninitialized ;
int split_flag1 , flags1 ;
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
ee_block = le32_to_cpu ( ex - > ee_block ) ;
ee_len = ext4_ext_get_actual_len ( ex ) ;
uninitialized = ext4_ext_is_uninitialized ( ex ) ;
if ( map - > m_lblk + map - > m_len < ee_block + ee_len ) {
split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
EXT4_EXT_MAY_ZEROOUT : 0 ;
flags1 = flags | EXT4_GET_BLOCKS_PRE_IO ;
if ( uninitialized )
split_flag1 | = EXT4_EXT_MARK_UNINIT1 |
EXT4_EXT_MARK_UNINIT2 ;
err = ext4_split_extent_at ( handle , inode , path ,
map - > m_lblk + map - > m_len , split_flag1 , flags1 ) ;
2011-05-23 04:49:12 +04:00
if ( err )
goto out ;
2011-05-03 20:23:07 +04:00
}
ext4_ext_drop_refs ( path ) ;
path = ext4_ext_find_extent ( inode , map - > m_lblk , path ) ;
if ( IS_ERR ( path ) )
return PTR_ERR ( path ) ;
if ( map - > m_lblk > = ee_block ) {
split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
EXT4_EXT_MAY_ZEROOUT : 0 ;
if ( uninitialized )
split_flag1 | = EXT4_EXT_MARK_UNINIT1 ;
if ( split_flag & EXT4_EXT_MARK_UNINIT2 )
split_flag1 | = EXT4_EXT_MARK_UNINIT2 ;
err = ext4_split_extent_at ( handle , inode , path ,
map - > m_lblk , split_flag1 , flags ) ;
if ( err )
goto out ;
}
ext4_ext_show_leaf ( inode , path ) ;
out :
return err ? err : map - > m_len ;
}
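/*
 * Example (hypothetical map): for ex = [100, 120) and a map covering
 * [104, 110), the first ext4_split_extent_at() call splits at the
 * right boundary 110, the path is looked up again, and the second call
 * splits at 104, leaving [100, 104), [104, 110) and [110, 120) with
 * states chosen by split_flag.
 */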
2008-04-17 18:38:59 +04:00
# define EXT4_EXT_ZERO_LEN 7
2007-07-18 05:42:38 +04:00
/*
2010-05-17 03:00:00 +04:00
* This function is called by ext4_ext_map_blocks ( ) if someone tries to write
2007-07-18 05:42:38 +04:00
* to an uninitialized extent . It may result in splitting the uninitialized
2011-03-31 05:57:33 +04:00
* extent into multiple extents ( up to three - one initialized and two
2007-07-18 05:42:38 +04:00
* uninitialized ) .
* There are three possibilities :
* a > There is no split required : Entire extent should be initialized
* b > Splits in two extents : Write is happening at either end of the extent
* c > Splits in three extents : Someone is writing in the middle of the extent
2011-10-27 19:43:23 +04:00
*
* Pre - conditions :
* - The extent pointed to by ' path ' is uninitialized .
* - The extent pointed to by ' path ' contains a superset
* of the logical span [ map - > m_lblk , map - > m_lblk + map - > m_len ) .
*
* Post - conditions on success :
* - the returned value is the number of blocks beyond map - > m_lblk
* that are allocated and initialized .
* It is guaranteed to be > = map - > m_len .
2007-07-18 05:42:38 +04:00
*/
2008-01-29 07:58:27 +03:00
static int ext4_ext_convert_to_initialized ( handle_t * handle ,
2010-05-17 03:00:00 +04:00
struct inode * inode ,
struct ext4_map_blocks * map ,
struct ext4_ext_path * path )
2007-07-18 05:42:38 +04:00
{
2011-10-27 19:43:23 +04:00
struct ext4_extent_header * eh ;
2011-05-03 20:25:07 +04:00
struct ext4_map_blocks split_map ;
struct ext4_extent zero_ex ;
struct ext4_extent * ex ;
2010-05-16 14:00:00 +04:00
ext4_lblk_t ee_block , eof_block ;
2011-10-26 11:42:36 +04:00
unsigned int ee_len , depth ;
int allocated ;
2007-07-18 05:42:38 +04:00
int err = 0 ;
2011-05-03 20:25:07 +04:00
int split_flag = 0 ;
2010-05-16 14:00:00 +04:00
ext_debug ( " ext4_ext_convert_to_initialized: inode %lu, logical "
" block %llu, max_blocks %u \n " , inode - > i_ino ,
2010-05-17 03:00:00 +04:00
( unsigned long long ) map - > m_lblk , map - > m_len ) ;
2010-05-16 14:00:00 +04:00
eof_block = ( inode - > i_size + inode - > i_sb - > s_blocksize - 1 ) > >
inode - > i_sb - > s_blocksize_bits ;
2010-05-17 03:00:00 +04:00
if ( eof_block < map - > m_lblk + map - > m_len )
eof_block = map - > m_lblk + map - > m_len ;
2007-07-18 05:42:38 +04:00
depth = ext_depth ( inode ) ;
2011-10-27 19:43:23 +04:00
eh = path [ depth ] . p_hdr ;
2007-07-18 05:42:38 +04:00
ex = path [ depth ] . p_ext ;
ee_block = le32_to_cpu ( ex - > ee_block ) ;
ee_len = ext4_ext_get_actual_len ( ex ) ;
2010-05-17 03:00:00 +04:00
allocated = ee_len - ( map - > m_lblk - ee_block ) ;
2007-07-18 05:42:38 +04:00
2011-10-27 19:43:23 +04:00
trace_ext4_ext_convert_to_initialized_enter ( inode , map , ex ) ;
/* Pre-conditions */
BUG_ON ( ! ext4_ext_is_uninitialized ( ex ) ) ;
BUG_ON ( ! in_range ( map - > m_lblk , ee_block , ee_len ) ) ;
BUG_ON ( map - > m_lblk + map - > m_len > ee_block + ee_len ) ;
/*
* Attempt to transfer newly initialized blocks from the currently
* uninitialized extent to its left neighbor . This is much cheaper
* than an insertion followed by a merge as those involve costly
* memmove ( ) calls . This is the common case in steady state for
* workloads doing fallocate ( FALLOC_FL_KEEP_SIZE ) followed by append
* writes .
*
* Limitations of the current logic :
* - L1 : we only deal with writes at the start of the extent .
* The approach could be extended to writes at the end
* of the extent but this scenario was deemed less common .
* - L2 : we do not deal with writes covering the whole extent .
* This would require removing the extent if the transfer
* is possible .
* - L3 : we only attempt to merge with an extent stored in the
* same extent tree node .
*/
if ( ( map - > m_lblk = = ee_block ) & & /*L1*/
( map - > m_len < ee_len ) & & /*L2*/
( ex > EXT_FIRST_EXTENT ( eh ) ) ) { /*L3*/
struct ext4_extent * prev_ex ;
ext4_lblk_t prev_lblk ;
ext4_fsblk_t prev_pblk , ee_pblk ;
unsigned int prev_len , write_len ;
prev_ex = ex - 1 ;
prev_lblk = le32_to_cpu ( prev_ex - > ee_block ) ;
prev_len = ext4_ext_get_actual_len ( prev_ex ) ;
prev_pblk = ext4_ext_pblock ( prev_ex ) ;
ee_pblk = ext4_ext_pblock ( ex ) ;
write_len = map - > m_len ;
/*
* A transfer of blocks from ' ex ' to ' prev_ex ' is allowed
* upon those conditions :
* - C1 : prev_ex is initialized ,
* - C2 : prev_ex is logically abutting ex ,
* - C3 : prev_ex is physically abutting ex ,
* - C4 : prev_ex can receive the additional blocks without
* overflowing the ( initialized ) length limit .
*/
if ( ( ! ext4_ext_is_uninitialized ( prev_ex ) ) & & /*C1*/
( ( prev_lblk + prev_len ) = = ee_block ) & & /*C2*/
( ( prev_pblk + prev_len ) = = ee_pblk ) & & /*C3*/
( prev_len < ( EXT_INIT_MAX_LEN - write_len ) ) ) { /*C4*/
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
goto out ;
trace_ext4_ext_convert_to_initialized_fastpath ( inode ,
map , ex , prev_ex ) ;
/* Shift the start of ex by 'write_len' blocks */
ex - > ee_block = cpu_to_le32 ( ee_block + write_len ) ;
ext4_ext_store_pblock ( ex , ee_pblk + write_len ) ;
ex - > ee_len = cpu_to_le16 ( ee_len - write_len ) ;
ext4_ext_mark_uninitialized ( ex ) ; /* Restore the flag */
/* Extend prev_ex by 'write_len' blocks */
prev_ex - > ee_len = cpu_to_le16 ( prev_len + write_len ) ;
/* Mark the block containing both extents as dirty */
ext4_ext_dirty ( handle , inode , path + depth ) ;
/* Update path to point to the right extent */
path [ depth ] . p_ext = prev_ex ;
/* Result: number of initialized blocks past m_lblk */
allocated = write_len ;
goto out ;
}
}
2011-05-03 20:25:07 +04:00
WARN_ON ( map - > m_lblk < ee_block ) ;
2010-05-16 14:00:00 +04:00
/*
* It is safe to convert extent to initialized via explicit
* zeroout only if the extent is fully inside i_size or new_size .
*/
2011-05-03 20:25:07 +04:00
split_flag | = ee_block + ee_len < = eof_block ? EXT4_EXT_MAY_ZEROOUT : 0 ;
2010-05-16 14:00:00 +04:00
2008-04-17 18:38:59 +04:00
/* If the extent has at most 2*EXT4_EXT_ZERO_LEN blocks, zeroout directly */
2011-05-03 20:25:07 +04:00
if ( ee_len < = 2 * EXT4_EXT_ZERO_LEN & &
( EXT4_EXT_MAY_ZEROOUT & split_flag ) ) {
err = ext4_ext_zeroout ( inode , ex ) ;
2008-04-17 18:38:59 +04:00
if ( err )
2008-08-03 02:51:32 +04:00
goto out ;
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
goto out ;
2011-05-03 20:25:07 +04:00
ext4_ext_mark_initialized ( ex ) ;
ext4_ext_try_to_merge ( inode , path , ex ) ;
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
goto out ;
2007-07-18 05:42:38 +04:00
}
2011-05-03 20:25:07 +04:00
2007-07-18 05:42:38 +04:00
/*
2011-05-03 20:25:07 +04:00
* four cases :
* 1. split the extent into three extents .
* 2. split the extent into two extents , zeroout the first half .
* 3. split the extent into two extents , zeroout the second half .
* 4. split the extent into two extents without zeroout .
2007-07-18 05:42:38 +04:00
*/
2011-05-03 20:25:07 +04:00
split_map . m_lblk = map - > m_lblk ;
split_map . m_len = map - > m_len ;
if ( allocated > map - > m_len ) {
if ( allocated < = EXT4_EXT_ZERO_LEN & &
( EXT4_EXT_MAY_ZEROOUT & split_flag ) ) {
/* case 3 */
zero_ex . ee_block =
2011-05-16 18:11:09 +04:00
cpu_to_le32 ( map - > m_lblk ) ;
zero_ex . ee_len = cpu_to_le16 ( allocated ) ;
2011-05-03 20:25:07 +04:00
ext4_ext_store_pblock ( & zero_ex ,
ext4_ext_pblock ( ex ) + map - > m_lblk - ee_block ) ;
err = ext4_ext_zeroout ( inode , & zero_ex ) ;
2007-07-18 05:42:38 +04:00
if ( err )
goto out ;
2011-05-03 20:25:07 +04:00
split_map . m_lblk = map - > m_lblk ;
split_map . m_len = allocated ;
} else if ( ( map - > m_lblk - ee_block + map - > m_len <
EXT4_EXT_ZERO_LEN ) & &
( EXT4_EXT_MAY_ZEROOUT & split_flag ) ) {
/* case 2 */
if ( map - > m_lblk ! = ee_block ) {
zero_ex . ee_block = ex - > ee_block ;
zero_ex . ee_len = cpu_to_le16 ( map - > m_lblk -
ee_block ) ;
ext4_ext_store_pblock ( & zero_ex ,
ext4_ext_pblock ( ex ) ) ;
err = ext4_ext_zeroout ( inode , & zero_ex ) ;
if ( err )
goto out ;
}
split_map . m_lblk = ee_block ;
2011-05-16 18:11:09 +04:00
split_map . m_len = map - > m_lblk - ee_block + map - > m_len ;
allocated = map - > m_len ;
2007-07-18 05:42:38 +04:00
}
}
2011-05-03 20:25:07 +04:00
allocated = ext4_split_extent ( handle , inode , path ,
& split_map , split_flag , 0 ) ;
if ( allocated < 0 )
err = allocated ;
2007-07-18 05:42:38 +04:00
out :
return err ? err : allocated ;
}
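/*
 * Worked example of the case selection above (hypothetical numbers,
 * extent fully inside i_size so EXT4_EXT_MAY_ZEROOUT is set): for an
 * uninitialized extent [100, 120) and a write map of [102, 105),
 * allocated = 18 exceeds both m_len and EXT4_EXT_ZERO_LEN, but
 * m_lblk - ee_block + m_len = 5 < EXT4_EXT_ZERO_LEN, so "case 2"
 * zeroes the two-block head [100, 102) and widens split_map to
 * [100, 105); the split then leaves [100, 105) initialized and
 * [105, 120) uninitialized.
 */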
2009-09-28 23:49:08 +04:00
/*
2010-05-17 03:00:00 +04:00
* This function is called by ext4_ext_map_blocks ( ) from
2009-09-28 23:49:08 +04:00
* ext4_get_blocks_dio_write ( ) when DIO to write
* to an uninitialized extent .
*
2011-02-15 02:05:43 +03:00
* Writing to an uninitialized extent may result in splitting the uninitialized
2010-11-01 22:38:34 +03:00
* extent into multiple initialized / uninitialized extents ( up to three )
2009-09-28 23:49:08 +04:00
* There are three possibilities :
* a > There is no split required : Entire extent should be uninitialized
* b > Splits in two extents : Write is happening at either end of the extent
* c > Splits in three extents : Someone is writing in the middle of the extent
*
* One or more index blocks may be needed if the extent tree grows after
2010-11-01 22:38:34 +03:00
* the uninitialized extent is split . To prevent ENOSPC from occurring when the
2009-09-28 23:49:08 +04:00
* IO completes , we need to split the uninitialized extent before submitting
2010-06-11 14:17:00 +04:00
* the IO . The uninitialized extent handled at this time will be split
2009-09-28 23:49:08 +04:00
* into three uninitialized extents ( at most ) . After the IO completes , the part
* being filled will be converted to initialized by the end_io callback function
* via ext4_convert_unwritten_extents ( ) .
2009-11-06 12:01:23 +03:00
*
* Returns the size of the uninitialized extent to be written on success .
2009-09-28 23:49:08 +04:00
*/
static int ext4_split_unwritten_extents ( handle_t * handle ,
struct inode * inode ,
2010-05-17 03:00:00 +04:00
struct ext4_map_blocks * map ,
2009-09-28 23:49:08 +04:00
struct ext4_ext_path * path ,
int flags )
{
2011-05-03 20:25:07 +04:00
ext4_lblk_t eof_block ;
ext4_lblk_t ee_block ;
struct ext4_extent * ex ;
unsigned int ee_len ;
int split_flag = 0 , depth ;
2010-05-16 14:00:00 +04:00
ext_debug ( " ext4_split_unwritten_extents: inode %lu, logical "
" block %llu, max_blocks %u \n " , inode - > i_ino ,
2010-05-17 03:00:00 +04:00
( unsigned long long ) map - > m_lblk , map - > m_len ) ;
2010-05-16 14:00:00 +04:00
eof_block = ( inode - > i_size + inode - > i_sb - > s_blocksize - 1 ) > >
inode - > i_sb - > s_blocksize_bits ;
2010-05-17 03:00:00 +04:00
if ( eof_block < map - > m_lblk + map - > m_len )
eof_block = map - > m_lblk + map - > m_len ;
2010-05-16 14:00:00 +04:00
/*
* It is safe to convert extent to initialized via explicit
* zeroout only if extent is fully inside i_size or new_size .
*/
2011-05-03 20:25:07 +04:00
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
ee_block = le32_to_cpu ( ex - > ee_block ) ;
ee_len = ext4_ext_get_actual_len ( ex ) ;
2009-09-28 23:49:08 +04:00
2011-05-03 20:25:07 +04:00
split_flag | = ee_block + ee_len < = eof_block ? EXT4_EXT_MAY_ZEROOUT : 0 ;
split_flag | = EXT4_EXT_MARK_UNINIT2 ;
2009-09-28 23:49:08 +04:00
2011-05-03 20:25:07 +04:00
flags | = EXT4_GET_BLOCKS_PRE_IO ;
return ext4_split_extent ( handle , inode , path , map , split_flag , flags ) ;
2009-09-28 23:49:08 +04:00
}
2011-05-03 19:45:29 +04:00
2010-03-02 21:28:44 +03:00
static int ext4_convert_unwritten_extents_endio ( handle_t * handle ,
2009-09-28 23:49:08 +04:00
struct inode * inode ,
struct ext4_ext_path * path )
{
struct ext4_extent * ex ;
int depth ;
int err = 0 ;
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
2011-05-03 19:45:29 +04:00
ext_debug ( " ext4_convert_unwritten_extents_endio: inode %lu, logical "
" block %llu, max_blocks %u \n " , inode - > i_ino ,
( unsigned long long ) le32_to_cpu ( ex - > ee_block ) ,
ext4_ext_get_actual_len ( ex ) ) ;
2009-09-28 23:49:08 +04:00
err = ext4_ext_get_access ( handle , inode , path + depth ) ;
if ( err )
goto out ;
/* first mark the extent as initialized */
ext4_ext_mark_initialized ( ex ) ;
2011-05-03 19:45:29 +04:00
/* note: ext4_ext_correct_indexes() isn't needed here because
* borders are not changed
2009-09-28 23:49:08 +04:00
*/
2011-05-03 19:45:29 +04:00
ext4_ext_try_to_merge ( inode , path , ex ) ;
2009-09-28 23:49:08 +04:00
/* Mark modified extent as dirty */
err = ext4_ext_dirty ( handle , inode , path + depth ) ;
out :
ext4_ext_show_leaf ( inode , path ) ;
return err ;
}
2009-12-30 07:39:06 +03:00
static void unmap_underlying_metadata_blocks ( struct block_device * bdev ,
sector_t block , int count )
{
int i ;
for ( i = 0 ; i < count ; i + + )
unmap_underlying_metadata ( bdev , block + i ) ;
}
2010-10-28 05:23:12 +04:00
/*
* Handle EOFBLOCKS_FL flag , clearing it if necessary
*/
static int check_eofblocks_fl ( handle_t * handle , struct inode * inode ,
2011-01-10 21:03:35 +03:00
ext4_lblk_t lblk ,
2010-10-28 05:23:12 +04:00
struct ext4_ext_path * path ,
unsigned int len )
{
int i , depth ;
struct ext4_extent_header * eh ;
2011-03-23 21:08:27 +03:00
struct ext4_extent * last_ex ;
2010-10-28 05:23:12 +04:00
if ( ! ext4_test_inode_flag ( inode , EXT4_INODE_EOFBLOCKS ) )
return 0 ;
depth = ext_depth ( inode ) ;
eh = path [ depth ] . p_hdr ;
if ( unlikely ( ! eh - > eh_entries ) ) {
EXT4_ERROR_INODE ( inode , " eh->eh_entries == 0 and "
" EOFBLOCKS_FL set " ) ;
return - EIO ;
}
last_ex = EXT_LAST_EXTENT ( eh ) ;
/*
* We should clear the EOFBLOCKS_FL flag if we are writing the
* last block in the last extent in the file . We test this by
* first checking to see if the caller to
* ext4_ext_get_blocks ( ) was interested in the last block ( or
* a block beyond the last block ) in the current extent . If
* this turns out to be false , we can bail out from this
* function immediately .
*/
2011-01-10 21:03:35 +03:00
if ( lblk + len < le32_to_cpu ( last_ex - > ee_block ) +
2010-10-28 05:23:12 +04:00
ext4_ext_get_actual_len ( last_ex ) )
return 0 ;
/*
* If the caller does appear to be planning to write at or
* beyond the end of the current extent , we then test to see
* if the current extent is the last extent in the file , by
* checking to make sure it was reached via the rightmost node
* at each level of the tree .
*/
for ( i = depth - 1 ; i > = 0 ; i - - )
if ( path [ i ] . p_idx ! = EXT_LAST_INDEX ( path [ i ] . p_hdr ) )
return 0 ;
ext4_clear_inode_flag ( inode , EXT4_INODE_EOFBLOCKS ) ;
return ext4_mark_inode_dirty ( handle , inode ) ;
}
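/*
 * A sketch of when this fires ( illustrative , not from the source ) :
 * after fallocate ( ) with FALLOC_FL_KEEP_SIZE preallocates past EOF ,
 * EOFBLOCKS_FL is set . A later write covering the last block of the
 * last extent reaches that extent through EXT_LAST_INDEX ( ) at every
 * tree level , so the loop above falls through and the flag is
 * cleared ; any other write bails out early through one of the
 * " return 0 " paths .
 */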
2011-09-10 03:04:51 +04:00
/**
* ext4_find_delalloc_range : find delayed allocated block in the given range .
*
* Goes through the buffer heads in the range [ lblk_start , lblk_end ] and returns
* whether there are any buffers marked for delayed allocation . It returns ' 1 '
* on the first delalloc ' ed buffer head found . If no buffer head in the given
* range is marked for delalloc , it returns 0.
* lblk_start should always be < = lblk_end .
* search_hint_reverse is to indicate that searching in reverse from lblk_end to
* lblk_start might be more efficient ( i . e . , we will likely hit the delalloc ' ed
* block sooner ) . This is useful when blocks are truncated sequentially from
* lblk_start towards lblk_end .
*/
static int ext4_find_delalloc_range ( struct inode * inode ,
ext4_lblk_t lblk_start ,
ext4_lblk_t lblk_end ,
int search_hint_reverse )
{
struct address_space * mapping = inode - > i_mapping ;
struct buffer_head * head , * bh = NULL ;
struct page * page ;
ext4_lblk_t i , pg_lblk ;
pgoff_t index ;
/* reverse search won ' t work if fs block size is less than page size */
if ( inode - > i_blkbits < PAGE_CACHE_SHIFT )
search_hint_reverse = 0 ;
if ( search_hint_reverse )
i = lblk_end ;
else
i = lblk_start ;
index = i > > ( PAGE_CACHE_SHIFT - inode - > i_blkbits ) ;
while ( ( i > = lblk_start ) & & ( i < = lblk_end ) ) {
page = find_get_page ( mapping , index ) ;
2011-09-10 03:20:51 +04:00
if ( ! page )
2011-09-10 03:04:51 +04:00
goto nextpage ;
if ( ! page_has_buffers ( page ) )
goto nextpage ;
head = page_buffers ( page ) ;
if ( ! head )
goto nextpage ;
bh = head ;
pg_lblk = index < < ( PAGE_CACHE_SHIFT -
inode - > i_blkbits ) ;
do {
if ( unlikely ( pg_lblk < lblk_start ) ) {
/*
* This is possible when fs block size is less
* than page size and our cluster starts / ends in
* middle of the page . So we need to skip the
* initial few blocks till we reach the ' lblk '
*/
pg_lblk + + ;
continue ;
}
2011-09-10 03:20:51 +04:00
/* Check if the buffer is delayed allocated and that it
* is not yet mapped . ( when da - buffers are mapped during
* their writeout , their da_mapped bit is set . )
*/
if ( buffer_delay ( bh ) & & ! buffer_da_mapped ( bh ) ) {
2011-09-10 03:04:51 +04:00
page_cache_release ( page ) ;
2011-09-10 03:18:51 +04:00
trace_ext4_find_delalloc_range ( inode ,
lblk_start , lblk_end ,
search_hint_reverse ,
1 , i ) ;
2011-09-10 03:04:51 +04:00
return 1 ;
}
if ( search_hint_reverse )
i - - ;
else
i + + ;
} while ( ( i > = lblk_start ) & & ( i < = lblk_end ) & &
( ( bh = bh - > b_this_page ) ! = head ) ) ;
nextpage :
if ( page )
page_cache_release ( page ) ;
/*
* Move to next page . ' i ' will be the first lblk in the next
* page .
*/
if ( search_hint_reverse )
index - - ;
else
index + + ;
i = index < < ( PAGE_CACHE_SHIFT - inode - > i_blkbits ) ;
}
2011-09-10 03:18:51 +04:00
trace_ext4_find_delalloc_range ( inode , lblk_start , lblk_end ,
search_hint_reverse , 0 , 0 ) ;
2011-09-10 03:04:51 +04:00
return 0 ;
}
int ext4_find_delalloc_cluster ( struct inode * inode , ext4_lblk_t lblk ,
int search_hint_reverse )
{
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
ext4_lblk_t lblk_start , lblk_end ;
lblk_start = lblk & ( ~ ( sbi - > s_cluster_ratio - 1 ) ) ;
lblk_end = lblk_start + sbi - > s_cluster_ratio - 1 ;
return ext4_find_delalloc_range ( inode , lblk_start , lblk_end ,
search_hint_reverse ) ;
}
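/*
 * Worked example with assumed values : if s_cluster_ratio == 16 and
 * lblk == 35 , then lblk_start = 35 & ~15 = 32 and lblk_end = 47 , so
 * the search covers exactly the cluster [ 32 , 47 ] that contains
 * block 35 .
 */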
/**
* Determines how many complete clusters ( out of those specified by the ' map ' )
* are under delalloc and were reserved quota for .
* This function is called when we are writing out the blocks that were
* originally written with their allocation delayed , but then the space was
* allocated using fallocate ( ) before the delayed allocation could be resolved .
* The cases to look for are :
* ( ' = ' indicates delayed allocated blocks
* ' - ' indicates non - delayed allocated blocks )
* ( a ) partial clusters towards beginning and / or end outside of allocated range
* are not delalloc ' ed .
* Ex :
* | - - - - c - - - = | = = = = c = = = = | = = = = c = = = = | = = = - c - - - - |
* | + + + + + + allocated + + + + + + |
* = = > 4 complete clusters in above example
*
* ( b ) partial cluster ( outside of allocated range ) towards either end is
* marked for delayed allocation . In this case , we will exclude that
* cluster .
* Ex :
* | - - - - = = = = c = = = = = = = = | = = = = = = = = c = = = = = = = = |
* | + + + + + + allocated + + + + + + |
* = = > 1 complete cluster in above example
*
* Ex :
* | = = = = = = = = = = = = = = = = c = = = = = = = = = = = = = = = = |
* | + + + + + + allocated + + + + + + |
* = = > 0 complete clusters in above example
*
* The ext4_da_update_reserve_space will be called only if we
* determine here that there were some " entire " clusters that span
* this ' allocated ' range .
* In the non - bigalloc case , this function will just end up returning num_blks
* without ever calling ext4_find_delalloc_range .
*/
static unsigned int
get_reserved_cluster_alloc ( struct inode * inode , ext4_lblk_t lblk_start ,
unsigned int num_blks )
{
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
ext4_lblk_t alloc_cluster_start , alloc_cluster_end ;
ext4_lblk_t lblk_from , lblk_to , c_offset ;
unsigned int allocated_clusters = 0 ;
alloc_cluster_start = EXT4_B2C ( sbi , lblk_start ) ;
alloc_cluster_end = EXT4_B2C ( sbi , lblk_start + num_blks - 1 ) ;
/* max possible clusters for this allocation */
allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1 ;
2011-09-10 03:18:51 +04:00
trace_ext4_get_reserved_cluster_alloc ( inode , lblk_start , num_blks ) ;
2011-09-10 03:04:51 +04:00
/* Check towards left side */
c_offset = lblk_start & ( sbi - > s_cluster_ratio - 1 ) ;
if ( c_offset ) {
lblk_from = lblk_start & ( ~ ( sbi - > s_cluster_ratio - 1 ) ) ;
lblk_to = lblk_from + c_offset - 1 ;
if ( ext4_find_delalloc_range ( inode , lblk_from , lblk_to , 0 ) )
allocated_clusters - - ;
}
/* Now check towards right. */
c_offset = ( lblk_start + num_blks ) & ( sbi - > s_cluster_ratio - 1 ) ;
if ( allocated_clusters & & c_offset ) {
lblk_from = lblk_start + num_blks ;
lblk_to = lblk_from + ( sbi - > s_cluster_ratio - c_offset ) - 1 ;
if ( ext4_find_delalloc_range ( inode , lblk_from , lblk_to , 0 ) )
allocated_clusters - - ;
}
return allocated_clusters ;
}
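/*
 * Worked example with assumed values : s_cluster_ratio == 4 ,
 * lblk_start == 6 , num_blks == 7 ( blocks 6 .. 12 ) . The allocation
 * touches clusters [ 4 - 7 ] , [ 8 - 11 ] and [ 12 - 15 ] , so
 * allocated_clusters starts at 3 . The left check scans blocks 4 .. 5
 * and the right check scans blocks 13 .. 15 ; each side that still
 * holds delalloc ' ed buffers outside the allocated range drops one
 * cluster from the count .
 */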
2009-09-28 23:49:08 +04:00
static int
ext4_ext_handle_uninitialized_extents ( handle_t * handle , struct inode * inode ,
2010-05-17 03:00:00 +04:00
struct ext4_map_blocks * map ,
2009-09-28 23:49:08 +04:00
struct ext4_ext_path * path , int flags ,
2010-05-17 03:00:00 +04:00
unsigned int allocated , ext4_fsblk_t newblock )
2009-09-28 23:49:08 +04:00
{
int ret = 0 ;
int err = 0 ;
2009-09-28 23:48:29 +04:00
ext4_io_end_t * io = EXT4_I ( inode ) - > cur_aio_dio ;
2009-09-28 23:49:08 +04:00
ext_debug ( " ext4_ext_handle_uninitialized_extents: inode %lu, logical "
" block %llu, max_blocks %u, flags %d, allocated %u " ,
2010-05-17 03:00:00 +04:00
inode - > i_ino , ( unsigned long long ) map - > m_lblk , map - > m_len ,
2009-09-28 23:49:08 +04:00
flags , allocated ) ;
ext4_ext_show_leaf ( inode , path ) ;
2011-09-10 03:18:51 +04:00
trace_ext4_ext_handle_uninitialized_extents ( inode , map , allocated ,
newblock ) ;
2010-03-02 21:28:44 +03:00
/* get_block() before submit the IO, split the extent */
2010-03-05 00:14:02 +03:00
if ( ( flags & EXT4_GET_BLOCKS_PRE_IO ) ) {
2010-05-17 03:00:00 +04:00
ret = ext4_split_unwritten_extents ( handle , inode , map ,
path , flags ) ;
2009-11-10 18:48:04 +03:00
/*
* Flag the inode ( non aio case ) or end_io struct ( aio case )
2011-03-31 05:57:33 +04:00
* that this IO needs conversion to written when IO is
2009-11-10 18:48:04 +03:00
* completed
*/
2011-11-01 01:30:44 +04:00
if ( io )
ext4_set_io_unwritten_flag ( inode , io ) ;
else
2010-01-24 22:34:07 +03:00
ext4_set_inode_state ( inode , EXT4_STATE_DIO_UNWRITTEN ) ;
2010-03-05 00:14:02 +03:00
if ( ext4_should_dioread_nolock ( inode ) )
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_UNINIT ;
2009-09-28 23:49:08 +04:00
goto out ;
}
2010-03-02 21:28:44 +03:00
/* IO end_io complete, convert the filled extent to written */
2010-03-05 00:14:02 +03:00
if ( ( flags & EXT4_GET_BLOCKS_CONVERT ) ) {
2010-03-02 21:28:44 +03:00
ret = ext4_convert_unwritten_extents_endio ( handle , inode ,
2009-09-28 23:49:08 +04:00
path ) ;
2010-10-28 05:23:12 +04:00
if ( ret > = 0 ) {
2009-12-09 07:51:10 +03:00
ext4_update_inode_fsync_trans ( handle , inode , 1 ) ;
2011-01-10 21:03:35 +03:00
err = check_eofblocks_fl ( handle , inode , map - > m_lblk ,
path , map - > m_len ) ;
2010-10-28 05:23:12 +04:00
} else
err = ret ;
2009-09-28 23:49:08 +04:00
goto out2 ;
}
/* buffered IO case */
/*
* repeat fallocate creation request
* we already have an unwritten extent
*/
if ( flags & EXT4_GET_BLOCKS_UNINIT_EXT )
goto map_out ;
/* buffered READ or buffered write_begin() lookup */
if ( ( flags & EXT4_GET_BLOCKS_CREATE ) = = 0 ) {
/*
* We have blocks reserved already . We
* return allocated blocks so that delalloc
* won ' t do block reservation for us . But
* the buffer head will be unmapped so that
* a read from the block returns 0 s .
*/
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_UNWRITTEN ;
2009-09-28 23:49:08 +04:00
goto out1 ;
}
/* buffered write, writepage time, convert*/
2010-05-17 03:00:00 +04:00
ret = ext4_ext_convert_to_initialized ( handle , inode , map , path ) ;
2011-10-25 16:15:12 +04:00
if ( ret > = 0 )
2009-12-09 07:51:10 +03:00
ext4_update_inode_fsync_trans ( handle , inode , 1 ) ;
2009-09-28 23:49:08 +04:00
out :
if ( ret < = 0 ) {
err = ret ;
goto out2 ;
} else
allocated = ret ;
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_NEW ;
2009-12-30 07:39:06 +03:00
/*
* if we allocated more blocks than requested
* we need to make sure we unmap the extra block
* allocated . The actual needed block will get
* unmapped later when we find the buffer_head marked
* new .
*/
2010-05-17 03:00:00 +04:00
if ( allocated > map - > m_len ) {
2009-12-30 07:39:06 +03:00
unmap_underlying_metadata_blocks ( inode - > i_sb - > s_bdev ,
2010-05-17 03:00:00 +04:00
newblock + map - > m_len ,
allocated - map - > m_len ) ;
allocated = map - > m_len ;
2009-12-30 07:39:06 +03:00
}
2010-01-25 12:00:31 +03:00
/*
* If we have done fallocate with the offset that is already
* delayed allocated , we would have block reservation
* and quota reservation done in the delayed write path .
* But fallocate would have already updated quota and block
* count for this offset . So cancel these reservations .
*/
2011-09-10 03:04:51 +04:00
if ( flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ) {
unsigned int reserved_clusters ;
reserved_clusters = get_reserved_cluster_alloc ( inode ,
map - > m_lblk , map - > m_len ) ;
if ( reserved_clusters )
ext4_da_update_reserve_space ( inode ,
reserved_clusters ,
0 ) ;
}
2010-01-25 12:00:31 +03:00
2009-09-28 23:49:08 +04:00
map_out :
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_MAPPED ;
2011-10-25 16:15:12 +04:00
if ( ( flags & EXT4_GET_BLOCKS_KEEP_SIZE ) = = 0 ) {
err = check_eofblocks_fl ( handle , inode , map - > m_lblk , path ,
map - > m_len ) ;
if ( err < 0 )
goto out2 ;
}
2009-09-28 23:49:08 +04:00
out1 :
2010-05-17 03:00:00 +04:00
if ( allocated > map - > m_len )
allocated = map - > m_len ;
2009-09-28 23:49:08 +04:00
ext4_ext_show_leaf ( inode , path ) ;
2010-05-17 03:00:00 +04:00
map - > m_pblk = newblock ;
map - > m_len = allocated ;
2009-09-28 23:49:08 +04:00
out2 :
if ( path ) {
ext4_ext_drop_refs ( path ) ;
kfree ( path ) ;
}
return err ? err : allocated ;
}
2010-10-28 05:23:12 +04:00
2011-09-10 02:52:51 +04:00
/*
* get_implied_cluster_alloc - check to see if the requested
* allocation ( in the map structure ) overlaps with a cluster already
* allocated in an extent .
2011-09-10 03:18:51 +04:00
* @ sb The filesystem superblock structure
2011-09-10 02:52:51 +04:00
* @ map The requested lblk - > pblk mapping
* @ ex The extent structure which might contain an implied
* cluster allocation
*
* This function is called by ext4_ext_map_blocks ( ) after we failed to
* find blocks that were already in the inode ' s extent tree . Hence ,
* we know that the beginning of the requested region cannot overlap
* the extent from the inode ' s extent tree . There are three cases we
* want to catch . The first is this case :
*
* | - - - cluster # N - - |
* | - - - extent - - - | | - - - - requested region - - - |
* | = = = = = = = = = = |
*
* The second case that we need to test for is this one :
*
* | - - - - - - - - - cluster # N - - - - - - - - - - - - - - - - |
* | - - - requested region - - | | - - - - - - - extent - - - - |
* | = = = = = = = = = = = = = = = = = = = = = = = |
*
* The third case is when the requested region lies between two extents
* within the same cluster :
* | - - - - - - - - - - - - - cluster # N - - - - - - - - - - - - - |
* | - - - - - ex - - - - - | | - - - - ex_right - - - - |
* | - - - - - - requested region - - - - - - |
* | = = = = = = = = = = = = = = = = |
*
* In each of the above cases , we need to set the map - > m_pblk and
* map - > m_len so they correspond to the extent labelled as
* " |====| " from cluster # N , since it is already in use for data in
* cluster EXT4_B2C ( sbi , map - > m_lblk ) . We will then return 1 to
* signal to ext4_ext_map_blocks ( ) that map - > m_pblk should be treated
* as a new " allocated " block region . Otherwise , we will return 0 and
* ext4_ext_map_blocks ( ) will then allocate one or more new clusters
* by calling ext4_mb_new_blocks ( ) .
*/
2011-09-10 03:18:51 +04:00
static int get_implied_cluster_alloc ( struct super_block * sb ,
2011-09-10 02:52:51 +04:00
struct ext4_map_blocks * map ,
struct ext4_extent * ex ,
struct ext4_ext_path * path )
{
2011-09-10 03:18:51 +04:00
struct ext4_sb_info * sbi = EXT4_SB ( sb ) ;
2011-09-10 02:52:51 +04:00
ext4_lblk_t c_offset = map - > m_lblk & ( sbi - > s_cluster_ratio - 1 ) ;
ext4_lblk_t ex_cluster_start , ex_cluster_end ;
ext4_lblk_t rr_cluster_start , rr_cluster_end ;
ext4_lblk_t ee_block = le32_to_cpu ( ex - > ee_block ) ;
ext4_fsblk_t ee_start = ext4_ext_pblock ( ex ) ;
unsigned short ee_len = ext4_ext_get_actual_len ( ex ) ;
/* The extent passed in that we are trying to match */
ex_cluster_start = EXT4_B2C ( sbi , ee_block ) ;
ex_cluster_end = EXT4_B2C ( sbi , ee_block + ee_len - 1 ) ;
/* The requested region passed into ext4_map_blocks() */
rr_cluster_start = EXT4_B2C ( sbi , map - > m_lblk ) ;
rr_cluster_end = EXT4_B2C ( sbi , map - > m_lblk + map - > m_len - 1 ) ;
if ( ( rr_cluster_start = = ex_cluster_end ) | |
( rr_cluster_start = = ex_cluster_start ) ) {
if ( rr_cluster_start = = ex_cluster_end )
ee_start + = ee_len - 1 ;
map - > m_pblk = ( ee_start & ~ ( sbi - > s_cluster_ratio - 1 ) ) +
c_offset ;
map - > m_len = min ( map - > m_len ,
( unsigned ) sbi - > s_cluster_ratio - c_offset ) ;
/*
* Check for and handle this case :
*
* | - - - - - - - - - cluster # N - - - - - - - - - - - - - |
* | - - - - - - - extent - - - - |
* | - - - requested region - - - |
* | = = = = = = = = = = = |
*/
if ( map - > m_lblk < ee_block )
map - > m_len = min ( map - > m_len , ee_block - map - > m_lblk ) ;
/*
* Check for the case where there is already another allocated
* block to the right of ' ex ' but before the end of the cluster .
*
* | - - - - - - - - - - - - - cluster # N - - - - - - - - - - - - - |
* | - - - - - ex - - - - - | | - - - - ex_right - - - - |
* | - - - - - - requested region - - - - - - |
* | = = = = = = = = = = = = = = = = |
*/
if ( map - > m_lblk > ee_block ) {
ext4_lblk_t next = ext4_ext_next_allocated_block ( path ) ;
map - > m_len = min ( map - > m_len , next - map - > m_lblk ) ;
}
2011-09-10 03:18:51 +04:00
trace_ext4_get_implied_cluster_alloc_exit ( sb , map , 1 ) ;
2011-09-10 02:52:51 +04:00
return 1 ;
}
2011-09-10 03:18:51 +04:00
trace_ext4_get_implied_cluster_alloc_exit ( sb , map , 0 ) ;
2011-09-10 02:52:51 +04:00
return 0 ;
}
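/*
 * Worked example with assumed values : s_cluster_ratio == 4 , an
 * extent ex mapping logical block 8 ( length 1 ) to physical block
 * 100 , and a request starting at m_lblk == 10 . Then c_offset == 2
 * and both blocks fall in cluster # 2 , so the mapping is implied from
 * the cluster : m_pblk = ( 100 & ~3 ) + 2 = 102 , and m_len is clipped
 * to at most 4 - 2 = 2 blocks ( less if the next allocated block
 * comes sooner ) .
 */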
2008-01-29 07:58:27 +03:00
/*
2008-02-25 23:29:55 +03:00
* Block allocation / map / preallocation routine for extents based files
*
*
2008-01-29 07:58:27 +03:00
* Need to be called with
2008-01-29 07:58:26 +03:00
* down_read ( & EXT4_I ( inode ) - > i_data_sem ) if not allocating file system block
* ( ie , create is zero ) . Otherwise down_write ( & EXT4_I ( inode ) - > i_data_sem )
2008-02-25 23:29:55 +03:00
*
* return > 0 , number of blocks already mapped / allocated
* if create = = 0 and these are pre - allocated blocks
* buffer head is unmapped
* otherwise blocks are mapped
*
* return = 0 , if plain lookup failed ( blocks have not been allocated )
* buffer head is unmapped
*
* return < 0 , error case .
2008-01-29 07:58:27 +03:00
*/
2010-05-17 03:00:00 +04:00
int ext4_ext_map_blocks ( handle_t * handle , struct inode * inode ,
struct ext4_map_blocks * map , int flags )
2006-10-11 12:21:03 +04:00
{
struct ext4_ext_path * path = NULL ;
2011-09-10 02:52:51 +04:00
struct ext4_extent newex , * ex , * ex2 ;
struct ext4_sb_info * sbi = EXT4_SB ( inode - > i_sb ) ;
2011-03-22 04:38:05 +03:00
ext4_fsblk_t newblock = 0 ;
2011-09-10 02:52:51 +04:00
int free_on_err = 0 , err = 0 , depth , ret ;
unsigned int allocated = 0 , offset = 0 ;
2011-10-29 17:23:38 +04:00
unsigned int allocated_clusters = 0 ;
2011-05-25 15:41:46 +04:00
unsigned int punched_out = 0 ;
unsigned int result = 0 ;
2008-01-29 08:19:52 +03:00
struct ext4_allocation_request ar ;
2009-09-28 23:48:29 +04:00
ext4_io_end_t * io = EXT4_I ( inode ) - > cur_aio_dio ;
2011-09-10 02:52:51 +04:00
ext4_lblk_t cluster_offset ;
2006-10-11 12:21:03 +04:00
2009-09-01 16:44:37 +04:00
ext_debug ( " blocks %u/%u requested for inode %lu \n " ,
2010-05-17 03:00:00 +04:00
map - > m_lblk , map - > m_len , inode - > i_ino ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_ext_map_blocks_enter ( inode , map - > m_lblk , map - > m_len , flags ) ;
2006-10-11 12:21:03 +04:00
/* check in cache */
2011-07-18 07:27:43 +04:00
if ( ! ( flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT ) & &
ext4_ext_in_cache ( inode , map - > m_lblk , & newex ) ) {
2011-01-10 20:13:26 +03:00
if ( ! newex . ee_start_lo & & ! newex . ee_start_hi ) {
2011-09-10 03:04:51 +04:00
if ( ( sbi - > s_cluster_ratio > 1 ) & &
ext4_find_delalloc_cluster ( inode , map - > m_lblk , 0 ) )
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2009-05-14 08:58:52 +04:00
if ( ( flags & EXT4_GET_BLOCKS_CREATE ) = = 0 ) {
2007-07-18 05:42:38 +04:00
/*
* block isn ' t allocated yet and
* user doesn ' t want to allocate it
*/
2006-10-11 12:21:03 +04:00
goto out2 ;
}
/* we should allocate requested block */
2011-01-10 20:13:26 +03:00
} else {
2006-10-11 12:21:03 +04:00
/* block is already allocated */
2011-09-10 03:04:51 +04:00
if ( sbi - > s_cluster_ratio > 1 )
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2010-05-17 03:00:00 +04:00
newblock = map - > m_lblk
2007-05-24 21:04:54 +04:00
- le32_to_cpu ( newex . ee_block )
2010-10-28 05:30:14 +04:00
+ ext4_ext_pblock ( & newex ) ;
2006-10-11 12:21:07 +04:00
/* number of remaining blocks in the extent */
2008-01-29 07:58:27 +03:00
allocated = ext4_ext_get_actual_len ( & newex ) -
2010-05-17 03:00:00 +04:00
( map - > m_lblk - le32_to_cpu ( newex . ee_block ) ) ;
2006-10-11 12:21:03 +04:00
goto out ;
}
}
/* find extent for this block */
2010-05-17 03:00:00 +04:00
path = ext4_ext_find_extent ( inode , map - > m_lblk , NULL ) ;
2006-10-11 12:21:03 +04:00
if ( IS_ERR ( path ) ) {
err = PTR_ERR ( path ) ;
path = NULL ;
goto out2 ;
}
depth = ext_depth ( inode ) ;
/*
2006-10-11 12:21:07 +04:00
* consistent leaf must not be empty ;
* this situation is possible , though , _during_ tree modification ;
2006-10-11 12:21:03 +04:00
* this is why assert can ' t be put in ext4_ext_find_extent ( )
*/
2010-03-02 19:46:09 +03:00
if ( unlikely ( path [ depth ] . p_ext = = NULL & & depth ! = 0 ) ) {
EXT4_ERROR_INODE ( inode , " bad extent address "
2010-05-17 07:00:00 +04:00
" lblock: %lu, depth: %d pblock %lld " ,
( unsigned long ) map - > m_lblk , depth ,
path [ depth ] . p_block ) ;
2009-12-14 17:53:52 +03:00
err = - EIO ;
goto out2 ;
}
2006-10-11 12:21:03 +04:00
2006-12-07 07:41:33 +03:00
ex = path [ depth ] . p_ext ;
if ( ex ) {
2008-01-29 07:58:27 +03:00
ext4_lblk_t ee_block = le32_to_cpu ( ex - > ee_block ) ;
2010-10-28 05:30:14 +04:00
ext4_fsblk_t ee_start = ext4_ext_pblock ( ex ) ;
2007-07-18 05:42:41 +04:00
unsigned short ee_len ;
2006-10-11 12:21:06 +04:00
/*
* Uninitialized extents are treated as holes , except that
2007-07-18 05:42:38 +04:00
* we split out initialized portions during a write .
2006-10-11 12:21:06 +04:00
*/
2007-07-18 05:42:41 +04:00
ee_len = ext4_ext_get_actual_len ( ex ) ;
2011-09-10 03:18:51 +04:00
trace_ext4_ext_show_extent ( inode , ee_block , ee_start , ee_len ) ;
2006-10-11 12:21:07 +04:00
/* if found extent covers block, simply return it */
2010-05-17 03:00:00 +04:00
if ( in_range ( map - > m_lblk , ee_block , ee_len ) ) {
2011-10-29 17:23:38 +04:00
struct ext4_map_blocks punch_map ;
2011-09-10 02:54:51 +04:00
ext4_fsblk_t partial_cluster = 0 ;
2010-05-17 03:00:00 +04:00
newblock = map - > m_lblk - ee_block + ee_start ;
2006-10-11 12:21:07 +04:00
/* number of remaining blocks in the extent */
2010-05-17 03:00:00 +04:00
allocated = ee_len - ( map - > m_lblk - ee_block ) ;
ext_debug ( " %u fit into %u:%d -> %llu \n " , map - > m_lblk ,
ee_block , ee_len , newblock ) ;
2007-07-18 05:42:38 +04:00
2011-05-25 15:41:46 +04:00
if ( ( flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT ) = = 0 ) {
/*
* Do not put uninitialized extent
* in the cache
*/
if ( ! ext4_ext_is_uninitialized ( ex ) ) {
ext4_ext_put_in_cache ( inode , ee_block ,
ee_len , ee_start ) ;
goto out ;
}
ret = ext4_ext_handle_uninitialized_extents (
handle , inode , map , path , flags ,
allocated , newblock ) ;
return ret ;
2007-07-18 05:42:38 +04:00
}
2011-05-25 15:41:46 +04:00
/*
* Punch out the map length , but only to the
* end of the extent
*/
punched_out = allocated < map - > m_len ?
allocated : map - > m_len ;
/*
* Since extents need to be converted to
* uninitialized , they must fit in an
* uninitialized extent
*/
if ( punched_out > EXT_UNINIT_MAX_LEN )
punched_out = EXT_UNINIT_MAX_LEN ;
punch_map . m_lblk = map - > m_lblk ;
punch_map . m_pblk = newblock ;
punch_map . m_len = punched_out ;
punch_map . m_flags = 0 ;
/* Check to see if the extent needs to be split */
if ( punch_map . m_len ! = ee_len | |
punch_map . m_lblk ! = ee_block ) {
ret = ext4_split_extent ( handle , inode ,
path , & punch_map , 0 ,
EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
EXT4_GET_BLOCKS_PRE_IO ) ;
if ( ret < 0 ) {
err = ret ;
goto out2 ;
}
/*
* find extent for the block at
* the start of the hole
*/
ext4_ext_drop_refs ( path ) ;
kfree ( path ) ;
path = ext4_ext_find_extent ( inode ,
map - > m_lblk , NULL ) ;
if ( IS_ERR ( path ) ) {
err = PTR_ERR ( path ) ;
path = NULL ;
goto out2 ;
}
depth = ext_depth ( inode ) ;
ex = path [ depth ] . p_ext ;
ee_len = ext4_ext_get_actual_len ( ex ) ;
ee_block = le32_to_cpu ( ex - > ee_block ) ;
ee_start = ext4_ext_pblock ( ex ) ;
}
ext4_ext_mark_uninitialized ( ex ) ;
2011-07-18 07:17:02 +04:00
ext4_ext_invalidate_cache ( inode ) ;
err = ext4_ext_rm_leaf ( handle , inode , path ,
2011-09-10 02:54:51 +04:00
& partial_cluster , map - > m_lblk ,
map - > m_lblk + punched_out ) ;
2011-07-18 07:17:02 +04:00
if ( ! err & & path - > p_hdr - > eh_entries = = 0 ) {
/*
* Punch hole freed all of this sub tree ,
* so we need to correct eh_depth
*/
err = ext4_ext_get_access ( handle , inode , path ) ;
if ( err = = 0 ) {
ext_inode_hdr ( inode ) - > eh_depth = 0 ;
ext_inode_hdr ( inode ) - > eh_max =
cpu_to_le16 ( ext4_ext_space_root (
inode , 0 ) ) ;
err = ext4_ext_dirty (
handle , inode , path ) ;
}
}
2011-05-25 15:41:46 +04:00
goto out2 ;
2006-10-11 12:21:03 +04:00
}
}
2011-09-10 03:04:51 +04:00
if ( ( sbi - > s_cluster_ratio > 1 ) & &
ext4_find_delalloc_cluster ( inode , map - > m_lblk , 0 ) )
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* requested block isn ' t allocated yet ;
2006-10-11 12:21:03 +04:00
* we couldn ' t try to create block if create flag is zero
*/
2009-05-14 08:58:52 +04:00
if ( ( flags & EXT4_GET_BLOCKS_CREATE ) = = 0 ) {
2007-07-18 05:42:38 +04:00
/*
* put just found gap into cache to speed up
* subsequent requests
*/
2010-05-17 03:00:00 +04:00
ext4_ext_put_gap_in_cache ( inode , path , map - > m_lblk ) ;
2006-10-11 12:21:03 +04:00
goto out2 ;
}
2011-09-10 02:52:51 +04:00
2006-10-11 12:21:03 +04:00
/*
2008-10-10 17:40:52 +04:00
* Okay , we need to do block allocation .
2006-10-11 12:21:24 +04:00
*/
2011-09-10 03:04:51 +04:00
map - > m_flags & = ~ EXT4_MAP_FROM_CLUSTER ;
2011-09-10 02:52:51 +04:00
newex . ee_block = cpu_to_le32 ( map - > m_lblk ) ;
cluster_offset = map - > m_lblk & ( sbi - > s_cluster_ratio - 1 ) ;
/*
* If we are doing bigalloc , check to see if the extent returned
* by ext4_ext_find_extent ( ) implies a cluster we can use .
*/
if ( cluster_offset & & ex & &
2011-09-10 03:18:51 +04:00
get_implied_cluster_alloc ( inode - > i_sb , map , ex , path ) ) {
2011-09-10 02:52:51 +04:00
ar . len = allocated = map - > m_len ;
newblock = map - > m_pblk ;
2011-09-10 03:04:51 +04:00
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2011-09-10 02:52:51 +04:00
goto got_allocated_blocks ;
}
2006-10-11 12:21:03 +04:00
2008-01-29 08:19:52 +03:00
/* find neighbour allocated blocks */
2010-05-17 03:00:00 +04:00
ar . lleft = map - > m_lblk ;
2008-01-29 08:19:52 +03:00
err = ext4_ext_search_left ( inode , path , & ar . lleft , & ar . pleft ) ;
if ( err )
goto out2 ;
2010-05-17 03:00:00 +04:00
ar . lright = map - > m_lblk ;
2011-09-10 02:52:51 +04:00
ex2 = NULL ;
err = ext4_ext_search_right ( inode , path , & ar . lright , & ar . pright , & ex2 ) ;
2008-01-29 08:19:52 +03:00
if ( err )
goto out2 ;
2007-05-24 21:04:13 +04:00
2011-09-10 02:52:51 +04:00
/* Check if the extent after searching to the right implies a
* cluster we can use . */
if ( ( sbi - > s_cluster_ratio > 1 ) & & ex2 & &
2011-09-10 03:18:51 +04:00
get_implied_cluster_alloc ( inode - > i_sb , map , ex2 , path ) ) {
2011-09-10 02:52:51 +04:00
ar . len = allocated = map - > m_len ;
newblock = map - > m_pblk ;
2011-09-10 03:04:51 +04:00
map - > m_flags | = EXT4_MAP_FROM_CLUSTER ;
2011-09-10 02:52:51 +04:00
goto got_allocated_blocks ;
}
2007-07-18 17:02:56 +04:00
/*
* See if request is beyond maximum number of blocks we can have in
* a single extent . For an initialized extent this limit is
* EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
* EXT_UNINIT_MAX_LEN .
*/
2010-05-17 03:00:00 +04:00
if ( map - > m_len > EXT_INIT_MAX_LEN & &
2009-05-14 08:58:52 +04:00
! ( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) )
2010-05-17 03:00:00 +04:00
map - > m_len = EXT_INIT_MAX_LEN ;
else if ( map - > m_len > EXT_UNINIT_MAX_LEN & &
2009-05-14 08:58:52 +04:00
( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) )
2010-05-17 03:00:00 +04:00
map - > m_len = EXT_UNINIT_MAX_LEN ;
2007-07-18 17:02:56 +04:00
2010-05-17 03:00:00 +04:00
/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
newex . ee_len = cpu_to_le16 ( map - > m_len ) ;
2011-09-10 02:52:51 +04:00
err = ext4_ext_check_overlap ( sbi , inode , & newex , path ) ;
2007-05-24 21:04:13 +04:00
if ( err )
2008-01-29 07:58:27 +03:00
allocated = ext4_ext_get_actual_len ( & newex ) ;
2007-05-24 21:04:13 +04:00
else
2010-05-17 03:00:00 +04:00
allocated = map - > m_len ;
2008-01-29 08:19:52 +03:00
/* allocate new block */
ar . inode = inode ;
2010-05-17 03:00:00 +04:00
ar . goal = ext4_ext_find_goal ( inode , path , map - > m_lblk ) ;
ar . logical = map - > m_lblk ;
2011-09-10 02:52:51 +04:00
/*
* We calculate the offset from the beginning of the cluster
* for the logical block number , since when we allocate a
* physical cluster , the physical block should start at the
* same offset from the beginning of the cluster . This is
* needed so that future calls to get_implied_cluster_alloc ( )
* work correctly .
*/
offset = map - > m_lblk & ( sbi - > s_cluster_ratio - 1 ) ;
ar . len = EXT4_NUM_B2C ( sbi , offset + allocated ) ;
ar . goal - = offset ;
ar . logical - = offset ;
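/*
 * Worked example with assumed values : s_cluster_ratio == 4 and a
 * 3 - block request at m_lblk == 10 give offset = 10 & 3 = 2 , so
 * ar.goal and ar.logical are pulled back to the cluster boundary
 * ( block 8 ) and ar.len = EXT4_NUM_B2C ( sbi , 2 + 3 ) = 2 clusters ;
 * the physical allocation then starts at the same in - cluster offset .
 */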
2008-01-29 08:19:52 +03:00
if ( S_ISREG ( inode - > i_mode ) )
ar . flags = EXT4_MB_HINT_DATA ;
else
/* disable in-core preallocation for non-regular files */
ar . flags = 0 ;
2011-05-25 15:41:54 +04:00
if ( flags & EXT4_GET_BLOCKS_NO_NORMALIZE )
ar . flags | = EXT4_MB_HINT_NOPREALLOC ;
2008-01-29 08:19:52 +03:00
newblock = ext4_mb_new_blocks ( handle , & ar , & err ) ;
2006-10-11 12:21:03 +04:00
if ( ! newblock )
goto out2 ;
2009-09-01 16:44:37 +04:00
ext_debug ( " allocate new block: goal %llu, found %llu/%u \n " ,
2008-11-05 08:14:04 +03:00
ar . goal , newblock , allocated ) ;
2011-09-10 02:52:51 +04:00
free_on_err = 1 ;
2011-09-10 03:04:51 +04:00
allocated_clusters = ar . len ;
2011-09-10 02:52:51 +04:00
ar . len = EXT4_C2B ( sbi , ar . len ) - offset ;
if ( ar . len > allocated )
ar . len = allocated ;
2006-10-11 12:21:03 +04:00
2011-09-10 02:52:51 +04:00
got_allocated_blocks :
2006-10-11 12:21:03 +04:00
/* try to insert new extent into found leaf and return */
2011-09-10 02:52:51 +04:00
ext4_ext_store_pblock ( & newex , newblock + offset ) ;
2008-01-29 08:19:52 +03:00
newex . ee_len = cpu_to_le16 ( ar . len ) ;
2009-09-28 23:48:29 +04:00
/* Mark uninitialized */
if ( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) {
2007-07-18 05:42:41 +04:00
ext4_ext_mark_uninitialized ( & newex ) ;
2009-09-28 23:48:29 +04:00
/*
2010-03-05 00:14:02 +03:00
* io_end structure was created for every IO write to an
2011-03-31 05:57:33 +04:00
* uninitialized extent . To avoid unnecessary conversion ,
2010-03-05 00:14:02 +03:00
* here we flag the IO that really needs the conversion .
2009-11-10 18:48:04 +03:00
* For the non async direct IO case , flag the inode state
2011-03-31 05:57:33 +04:00
* that we need to perform conversion when IO is done .
2009-09-28 23:48:29 +04:00
*/
2010-03-05 00:14:02 +03:00
if ( ( flags & EXT4_GET_BLOCKS_PRE_IO ) ) {
2011-11-01 01:30:44 +04:00
if ( io )
ext4_set_io_unwritten_flag ( inode , io ) ;
else
2010-01-24 22:34:07 +03:00
ext4_set_inode_state ( inode ,
EXT4_STATE_DIO_UNWRITTEN ) ;
2009-11-10 18:48:04 +03:00
}
2010-03-05 00:14:02 +03:00
if ( ext4_should_dioread_nolock ( inode ) )
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_UNINIT ;
2009-09-28 23:48:29 +04:00
}
2010-02-24 17:52:53 +03:00
2011-10-25 16:15:12 +04:00
err = 0 ;
if ( ( flags & EXT4_GET_BLOCKS_KEEP_SIZE ) = = 0 )
err = check_eofblocks_fl ( handle , inode , map - > m_lblk ,
path , ar . len ) ;
2011-07-11 04:07:25 +04:00
if ( ! err )
err = ext4_ext_insert_extent ( handle , inode , path ,
& newex , flags ) ;
2011-09-10 02:52:51 +04:00
if ( err & & free_on_err ) {
2011-07-11 03:37:48 +04:00
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0 ;
2007-05-24 21:04:25 +04:00
/* free data blocks we just allocated */
2008-01-29 08:19:52 +03:00
/* not a good idea to call discard here directly,
* but otherwise we ' d need to call it every free ( ) */
2008-10-10 17:40:52 +04:00
ext4_discard_preallocations ( inode ) ;
2011-02-22 05:01:42 +03:00
ext4_free_blocks ( handle , inode , NULL , ext4_ext_pblock ( & newex ) ,
2011-07-11 03:37:48 +04:00
ext4_ext_get_actual_len ( & newex ) , fb_flags ) ;
2006-10-11 12:21:03 +04:00
goto out2 ;
2007-05-24 21:04:25 +04:00
}
2006-10-11 12:21:03 +04:00
/* previous routine could use block we allocated */
2010-10-28 05:30:14 +04:00
newblock = ext4_ext_pblock ( & newex ) ;
2008-01-29 07:58:27 +03:00
allocated = ext4_ext_get_actual_len ( & newex ) ;
2010-05-17 03:00:00 +04:00
if ( allocated > map - > m_len )
allocated = map - > m_len ;
map - > m_flags | = EXT4_MAP_NEW ;
2006-10-11 12:21:03 +04:00
2010-01-25 12:00:31 +03:00
/*
* Update reserved blocks / metadata blocks after successful
* block allocation which had been deferred till now .
*/
2011-09-10 03:04:51 +04:00
if ( flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ) {
2011-10-29 17:23:38 +04:00
unsigned int reserved_clusters ;
2011-09-10 03:04:51 +04:00
/*
2011-10-29 17:23:38 +04:00
* Check how many clusters we had reserved for this allocated range
2011-09-10 03:04:51 +04:00
*/
reserved_clusters = get_reserved_cluster_alloc ( inode ,
map - > m_lblk , allocated ) ;
if ( map - > m_flags & EXT4_MAP_FROM_CLUSTER ) {
if ( reserved_clusters ) {
/*
* We have clusters reserved for this range .
* But since we are not doing actual allocation
* and are simply using blocks from previously
* allocated cluster , we should release the
* reservation and not claim quota .
*/
ext4_da_update_reserve_space ( inode ,
reserved_clusters , 0 ) ;
}
} else {
BUG_ON ( allocated_clusters < reserved_clusters ) ;
/* We will claim quota for all newly allocated blocks.*/
ext4_da_update_reserve_space ( inode , allocated_clusters ,
1 ) ;
if ( reserved_clusters < allocated_clusters ) {
2011-09-10 03:20:51 +04:00
struct ext4_inode_info * ei = EXT4_I ( inode ) ;
2011-09-10 03:04:51 +04:00
int reservation = allocated_clusters -
reserved_clusters ;
/*
* It seems we claimed a few clusters outside of
* the range of this allocation . We should give
* it back to the reservation pool . This can
* happen in the following case :
*
* * Suppose s_cluster_ratio is 4 ( i . e . , each
* cluster has 4 blocks ) . Thus , the clusters
* are [ 0 - 3 ] , [ 4 - 7 ] , [ 8 - 11 ] . . .
* * First comes delayed allocation write for
* logical blocks 10 & 11. Since there were no
* previous delayed allocated blocks in the
* range [ 8 - 11 ] , we would reserve 1 cluster
* for this write .
* * Next comes write for logical blocks 3 to 8.
* In this case , we will reserve 2 clusters
* ( for [ 0 - 3 ] and [ 4 - 7 ] , and not for [ 8 - 11 ] as
* that range has delayed allocated blocks ) .
* Thus total reserved clusters now becomes 3.
* * Now , during the delayed allocation writeout
* time , we will first write blocks [ 3 - 8 ] and
* allocate 3 clusters for writing these
* blocks . Also , we would claim all these
* three clusters above .
* * Now when we come here to writeout the
* blocks [ 10 - 11 ] , we would expect to claim
* the reservation of 1 cluster we had made
* ( and we would claim it since there are no
* more delayed allocated blocks in the range
* [ 8 - 11 ] ) . But our reserved cluster count had
* already gone to 0.
*
* Thus , at the step 4 above when we determine
* that there are still some unwritten delayed
* allocated blocks outside of our current
* block range , we should increment the
* reserved clusters count so that when the
* remaining blocks finally get written , we
* could claim them .
*/
2011-09-10 03:20:51 +04:00
dquot_reserve_block ( inode ,
EXT4_C2B ( sbi , reservation ) ) ;
spin_lock ( & ei - > i_block_reservation_lock ) ;
ei - > i_reserved_data_blocks + = reservation ;
spin_unlock ( & ei - > i_block_reservation_lock ) ;
2011-09-10 03:04:51 +04:00
}
}
}
2010-01-25 12:00:31 +03:00
2009-12-09 07:51:10 +03:00
/*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an uninitialized extent .
*/
if ( ( flags & EXT4_GET_BLOCKS_UNINIT_EXT ) = = 0 ) {
2011-01-10 20:13:26 +03:00
ext4_ext_put_in_cache ( inode , map - > m_lblk , allocated , newblock ) ;
2009-12-09 07:51:10 +03:00
ext4_update_inode_fsync_trans ( handle , inode , 1 ) ;
} else
ext4_update_inode_fsync_trans ( handle , inode , 0 ) ;
2006-10-11 12:21:03 +04:00
out :
2010-05-17 03:00:00 +04:00
if ( allocated > map - > m_len )
allocated = map - > m_len ;
2006-10-11 12:21:03 +04:00
ext4_ext_show_leaf ( inode , path ) ;
2010-05-17 03:00:00 +04:00
map - > m_flags | = EXT4_MAP_MAPPED ;
map - > m_pblk = newblock ;
map - > m_len = allocated ;
2006-10-11 12:21:03 +04:00
out2 :
if ( path ) {
ext4_ext_drop_refs ( path ) ;
kfree ( path ) ;
}
2011-05-25 15:41:46 +04:00
result = ( flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT ) ?
punched_out : allocated ;
2011-10-29 17:39:51 +04:00
trace_ext4_ext_map_blocks_exit ( inode , map - > m_lblk ,
newblock , map - > m_len , err ? err : result ) ;
2011-05-25 15:41:46 +04:00
return err ? err : result ;
2006-10-11 12:21:03 +04:00
}
2008-07-12 03:27:31 +04:00
void ext4_ext_truncate ( struct inode * inode )
2006-10-11 12:21:03 +04:00
{
struct address_space * mapping = inode - > i_mapping ;
struct super_block * sb = inode - > i_sb ;
2008-01-29 07:58:27 +03:00
ext4_lblk_t last_block ;
2006-10-11 12:21:03 +04:00
handle_t * handle ;
2011-09-07 05:49:44 +04:00
loff_t page_len ;
2006-10-11 12:21:03 +04:00
int err = 0 ;
2011-01-10 20:47:05 +03:00
/*
* finish any pending end_io work so we won ' t run the risk of
* converting any truncated blocks to initialized later
*/
ext4_flush_completed_IO ( inode ) ;
2006-10-11 12:21:03 +04:00
/*
* probably first extent we ' re gonna free will be last in block
*/
2008-08-20 06:16:03 +04:00
err = ext4_writepage_trans_blocks ( inode ) ;
2006-10-11 12:21:03 +04:00
handle = ext4_journal_start ( inode , err ) ;
2008-07-12 03:27:31 +04:00
if ( IS_ERR ( handle ) )
2006-10-11 12:21:03 +04:00
return ;
2011-09-07 05:49:44 +04:00
if ( inode - > i_size % PAGE_CACHE_SIZE ! = 0 ) {
page_len = PAGE_CACHE_SIZE -
( inode - > i_size & ( PAGE_CACHE_SIZE - 1 ) ) ;
err = ext4_discard_partial_page_buffers ( handle ,
mapping , inode - > i_size , page_len , 0 ) ;
if ( err )
goto out_stop ;
}
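/*
 * Worked example with assumed values : with 4k pages and
 * i_size == 10000 , page_len = 4096 - ( 10000 & 4095 ) = 2288 , so the
 * tail of the last page from byte 10000 up to the page boundary is
 * zeroed and stale data cannot leak past the new EOF .
 */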
2006-10-11 12:21:03 +04:00
2008-07-12 03:27:31 +04:00
if ( ext4_orphan_add ( handle , inode ) )
goto out_stop ;
2008-01-29 07:58:26 +03:00
down_write ( & EXT4_I ( inode ) - > i_data_sem ) ;
2006-10-11 12:21:03 +04:00
ext4_ext_invalidate_cache ( inode ) ;
2008-10-10 17:40:52 +04:00
ext4_discard_preallocations ( inode ) ;
2008-01-29 08:19:52 +03:00
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* TODO : optimization is possible here .
* Probably we need not scan at all ,
* because page truncation is enough .
2006-10-11 12:21:03 +04:00
*/
/* we have to know where to truncate from in crash case */
EXT4_I ( inode ) - > i_disksize = inode - > i_size ;
ext4_mark_inode_dirty ( handle , inode ) ;
last_block = ( inode - > i_size + sb - > s_blocksize - 1 )
> > EXT4_BLOCK_SIZE_BITS ( sb ) ;
2011-07-18 07:21:03 +04:00
err = ext4_ext_remove_space ( inode , last_block ) ;
2006-10-11 12:21:03 +04:00
/* In a multi-transaction truncate, we only make the final
2007-07-18 05:42:38 +04:00
* transaction synchronous .
*/
2006-10-11 12:21:03 +04:00
if ( IS_SYNC ( inode ) )
2009-01-07 08:06:22 +03:00
ext4_handle_sync ( handle ) ;
2006-10-11 12:21:03 +04:00
2008-07-12 03:27:31 +04:00
up_write ( & EXT4_I ( inode ) - > i_data_sem ) ;
2011-05-23 05:33:00 +04:00
out_stop :
2006-10-11 12:21:03 +04:00
/*
2006-10-11 12:21:07 +04:00
* If this was a simple ftruncate ( ) and the file will remain alive ,
2006-10-11 12:21:03 +04:00
* then we need to clear up the orphan record which we created above .
* However , if this was a real unlink then we were called by
* ext4_delete_inode ( ) , and we allow that function to clean up the
* orphan info for us .
*/
if ( inode - > i_nlink )
ext4_orphan_del ( handle , inode ) ;
2008-04-30 06:00:41 +04:00
inode - > i_mtime = inode - > i_ctime = ext4_current_time ( inode ) ;
ext4_mark_inode_dirty ( handle , inode ) ;
2006-10-11 12:21:03 +04:00
ext4_journal_stop ( handle ) ;
}
2008-04-29 16:11:12 +04:00
static void ext4_falloc_update_inode ( struct inode * inode ,
int mode , loff_t new_size , int update_ctime )
{
struct timespec now ;
if ( update_ctime ) {
now = current_fs_time ( inode - > i_sb ) ;
if ( ! timespec_equal ( & inode - > i_ctime , & now ) )
inode - > i_ctime = now ;
}
/*
* Update only when preallocation was requested beyond
* the file size .
*/
2008-09-13 21:06:18 +04:00
if ( ! ( mode & FALLOC_FL_KEEP_SIZE ) ) {
if ( new_size > i_size_read ( inode ) )
i_size_write ( inode , new_size ) ;
if ( new_size > EXT4_I ( inode ) - > i_disksize )
ext4_update_i_disksize ( inode , new_size ) ;
2010-02-24 17:52:53 +03:00
} else {
/*
* Mark that we allocate beyond EOF so the subsequent truncate
* can proceed even if the new size is the same as i_size .
*/
if ( new_size > i_size_read ( inode ) )
2010-05-17 06:00:00 +04:00
ext4_set_inode_flag ( inode , EXT4_INODE_EOFBLOCKS ) ;
2008-04-29 16:11:12 +04:00
}
}
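/*
 * Illustrative call with assumed values : after fallocate ( fd ,
 * FALLOC_FL_KEEP_SIZE , 0 , 1 << 20 ) on an empty file , i_size stays 0
 * and EXT4_INODE_EOFBLOCKS is set because blocks now exist beyond
 * EOF ; without FALLOC_FL_KEEP_SIZE the same call would instead push
 * i_size and i_disksize out to 1 MB .
 */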
2007-07-18 05:42:41 +04:00
/*
2011-01-14 15:07:43 +03:00
* preallocate space for a file . This implements ext4 ' s fallocate file
2007-07-18 05:42:41 +04:00
* operation , which gets called from sys_fallocate system call .
* For block - mapped files , posix_fallocate should fall back to the method
* of writing zeroes to the required new blocks ( the same behavior which is
* expected for file systems which do not support fallocate ( ) system call ) .
*/
2011-01-14 15:07:43 +03:00
long ext4_fallocate ( struct file * file , int mode , loff_t offset , loff_t len )
2007-07-18 05:42:41 +04:00
{
2011-01-14 15:07:43 +03:00
struct inode * inode = file - > f_path . dentry - > d_inode ;
2007-07-18 05:42:41 +04:00
handle_t * handle ;
2008-04-29 16:11:12 +04:00
loff_t new_size ;
2008-11-05 08:14:04 +03:00
unsigned int max_blocks ;
2007-07-18 05:42:41 +04:00
int ret = 0 ;
int ret2 = 0 ;
int retries = 0 ;
2011-10-25 16:15:12 +04:00
int flags ;
2010-05-17 04:00:00 +04:00
struct ext4_map_blocks map ;
2007-07-18 05:42:41 +04:00
unsigned int credits , blkbits = inode - > i_blkbits ;
/*
* currently supporting ( pre ) allocate mode for extent - based
* files _only_
*/
2010-05-17 06:00:00 +04:00
if ( ! ( ext4_test_inode_flag ( inode , EXT4_INODE_EXTENTS ) ) )
2007-07-18 05:42:41 +04:00
return - EOPNOTSUPP ;
2011-05-25 15:41:50 +04:00
/* Return error if mode is not supported */
if ( mode & ~ ( FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE ) )
return - EOPNOTSUPP ;
if ( mode & FALLOC_FL_PUNCH_HOLE )
return ext4_punch_hole ( file , offset , len ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_fallocate_enter ( inode , offset , len , mode ) ;
2010-05-17 04:00:00 +04:00
map . m_lblk = offset > > blkbits ;
2008-04-29 16:11:12 +04:00
/*
* We can ' t just convert len to max_blocks because the byte range may
* straddle a block boundary : e . g . blocksize = 4096 , offset = 3072 and len = 2048
*/
2007-07-18 05:42:41 +04:00
max_blocks = ( EXT4_BLOCK_ALIGN ( len + offset , blkbits ) > > blkbits )
2010-05-17 04:00:00 +04:00
- map . m_lblk ;
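/*
 * Worked example for the comment above ( same values ) : blkbits == 12 ,
 * offset == 3072 , len == 2048 . The byte range [ 3072 , 5120 ) touches
 * blocks 0 and 1 , so map.m_lblk == 0 and max_blocks ==
 * ( EXT4_BLOCK_ALIGN ( 5120 , 12 ) >> 12 ) - 0 == 2 , although len
 * alone is smaller than one block .
 */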
2007-07-18 05:42:41 +04:00
/*
2008-08-20 06:16:03 +04:00
* credits to insert 1 extent into extent tree
2007-07-18 05:42:41 +04:00
*/
2008-08-20 06:16:03 +04:00
credits = ext4_chunk_trans_blocks ( inode , max_blocks ) ;
2008-02-15 20:47:21 +03:00
mutex_lock ( & inode - > i_mutex ) ;
2010-05-16 22:00:00 +04:00
ret = inode_newsize_ok ( inode , ( len + offset ) ) ;
if ( ret ) {
mutex_unlock ( & inode - > i_mutex ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_fallocate_exit ( inode , offset , max_blocks , ret ) ;
2010-05-16 22:00:00 +04:00
return ret ;
}
2011-11-01 02:41:47 +04:00
flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT ;
2011-10-25 16:15:12 +04:00
if ( mode & FALLOC_FL_KEEP_SIZE )
flags | = EXT4_GET_BLOCKS_KEEP_SIZE ;
2011-11-01 02:41:47 +04:00
/*
* Don ' t normalize the request if it can fit in one extent so
* that it doesn ' t get unnecessarily split into multiple
* extents .
*/
if ( len < = EXT_UNINIT_MAX_LEN < < blkbits )
flags | = EXT4_GET_BLOCKS_NO_NORMALIZE ;
2007-07-18 05:42:41 +04:00
retry :
while ( ret > = 0 & & ret < max_blocks ) {
2010-05-17 04:00:00 +04:00
map . m_lblk = map . m_lblk + ret ;
map . m_len = max_blocks = max_blocks - ret ;
2007-07-18 05:42:41 +04:00
handle = ext4_journal_start ( inode , credits ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
break ;
}
2011-10-25 16:15:12 +04:00
ret = ext4_map_blocks ( handle , inode , & map , flags ) ;
2008-01-29 07:58:27 +03:00
if ( ret < = 0 ) {
2008-02-25 23:41:35 +03:00
# ifdef EXT4FS_DEBUG
WARN_ON ( ret < = 0 ) ;
2010-05-17 03:00:00 +04:00
printk ( KERN_ERR " %s: ext4_ext_map_blocks "
2008-02-25 23:41:35 +03:00
" returned error inode#%lu, block=%u, "
2009-01-27 03:26:26 +03:00
" max_blocks=%u " , __func__ ,
2010-10-28 05:30:15 +04:00
inode - > i_ino , map . m_lblk , max_blocks ) ;
2008-02-25 23:41:35 +03:00
# endif
2007-07-18 05:42:41 +04:00
ext4_mark_inode_dirty ( handle , inode ) ;
ret2 = ext4_journal_stop ( handle ) ;
break ;
}
2010-05-17 04:00:00 +04:00
if ( ( map . m_lblk + ret ) > = ( EXT4_BLOCK_ALIGN ( offset + len ,
2008-04-29 16:11:12 +04:00
blkbits ) > > blkbits ) )
new_size = offset + len ;
else
2011-07-28 06:11:20 +04:00
new_size = ( ( loff_t ) map . m_lblk + ret ) < < blkbits ;
2007-07-18 05:42:41 +04:00
2008-04-29 16:11:12 +04:00
ext4_falloc_update_inode ( inode , mode , new_size ,
2010-05-17 04:00:00 +04:00
( map . m_flags & EXT4_MAP_NEW ) ) ;
2007-07-18 05:42:41 +04:00
ext4_mark_inode_dirty ( handle , inode ) ;
ret2 = ext4_journal_stop ( handle ) ;
if ( ret2 )
break ;
}
2008-04-29 16:11:12 +04:00
if ( ret = = - ENOSPC & &
ext4_should_retry_alloc ( inode - > i_sb , & retries ) ) {
ret = 0 ;
2007-07-18 05:42:41 +04:00
goto retry ;
}
2008-02-15 20:47:21 +03:00
mutex_unlock ( & inode - > i_mutex ) ;
2011-03-22 04:38:05 +03:00
trace_ext4_fallocate_exit ( inode , offset , max_blocks ,
ret > 0 ? ret2 : ret ) ;
2007-07-18 05:42:41 +04:00
return ret > 0 ? ret2 : ret ;
}
2008-10-07 08:46:36 +04:00
2009-09-28 23:49:08 +04:00
/*
* This function converts a range of blocks to written extents .
* The caller of this function will pass the start offset and the size ;
* all unwritten extents within this range will be converted to
* written extents .
*
* This function is called from the direct IO end io call back
* function , to convert the fallocated extents after IO is completed .
2009-11-10 18:48:08 +03:00
* Returns 0 on success .
2009-09-28 23:49:08 +04:00
*/
int ext4_convert_unwritten_extents ( struct inode * inode , loff_t offset ,
2010-02-05 07:58:38 +03:00
ssize_t len )
2009-09-28 23:49:08 +04:00
{
handle_t * handle ;
unsigned int max_blocks ;
int ret = 0 ;
int ret2 = 0 ;
2010-05-17 04:00:00 +04:00
struct ext4_map_blocks map ;
2009-09-28 23:49:08 +04:00
unsigned int credits , blkbits = inode - > i_blkbits ;
2010-05-17 04:00:00 +04:00
map . m_lblk = offset > > blkbits ;
2009-09-28 23:49:08 +04:00
/*
* We can ' t just convert len to max_blocks because the byte range may
* straddle a block boundary : e . g . blocksize = 4096 , offset = 3072 and len = 2048
*/
2010-05-17 04:00:00 +04:00
max_blocks = ( ( EXT4_BLOCK_ALIGN ( len + offset , blkbits ) > > blkbits ) -
map . m_lblk ) ;
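/*
 * The same rounding as in ext4_fallocate ( ) applies here : e . g .
 * ( assumed values ) offset == 3072 and len == 2048 with 4k blocks
 * give map.m_lblk == 0 and max_blocks == 2 .
 */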
2009-09-28 23:49:08 +04:00
/*
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks ( inode , max_blocks ) ;
while ( ret > = 0 & & ret < max_blocks ) {
2010-05-17 04:00:00 +04:00
map . m_lblk + = ret ;
map . m_len = ( max_blocks - = ret ) ;
2009-09-28 23:49:08 +04:00
handle = ext4_journal_start ( inode , credits ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
break ;
}
2010-05-17 04:00:00 +04:00
ret = ext4_map_blocks ( handle , inode , & map ,
2010-03-02 21:28:44 +03:00
EXT4_GET_BLOCKS_IO_CONVERT_EXT ) ;
2009-09-28 23:49:08 +04:00
if ( ret < = 0 ) {
WARN_ON ( ret < = 0 ) ;
2010-05-17 03:00:00 +04:00
printk ( KERN_ERR " %s: ext4_ext_map_blocks "
2009-09-28 23:49:08 +04:00
" returned error inode#%lu, block=%u, "
" max_blocks=%u " , __func__ ,
2010-05-17 04:00:00 +04:00
inode - > i_ino , map . m_lblk , map . m_len ) ;
2009-09-28 23:49:08 +04:00
}
ext4_mark_inode_dirty ( handle , inode ) ;
ret2 = ext4_journal_stop ( handle ) ;
if ( ret < = 0 | | ret2 )
break ;
}
return ret > 0 ? ret2 : ret ;
}
2011-02-28 01:25:47 +03:00
2008-10-07 08:46:36 +04:00
/*
* Callback function called for each extent to gather FIEMAP information .
*/
2011-06-06 08:06:52 +04:00
static int ext4_ext_fiemap_cb ( struct inode * inode , ext4_lblk_t next ,
2008-10-07 08:46:36 +04:00
struct ext4_ext_cache * newex , struct ext4_extent * ex ,
void * data )
{
__u64 logical ;
__u64 physical ;
__u64 length ;
__u32 flags = 0 ;
2011-02-28 01:25:47 +03:00
int ret = 0 ;
struct fiemap_extent_info * fieinfo = data ;
unsigned char blksize_bits ;
2008-10-07 08:46:36 +04:00
2011-02-28 01:25:47 +03:00
blksize_bits = inode - > i_sb - > s_blocksize_bits ;
logical = ( __u64 ) newex - > ec_block < < blksize_bits ;
2008-10-07 08:46:36 +04:00
2011-01-10 20:13:26 +03:00
if ( newex - > ec_start = = 0 ) {
2011-02-28 01:25:47 +03:00
/*
* No extent in extent - tree contains block @ newex - > ec_start ,
* then the block may stay in 1 ) a hole or 2 ) delayed - extent .
*
* Holes or delayed - extents are processed as follows .
* 1. lookup dirty pages with specified range in pagecache .
* If no page is found , then there is no delayed - extent and
* return with EXT_CONTINUE .
* 2. find the 1 st mapped buffer ,
* 3. check if the mapped buffer is both in the request range
* and a delayed buffer . If not , there is no delayed - extent ,
* then return .
* 4. a delayed - extent is found , the extent will be collected .
*/
ext4_lblk_t end = 0 ;
pgoff_t last_offset ;
pgoff_t offset ;
pgoff_t index ;
2011-05-24 19:36:58 +04:00
pgoff_t start_index = 0 ;
2011-02-28 01:25:47 +03:00
struct page * * pages = NULL ;
2008-10-07 08:46:36 +04:00
struct buffer_head * bh = NULL ;
2011-02-28 01:25:47 +03:00
struct buffer_head * head = NULL ;
unsigned int nr_pages = PAGE_SIZE / sizeof ( struct page * ) ;
pages = kmalloc ( PAGE_SIZE , GFP_KERNEL ) ;
if ( pages = = NULL )
return - ENOMEM ;
2008-10-07 08:46:36 +04:00
offset = logical > > PAGE_SHIFT ;
2011-02-28 01:25:47 +03:00
repeat :
last_offset = offset ;
head = NULL ;
ret = find_get_pages_tag ( inode - > i_mapping , & offset ,
PAGECACHE_TAG_DIRTY , nr_pages , pages ) ;
if ( ! ( flags & FIEMAP_EXTENT_DELALLOC ) ) {
/* First time, try to find a mapped buffer. */
if ( ret = = 0 ) {
out :
for ( index = 0 ; index < ret ; index + + )
page_cache_release ( pages [ index ] ) ;
/* just a hole. */
kfree ( pages ) ;
return EXT_CONTINUE ;
}
2011-05-24 19:36:58 +04:00
index = 0 ;
2008-10-07 08:46:36 +04:00
2011-05-24 19:36:58 +04:00
next_page :
2011-02-28 01:25:47 +03:00
/* Try to find the 1st mapped buffer. */
2011-05-24 19:36:58 +04:00
end = ( ( __u64 ) pages [ index ] - > index < < PAGE_SHIFT ) > >
2011-02-28 01:25:47 +03:00
blksize_bits ;
2011-05-24 19:36:58 +04:00
if ( ! page_has_buffers ( pages [ index ] ) )
2011-02-28 01:25:47 +03:00
goto out ;
2011-05-24 19:36:58 +04:00
head = page_buffers ( pages [ index ] ) ;
2011-02-28 01:25:47 +03:00
if ( ! head )
goto out ;
2008-10-07 08:46:36 +04:00
2011-05-24 19:36:58 +04:00
index + + ;
2011-02-28 01:25:47 +03:00
bh = head ;
do {
2011-05-24 19:36:58 +04:00
if ( end > = newex - > ec_block +
newex - > ec_len )
/* The buffer is out of
* the request range .
*/
goto out ;
if ( buffer_mapped ( bh ) & &
end > = newex - > ec_block ) {
start_index = index - 1 ;
2011-02-28 01:25:47 +03:00
/* get the 1st mapped buffer. */
goto found_mapped_buffer ;
}
2011-05-24 19:36:58 +04:00
2011-02-28 01:25:47 +03:00
bh = bh - > b_this_page ;
end + + ;
} while ( bh ! = head ) ;
			/* No mapped buffer in the range found in this page;
			 * we need to look up the next page.
			 */
			if (index >= ret) {
				/* There is no page left, but we need to limit
				 * newex->ec_len.
				 */
				newex->ec_len = end - newex->ec_block;
				goto out;
			}
			goto next_page;
		} else {
			/* Find contiguous delayed buffers. */
			if (ret > 0 && pages[0]->index == last_offset)
				head = page_buffers(pages[0]);
			bh = head;
			index = 1;
			start_index = 0;
		}
found_mapped_buffer:
		if (bh != NULL && buffer_delay(bh)) {
			/* First or contiguous delayed buffer found. */
			if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
				/*
				 * First delayed buffer found; record
				 * the start of the extent.
				 */
				flags |= FIEMAP_EXTENT_DELALLOC;
				newex->ec_block = end;
				logical = (__u64)end << blksize_bits;
			}
			/* Find contiguous delayed buffers. */
			do {
				if (!buffer_delay(bh))
					goto found_delayed_extent;
				bh = bh->b_this_page;
				end++;
			} while (bh != head);
			for (; index < ret; index++) {
				if (!page_has_buffers(pages[index])) {
					bh = NULL;
					break;
				}
				head = page_buffers(pages[index]);
				if (!head) {
					bh = NULL;
					break;
				}

				if (pages[index]->index !=
				    pages[start_index]->index + index
				    - start_index) {
					/* Blocks are not contiguous. */
					bh = NULL;
					break;
				}
				bh = head;
				do {
					if (!buffer_delay(bh))
						/* Delayed-extent ends. */
						goto found_delayed_extent;
					bh = bh->b_this_page;
					end++;
				} while (bh != head);
			}
		} else if (!(flags & FIEMAP_EXTENT_DELALLOC))
			/* a hole found. */
			goto out;
found_delayed_extent:
		newex->ec_len = min(end - newex->ec_block,
				    (ext4_lblk_t)EXT_INIT_MAX_LEN);
		if (ret == nr_pages && bh != NULL &&
		    newex->ec_len < EXT_INIT_MAX_LEN &&
		    buffer_delay(bh)) {
			/* Have not collected an extent and continue. */
			for (index = 0; index < ret; index++)
				page_cache_release(pages[index]);
			goto repeat;
		}

		for (index = 0; index < ret; index++)
			page_cache_release(pages[index]);
		kfree(pages);
	}
	physical = (__u64)newex->ec_start << blksize_bits;
	length = (__u64)newex->ec_len << blksize_bits;

	if (ex && ext4_ext_is_uninitialized(ex))
		flags |= FIEMAP_EXTENT_UNWRITTEN;

	if (next == EXT_MAX_BLOCKS)
		flags |= FIEMAP_EXTENT_LAST;

	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
					length, flags);

	if (ret < 0)
		return ret;
	if (ret == 1)
		return EXT_BREAK;
	return EXT_CONTINUE;
}
/* fiemap flags we can handle specified here */
#define EXT4_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
static int ext4_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	__u64 physical = 0;
	__u64 length;
	__u32 flags = FIEMAP_EXTENT_LAST;
	int blockbits = inode->i_sb->s_blocksize_bits;
	int error = 0;

	/* in-inode? */
	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
		struct ext4_iloc iloc;
		int offset;	/* offset of xattr in inode */

		error = ext4_get_inode_loc(inode, &iloc);
		if (error)
			return error;
		physical = iloc.bh->b_blocknr << blockbits;
		offset = EXT4_GOOD_OLD_INODE_SIZE +
				EXT4_I(inode)->i_extra_isize;
		physical += offset;
		length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
		flags |= FIEMAP_EXTENT_DATA_INLINE;
		brelse(iloc.bh);
	} else { /* external block */
		physical = EXT4_I(inode)->i_file_acl << blockbits;
		length = inode->i_sb->s_blocksize;
	}

	if (physical)
		error = fiemap_fill_next_extent(fieinfo, 0, physical,
						length, flags);
	return (error < 0 ? error : 0);
}
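
/*
 * For example, with 256-byte inodes and i_extra_isize = 28 (a common
 * value, though filesystem-dependent), the in-inode xattr region that
 * ext4_xattr_fiemap() reports above starts at byte 128 + 28 = 156 of
 * the inode and the extent is 256 - 156 = 100 bytes long.
 */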
/*
 * ext4_ext_punch_hole
 *
 * Punches a hole of "length" bytes in a file starting
 * at byte "offset"
 *
 * @file:   The file to punch a hole in
 * @offset: The starting byte offset of the hole
 * @length: The length of the hole
 *
 * Returns the number of blocks removed or negative on err
 */
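/*
 * Illustrative only, not part of this file: userspace reaches this path
 * through fallocate(2).  A minimal sketch, assuming an open fd on an
 * extent-mapped ext4 file:
 *
 *	#include <fcntl.h>
 *	#include <linux/falloc.h>
 *
 *	// Punch a 1MB hole at offset 4096.  FALLOC_FL_PUNCH_HOLE must be
 *	// combined with FALLOC_FL_KEEP_SIZE, so i_size is left unchanged.
 *	int err = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *			    4096, 1024 * 1024);
 */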
int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct ext4_ext_cache cache_ex;
	ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
	struct address_space *mapping = inode->i_mapping;
	struct ext4_map_blocks map;
	handle_t *handle;
	loff_t first_page, last_page, page_len;
	loff_t first_page_offset, last_page_offset;
	int ret, credits, blocks_released, err = 0;
	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		return 0;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		   PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
		   offset;
	}
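	/*
	 * For example, with 4K pages, i_size = 10000 and a requested hole
	 * of offset = 8192, length = 100000: the hole is clamped to end at
	 * byte 12288 (the end of the page holding i_size), i.e.
	 * length = 10000 + 4096 - (10000 & 4095) - 8192 = 4096.
	 */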
	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	last_page = (offset + length) >> PAGE_CACHE_SHIFT;

	first_page_offset = first_page << PAGE_CACHE_SHIFT;
	last_page_offset = last_page << PAGE_CACHE_SHIFT;
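	/*
	 * first_block/first_page round the start of the hole up while
	 * last_block/last_page round its end down, so only blocks and
	 * pages lying entirely inside [offset, offset + length) are
	 * removed; the partial pages at either edge are zeroed below
	 * instead.
	 */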
	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		err = filemap_write_and_wait_range(mapping,
			offset, offset + length - 1);

		if (err)
			return err;
	}
	/* Now release the pages */
	if (last_page_offset > first_page_offset) {
		truncate_inode_pages_range(mapping, first_page_offset,
					   last_page_offset - 1);
	}

	/* finish any pending end_io work */
	ext4_flush_completed_IO(inode);

	credits = ext4_writepage_trans_blocks(inode);
	handle = ext4_journal_start(inode, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext4_orphan_add(handle, inode);
	if (err)
		goto out;
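	/*
	 * The inode sits on the orphan list while blocks are being
	 * released, so that a crash mid-punch can be cleaned up on the
	 * next mount rather than leaving half-punched state behind.
	 */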
	/*
	 * Now we need to zero out the non-page-aligned data in the
	 * pages at the start and tail of the hole, and unmap the buffer
	 * heads for the block aligned regions of the page that were
	 * completely zeroed.
	 */
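	/*
	 * E.g. with 4K pages, a hole of offset = 1000, length = 2000 lies
	 * within a single page: first_page = 1 > last_page = 0, so only
	 * the middle of that one page is zeroed.  A hole spanning several
	 * pages zeroes the partial head and tail pages separately.
	 */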
	if (first_page > last_page) {
		/*
		 * If the file space being truncated is contained within a page
		 * just zero out and unmap the middle of that page
		 */
		err = ext4_discard_partial_page_buffers(handle,
			mapping, offset, length, 0);
		if (err)
			goto out;
	} else {
		/*
		 * zero out and unmap the partial page that contains
		 * the start of the hole
		 */
		page_len  = first_page_offset - offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
						   offset, page_len, 0);
			if (err)
				goto out;
		}

		/*
		 * zero out and unmap the partial page that contains
		 * the end of the hole
		 */
		page_len = offset + length - last_page_offset;
		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle, mapping,
					last_page_offset, page_len, 0);
			if (err)
				goto out;
		}
	}
	/*
	 * If i_size is contained in the last page, we need to
	 * unmap and zero the partial page after i_size
	 */
	if (inode->i_size >> PAGE_CACHE_SHIFT == last_page &&
	    inode->i_size % PAGE_CACHE_SIZE != 0) {
		page_len = PAGE_CACHE_SIZE -
			(inode->i_size & (PAGE_CACHE_SIZE - 1));

		if (page_len > 0) {
			err = ext4_discard_partial_page_buffers(handle,
				mapping, inode->i_size, page_len, 0);

			if (err)
				goto out;
		}
	}
	/* If there are no blocks to remove, return now */
	if (first_block >= last_block)
		goto out;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_ext_invalidate_cache(inode);
	ext4_discard_preallocations(inode);

	/*
	 * Loop over all the blocks and identify blocks
	 * that need to be punched out
	 */
	iblock = first_block;
	blocks_released = 0;
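	/*
	 * Each pass below either unmaps up to max_blocks blocks of a real
	 * extent (ret > 0) or, when ret == 0, uses the extent cache to
	 * skip an entire hole at once rather than one block at a time.
	 */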
	while (iblock < last_block) {
		max_blocks = last_block - iblock;
		num_blocks = 1;
		memset(&map, 0, sizeof(map));
		map.m_lblk = iblock;
		map.m_len = max_blocks;
		ret = ext4_ext_map_blocks(handle, inode, &map,
					  EXT4_GET_BLOCKS_PUNCH_OUT_EXT);

		if (ret > 0) {
			blocks_released += ret;
			num_blocks = ret;
		} else if (ret == 0) {
			/*
			 * If map blocks could not find the block,
			 * then it is in a hole.  If the hole was
			 * not already cached, then map blocks should
			 * put it in the cache.  So we can get the hole
			 * out of the cache
			 */
			memset(&cache_ex, 0, sizeof(cache_ex));
			if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
				!cache_ex.ec_start) {

				/* The hole is cached */
				num_blocks = cache_ex.ec_block +
				cache_ex.ec_len - iblock;

			} else {
				/* The block could not be identified */
				err = -EIO;
				break;
			}
		} else {
			/* Map blocks error */
			err = ret;
			break;
		}

		if (num_blocks == 0) {
			/* This condition should never happen */
			ext_debug("Block lookup failed");
			err = -EIO;
			break;
		}

		iblock += num_blocks;
	}
	if (blocks_released > 0) {
		ext4_ext_invalidate_cache(inode);
		ext4_discard_preallocations(inode);
	}

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	up_write(&EXT4_I(inode)->i_data_sem);

out:
	ext4_orphan_del(handle, inode);
	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
	ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
	return err;
}
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		__u64 start, __u64 len)
{
	ext4_lblk_t start_blk;
	int error = 0;

	/* fallback to generic here if not in extents fmt */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return generic_block_fiemap(inode, fieinfo, start, len,
			ext4_get_block);

	if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
		return -EBADR;

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		error = ext4_xattr_fiemap(inode, fieinfo);
	} else {
		ext4_lblk_t len_blks;
		__u64 last_blk;

		start_blk = start >> inode->i_sb->s_blocksize_bits;
		last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
		if (last_blk >= EXT_MAX_BLOCKS)
			last_blk = EXT_MAX_BLOCKS - 1;
		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;

		/*
		 * Walk the extent tree gathering extent information.
		 * ext4_ext_fiemap_cb will push extents back to user.
		 */
		error = ext4_ext_walk_space(inode, start_blk, len_blks,
					  ext4_ext_fiemap_cb, fieinfo);
	}

	return error;
}
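
/*
 * Illustrative only, not part of this file: a minimal userspace sketch of
 * driving this entry point through the FIEMAP ioctl (assumes an open fd;
 * a real caller would loop, restarting fm_start past the last returned
 * extent, until it sees FIEMAP_EXTENT_LAST):
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *	#include <linux/fiemap.h>
 *
 *	struct fiemap *fm = calloc(1, sizeof(*fm) +
 *				   32 * sizeof(struct fiemap_extent));
 *	fm->fm_start = 0;
 *	fm->fm_length = FIEMAP_MAX_OFFSET;	// whole file
 *	fm->fm_flags = FIEMAP_FLAG_SYNC;	// flush delalloc first
 *	fm->fm_extent_count = 32;		// room for 32 extents
 *	if (ioctl(fd, FS_IOC_FIEMAP, fm) == 0)
 *		printf("%u extents mapped\n", fm->fm_mapped_extents);
 */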