2005-12-16 01:31:24 +03:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* extent_map . c
*
2007-01-17 23:31:35 +03:00
* Block / Cluster mapping functions
2005-12-16 01:31:24 +03:00
*
* Copyright ( C ) 2004 Oracle . All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License , version 2 , as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 02111 - 1307 , USA .
*/
# include <linux/fs.h>
# include <linux/init.h>
# include <linux/types.h>
# define MLOG_MASK_PREFIX ML_EXTENT_MAP
# include <cluster/masklog.h>
# include "ocfs2.h"
2007-01-17 23:31:35 +03:00
# include "alloc.h"
2005-12-16 01:31:24 +03:00
# include "extent_map.h"
# include "inode.h"
# include "super.h"
# include "buffer_head_io.h"
2007-04-24 05:53:12 +04:00
/*
* The extent caching implementation is intentionally trivial .
*
* We only cache a small number of extents stored directly on the
* inode , so linear order operations are acceptable . If we ever want
* to increase the size of the extent map , then these algorithms must
* get smarter .
*/
void ocfs2_extent_map_init ( struct inode * inode )
{
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
oi - > ip_extent_map . em_num_items = 0 ;
INIT_LIST_HEAD ( & oi - > ip_extent_map . em_list ) ;
}
static void __ocfs2_extent_map_lookup ( struct ocfs2_extent_map * em ,
unsigned int cpos ,
struct ocfs2_extent_map_item * * ret_emi )
{
unsigned int range ;
struct ocfs2_extent_map_item * emi ;
* ret_emi = NULL ;
list_for_each_entry ( emi , & em - > em_list , ei_list ) {
range = emi - > ei_cpos + emi - > ei_clusters ;
if ( cpos > = emi - > ei_cpos & & cpos < range ) {
list_move ( & emi - > ei_list , & em - > em_list ) ;
* ret_emi = emi ;
break ;
}
}
}
static int ocfs2_extent_map_lookup ( struct inode * inode , unsigned int cpos ,
unsigned int * phys , unsigned int * len ,
unsigned int * flags )
{
unsigned int coff ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_extent_map_item * emi ;
spin_lock ( & oi - > ip_lock ) ;
__ocfs2_extent_map_lookup ( & oi - > ip_extent_map , cpos , & emi ) ;
if ( emi ) {
coff = cpos - emi - > ei_cpos ;
* phys = emi - > ei_phys + coff ;
if ( len )
* len = emi - > ei_clusters - coff ;
if ( flags )
* flags = emi - > ei_flags ;
}
spin_unlock ( & oi - > ip_lock ) ;
if ( emi = = NULL )
return - ENOENT ;
return 0 ;
}
/*
* Forget about all clusters equal to or greater than cpos .
*/
void ocfs2_extent_map_trunc ( struct inode * inode , unsigned int cpos )
{
2007-05-17 18:03:13 +04:00
struct ocfs2_extent_map_item * emi , * n ;
2007-04-24 05:53:12 +04:00
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_extent_map * em = & oi - > ip_extent_map ;
LIST_HEAD ( tmp_list ) ;
unsigned int range ;
spin_lock ( & oi - > ip_lock ) ;
2007-05-17 18:03:13 +04:00
list_for_each_entry_safe ( emi , n , & em - > em_list , ei_list ) {
2007-04-24 05:53:12 +04:00
if ( emi - > ei_cpos > = cpos ) {
/* Full truncate of this record. */
list_move ( & emi - > ei_list , & tmp_list ) ;
BUG_ON ( em - > em_num_items = = 0 ) ;
em - > em_num_items - - ;
continue ;
}
range = emi - > ei_cpos + emi - > ei_clusters ;
if ( range > cpos ) {
/* Partial truncate */
emi - > ei_clusters = cpos - emi - > ei_cpos ;
}
}
spin_unlock ( & oi - > ip_lock ) ;
2007-05-17 18:03:13 +04:00
list_for_each_entry_safe ( emi , n , & tmp_list , ei_list ) {
2007-04-24 05:53:12 +04:00
list_del ( & emi - > ei_list ) ;
kfree ( emi ) ;
}
}
/*
* Is any part of emi2 contained within emi1
*/
static int ocfs2_ei_is_contained ( struct ocfs2_extent_map_item * emi1 ,
struct ocfs2_extent_map_item * emi2 )
{
unsigned int range1 , range2 ;
/*
* Check if logical start of emi2 is inside emi1
*/
range1 = emi1 - > ei_cpos + emi1 - > ei_clusters ;
if ( emi2 - > ei_cpos > = emi1 - > ei_cpos & & emi2 - > ei_cpos < range1 )
return 1 ;
/*
* Check if logical end of emi2 is inside emi1
*/
range2 = emi2 - > ei_cpos + emi2 - > ei_clusters ;
if ( range2 > emi1 - > ei_cpos & & range2 < = range1 )
return 1 ;
return 0 ;
}
static void ocfs2_copy_emi_fields ( struct ocfs2_extent_map_item * dest ,
struct ocfs2_extent_map_item * src )
{
dest - > ei_cpos = src - > ei_cpos ;
dest - > ei_phys = src - > ei_phys ;
dest - > ei_clusters = src - > ei_clusters ;
dest - > ei_flags = src - > ei_flags ;
}
/*
* Try to merge emi with ins . Returns 1 if merge succeeds , zero
* otherwise .
*/
static int ocfs2_try_to_merge_extent_map ( struct ocfs2_extent_map_item * emi ,
struct ocfs2_extent_map_item * ins )
{
/*
* Handle contiguousness
*/
if ( ins - > ei_phys = = ( emi - > ei_phys + emi - > ei_clusters ) & &
ins - > ei_cpos = = ( emi - > ei_cpos + emi - > ei_clusters ) & &
ins - > ei_flags = = emi - > ei_flags ) {
emi - > ei_clusters + = ins - > ei_clusters ;
return 1 ;
} else if ( ( ins - > ei_phys + ins - > ei_clusters ) = = emi - > ei_phys & &
( ins - > ei_cpos + ins - > ei_clusters ) = = emi - > ei_phys & &
ins - > ei_flags = = emi - > ei_flags ) {
emi - > ei_phys = ins - > ei_phys ;
emi - > ei_cpos = ins - > ei_cpos ;
emi - > ei_clusters + = ins - > ei_clusters ;
return 1 ;
}
/*
* Overlapping extents - this shouldn ' t happen unless we ' ve
* split an extent to change it ' s flags . That is exceedingly
* rare , so there ' s no sense in trying to optimize it yet .
*/
if ( ocfs2_ei_is_contained ( emi , ins ) | |
ocfs2_ei_is_contained ( ins , emi ) ) {
ocfs2_copy_emi_fields ( emi , ins ) ;
return 1 ;
}
/* No merge was possible. */
return 0 ;
}
/*
* In order to reduce complexity on the caller , this insert function
* is intentionally liberal in what it will accept .
*
* The only rule is that the truncate call * must * be used whenever
* records have been deleted . This avoids inserting overlapping
* records with different physical mappings .
*/
void ocfs2_extent_map_insert_rec ( struct inode * inode ,
struct ocfs2_extent_rec * rec )
{
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_extent_map * em = & oi - > ip_extent_map ;
struct ocfs2_extent_map_item * emi , * new_emi = NULL ;
struct ocfs2_extent_map_item ins ;
ins . ei_cpos = le32_to_cpu ( rec - > e_cpos ) ;
ins . ei_phys = ocfs2_blocks_to_clusters ( inode - > i_sb ,
le64_to_cpu ( rec - > e_blkno ) ) ;
ins . ei_clusters = le16_to_cpu ( rec - > e_leaf_clusters ) ;
ins . ei_flags = rec - > e_flags ;
search :
spin_lock ( & oi - > ip_lock ) ;
list_for_each_entry ( emi , & em - > em_list , ei_list ) {
if ( ocfs2_try_to_merge_extent_map ( emi , & ins ) ) {
list_move ( & emi - > ei_list , & em - > em_list ) ;
spin_unlock ( & oi - > ip_lock ) ;
goto out ;
}
}
/*
* No item could be merged .
*
* Either allocate and add a new item , or overwrite the last recently
* inserted .
*/
if ( em - > em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS ) {
if ( new_emi = = NULL ) {
spin_unlock ( & oi - > ip_lock ) ;
new_emi = kmalloc ( sizeof ( * new_emi ) , GFP_NOFS ) ;
if ( new_emi = = NULL )
goto out ;
goto search ;
}
ocfs2_copy_emi_fields ( new_emi , & ins ) ;
list_add ( & new_emi - > ei_list , & em - > em_list ) ;
em - > em_num_items + + ;
new_emi = NULL ;
} else {
BUG_ON ( list_empty ( & em - > em_list ) | | em - > em_num_items = = 0 ) ;
emi = list_entry ( em - > em_list . prev ,
struct ocfs2_extent_map_item , ei_list ) ;
list_move ( & emi - > ei_list , & em - > em_list ) ;
ocfs2_copy_emi_fields ( emi , & ins ) ;
}
spin_unlock ( & oi - > ip_lock ) ;
out :
if ( new_emi )
kfree ( new_emi ) ;
}
2007-03-10 03:26:50 +03:00
/*
 * Return the first index within el whose record starts (e_cpos) at a
 * cluster larger than v_cluster. If no such record exists among the
 * first l_next_free_rec records, l_next_free_rec itself is returned.
 */
static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
				       u32 v_cluster)
{
	int idx = 0;

	while (idx < le16_to_cpu(el->l_next_free_rec) &&
	       v_cluster >= le32_to_cpu(el->l_recs[idx].e_cpos))
		idx++;

	return idx;
}
/*
 * Figure out the size of a hole which starts at v_cluster within the
 * given extent list, and store it in *num_clusters.
 *
 * If there is no more allocation past v_cluster, the result is
 * UINT_MAX - v_cluster.
 *
 * If we have in-inode extents, then el points to the dinode list and
 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 * containing el; when el holds no record past v_cluster, the next leaf
 * (h_next_leaf_blk), if any, is read and searched as well.
 *
 * Returns 0 on success, the error from ocfs2_read_block() if the next
 * leaf cannot be read, or -EROFS if the next leaf is not a valid extent
 * block.
 */
static int ocfs2_figure_hole_clusters(struct inode *inode,
				      struct ocfs2_extent_list *el,
				      struct buffer_head *eb_bh,
				      u32 v_cluster,
				      u32 *num_clusters)
{
	int status, idx;
	struct buffer_head *next_bh = NULL;
	struct ocfs2_extent_block *cur_eb, *next_eb;

	idx = ocfs2_search_for_hole_index(el, v_cluster);
	if (idx == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
		cur_eb = (struct ocfs2_extent_block *)eb_bh->b_data;

		/*
		 * Nothing past v_cluster in this leaf; look at the next
		 * leaf, if there is one.
		 */
		if (le64_to_cpu(cur_eb->h_next_leaf_blk) != 0ULL) {
			status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
						  le64_to_cpu(cur_eb->h_next_leaf_blk),
						  &next_bh, OCFS2_BH_CACHED,
						  inode);
			if (status) {
				mlog_errno(status);
				goto out;
			}

			next_eb = (struct ocfs2_extent_block *)next_bh->b_data;
			if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
				status = -EROFS;
				OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
								 next_eb);
				goto out;
			}

			el = &next_eb->h_list;
			idx = ocfs2_search_for_hole_index(el, v_cluster);
		}
	}

	if (idx == le16_to_cpu(el->l_next_free_rec)) {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT_MAX - v_cluster;
	} else {
		/* The hole runs up to the start of the next record. */
		*num_clusters = le32_to_cpu(el->l_recs[idx].e_cpos) - v_cluster;
	}

	status = 0;
out:
	/* el may point into next_bh, so it is released only here. */
	brelse(next_bh);
	return status;
}
2007-02-10 07:24:12 +03:00
/*
 * Map the logical cluster v_cluster of inode to a physical cluster.
 *
 * The extent map cache is consulted first; on a miss the inode block
 * (and, for a tree with non-zero depth, the matching leaf block) is read
 * and its extent list searched. A record found this way is inserted into
 * the cache.
 *
 * @p_cluster:    receives the physical cluster, or 0 if v_cluster falls
 *                in a hole. Always written on success.
 * @num_clusters: optional. Receives the number of clusters from
 *                v_cluster to the end of the extent or, for a hole, the
 *                size of the hole.
 * @extent_flags: optional. Receives the flags of the extent (0 for a
 *                hole found in the on-disk extent list).
 *
 * Returns 0 on success, -ERANGE for an inode with inline data, -EROFS
 * when the on-disk extent data is found to be inconsistent, or the error
 * of a failed block read / leaf search / hole size computation.
 */
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
		       u32 *p_cluster, u32 *num_clusters,
		       unsigned int *extent_flags)
{
	int status, idx;
	unsigned int rec_flags = 0;
	struct buffer_head *di_bh = NULL;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec;
	u32 coff;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		status = -ERANGE;
		mlog_errno(status);
		goto out;
	}

	/* A cache hit fills in all requested outputs, flags included. */
	status = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
					 num_clusters, extent_flags);
	if (status == 0)
		goto out;

	status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
				  OCFS2_I(inode)->ip_blkno,
				  &di_bh, OCFS2_BH_CACHED, inode);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;
	el = &di->id2.i_list;

	if (el->l_tree_depth) {
		/* Extents live in a tree: descend to the leaf for v_cluster. */
		status = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
		if (status) {
			mlog_errno(status);
			goto out;
		}

		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    " Inode %lu has non zero tree depth in "
				    " leaf block %llu \n ", inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			status = -EROFS;
			goto out;
		}
	}

	idx = ocfs2_search_extent_list(el, v_cluster);
	if (idx != -1) {
		rec = &el->l_recs[idx];

		BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));

		if (!rec->e_blkno) {
			ocfs2_error(inode->i_sb, " Inode %lu has bad extent "
				    " record (%u, %u, 0) ", inode->i_ino,
				    le32_to_cpu(rec->e_cpos),
				    ocfs2_rec_clusters(el, rec));
			status = -EROFS;
			goto out;
		}

		/* Offset of v_cluster from the start of the record. */
		coff = v_cluster - le32_to_cpu(rec->e_cpos);

		*p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
						le64_to_cpu(rec->e_blkno)) + coff;

		if (num_clusters)
			*num_clusters = ocfs2_rec_clusters(el, rec) - coff;

		rec_flags = rec->e_flags;

		ocfs2_extent_map_insert_rec(inode, rec);
	} else {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			status = ocfs2_figure_hole_clusters(inode, el, eb_bh,
							    v_cluster,
							    num_clusters);
			if (status) {
				mlog_errno(status);
				goto out;
			}
		}
	}

	if (extent_flags)
		*extent_flags = rec_flags;

out:
	brelse(di_bh);
	brelse(eb_bh);
	return status;
}
/*
2007-01-17 23:31:35 +03:00
* This expects alloc_sem to be held . The allocation cannot change at
* all while the map is in the process of being updated .
2005-12-16 01:31:24 +03:00
*/
2007-01-17 23:31:35 +03:00
int ocfs2_extent_map_get_blocks ( struct inode * inode , u64 v_blkno , u64 * p_blkno ,
2007-03-10 03:26:50 +03:00
u64 * ret_count , unsigned int * extent_flags )
2005-12-16 01:31:24 +03:00
{
int ret ;
int bpc = ocfs2_clusters_to_blocks ( inode - > i_sb , 1 ) ;
2007-01-17 23:31:35 +03:00
u32 cpos , num_clusters , p_cluster ;
u64 boff = 0 ;
2005-12-16 01:31:24 +03:00
cpos = ocfs2_blocks_to_clusters ( inode - > i_sb , v_blkno ) ;
2007-03-10 03:21:46 +03:00
ret = ocfs2_get_clusters ( inode , cpos , & p_cluster , & num_clusters ,
extent_flags ) ;
2005-12-16 01:31:24 +03:00
if ( ret ) {
mlog_errno ( ret ) ;
2007-01-17 23:31:35 +03:00
goto out ;
2005-12-16 01:31:24 +03:00
}
2007-01-17 23:31:35 +03:00
/*
* p_cluster = = 0 indicates a hole .
*/
if ( p_cluster ) {
boff = ocfs2_clusters_to_blocks ( inode - > i_sb , p_cluster ) ;
2005-12-16 01:31:24 +03:00
boff + = ( v_blkno & ( u64 ) ( bpc - 1 ) ) ;
}
2007-01-17 23:31:35 +03:00
* p_blkno = boff ;
2005-12-16 01:31:24 +03:00
2007-01-17 23:31:35 +03:00
if ( ret_count ) {
* ret_count = ocfs2_clusters_to_blocks ( inode - > i_sb , num_clusters ) ;
* ret_count - = v_blkno & ( u64 ) ( bpc - 1 ) ;
2005-12-16 01:31:24 +03:00
}
2007-01-17 23:31:35 +03:00
out :
return ret ;
2005-12-16 01:31:24 +03:00
}