2009-08-18 07:19:26 +04:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* refcounttree . c
*
* Copyright ( C ) 2009 Oracle . All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*/
2009-08-11 10:33:14 +04:00
# include <linux/sort.h>
2009-08-18 07:19:26 +04:00
# define MLOG_MASK_PREFIX ML_REFCOUNT
# include <cluster/masklog.h>
# include "ocfs2.h"
# include "inode.h"
# include "alloc.h"
# include "suballoc.h"
# include "journal.h"
# include "uptodate.h"
# include "super.h"
# include "buffer_head_io.h"
# include "blockcheck.h"
2009-08-18 07:21:00 +04:00
# include "refcounttree.h"
2009-08-24 07:12:02 +04:00
# include "sysfile.h"
2009-08-24 07:13:37 +04:00
# include "dlmglue.h"
2009-08-11 10:33:14 +04:00
# include "extent_map.h"
2009-08-25 04:05:12 +04:00
# include "aops.h"
2009-08-18 07:43:17 +04:00
# include "xattr.h"
2009-09-21 06:38:17 +04:00
# include "namei.h"
2009-08-25 04:05:12 +04:00
# include <linux/bio.h>
# include <linux/blkdev.h>
# include <linux/gfp.h>
# include <linux/slab.h>
# include <linux/writeback.h>
# include <linux/pagevec.h>
# include <linux/swap.h>
2009-09-21 07:25:14 +04:00
# include <linux/security.h>
# include <linux/fsnotify.h>
# include <linux/quotaops.h>
# include <linux/namei.h>
# include <linux/mount.h>
2009-08-25 04:05:12 +04:00
struct ocfs2_cow_context {
struct inode * inode ;
u32 cow_start ;
u32 cow_len ;
2009-08-24 10:31:03 +04:00
struct ocfs2_extent_tree data_et ;
struct ocfs2_refcount_tree * ref_tree ;
2009-08-25 04:05:12 +04:00
struct buffer_head * ref_root_bh ;
struct ocfs2_alloc_context * meta_ac ;
struct ocfs2_alloc_context * data_ac ;
struct ocfs2_cached_dealloc_ctxt dealloc ;
2009-08-18 07:43:17 +04:00
void * cow_object ;
struct ocfs2_post_refcount * post_refcount ;
int extra_credits ;
2009-08-24 10:31:03 +04:00
int ( * get_clusters ) ( struct ocfs2_cow_context * context ,
u32 v_cluster , u32 * p_cluster ,
u32 * num_clusters ,
unsigned int * extent_flags ) ;
int ( * cow_duplicate_clusters ) ( handle_t * handle ,
struct ocfs2_cow_context * context ,
u32 cpos , u32 old_cluster ,
u32 new_cluster , u32 new_len ) ;
2009-08-25 04:05:12 +04:00
} ;
2009-08-18 07:21:00 +04:00
/* Map an embedded rf_ci caching-info back to the refcount tree holding it. */
static inline struct ocfs2_refcount_tree *
cache_info_to_refcount(struct ocfs2_caching_info *ci)
{
	struct ocfs2_refcount_tree *owner;

	owner = container_of(ci, struct ocfs2_refcount_tree, rf_ci);
	return owner;
}
2009-08-18 07:19:26 +04:00
static int ocfs2_validate_refcount_block ( struct super_block * sb ,
struct buffer_head * bh )
{
int rc ;
struct ocfs2_refcount_block * rb =
( struct ocfs2_refcount_block * ) bh - > b_data ;
mlog ( 0 , " Validating refcount block %llu \n " ,
( unsigned long long ) bh - > b_blocknr ) ;
BUG_ON ( ! buffer_uptodate ( bh ) ) ;
/*
* If the ecc fails , we return the error but otherwise
* leave the filesystem running . We know any error is
* local to this block .
*/
rc = ocfs2_validate_meta_ecc ( sb , bh - > b_data , & rb - > rf_check ) ;
if ( rc ) {
mlog ( ML_ERROR , " Checksum failed for refcount block %llu \n " ,
( unsigned long long ) bh - > b_blocknr ) ;
return rc ;
}
if ( ! OCFS2_IS_VALID_REFCOUNT_BLOCK ( rb ) ) {
ocfs2_error ( sb ,
" Refcount block #%llu has bad signature %.*s " ,
( unsigned long long ) bh - > b_blocknr , 7 ,
rb - > rf_signature ) ;
return - EINVAL ;
}
if ( le64_to_cpu ( rb - > rf_blkno ) ! = bh - > b_blocknr ) {
ocfs2_error ( sb ,
" Refcount block #%llu has an invalid rf_blkno "
" of %llu " ,
( unsigned long long ) bh - > b_blocknr ,
( unsigned long long ) le64_to_cpu ( rb - > rf_blkno ) ) ;
return - EINVAL ;
}
if ( le32_to_cpu ( rb - > rf_fs_generation ) ! = OCFS2_SB ( sb ) - > fs_generation ) {
ocfs2_error ( sb ,
" Refcount block #%llu has an invalid "
" rf_fs_generation of #%u " ,
( unsigned long long ) bh - > b_blocknr ,
le32_to_cpu ( rb - > rf_fs_generation ) ) ;
return - EINVAL ;
}
return 0 ;
}
/*
 * Read refcount block rb_blkno through ocfs2_read_block(), validating it
 * with ocfs2_validate_refcount_block().
 *
 * If the caller passed *bh == NULL and the read succeeded, the buffer
 * obtained by the read is stored in *bh.  Returns the
 * ocfs2_read_block() status.
 */
static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
				     u64 rb_blkno,
				     struct buffer_head **bh)
{
	struct buffer_head *read_bh = *bh;
	int status;

	status = ocfs2_read_block(ci, rb_blkno, &read_bh,
				  ocfs2_validate_refcount_block);
	if (status)
		return status;

	/* The caller supplied no buffer, so hand back the one we got. */
	if (*bh == NULL)
		*bh = read_bh;

	return status;
}
2009-08-18 07:21:00 +04:00
static u64 ocfs2_refcount_cache_owner ( struct ocfs2_caching_info * ci )
{
struct ocfs2_refcount_tree * rf = cache_info_to_refcount ( ci ) ;
return rf - > rf_blkno ;
}
static struct super_block *
ocfs2_refcount_cache_get_super ( struct ocfs2_caching_info * ci )
{
struct ocfs2_refcount_tree * rf = cache_info_to_refcount ( ci ) ;
return rf - > rf_sb ;
}
static void ocfs2_refcount_cache_lock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_refcount_tree * rf = cache_info_to_refcount ( ci ) ;
spin_lock ( & rf - > rf_lock ) ;
}
static void ocfs2_refcount_cache_unlock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_refcount_tree * rf = cache_info_to_refcount ( ci ) ;
spin_unlock ( & rf - > rf_lock ) ;
}
static void ocfs2_refcount_cache_io_lock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_refcount_tree * rf = cache_info_to_refcount ( ci ) ;
mutex_lock ( & rf - > rf_io_mutex ) ;
}
static void ocfs2_refcount_cache_io_unlock ( struct ocfs2_caching_info * ci )
{
struct ocfs2_refcount_tree * rf = cache_info_to_refcount ( ci ) ;
mutex_unlock ( & rf - > rf_io_mutex ) ;
}
/*
 * Caching operations for the rf_ci embedded in a refcount tree; handed
 * to ocfs2_metadata_cache_init() when a tree's cache is set up.
 */
static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
	/* identity */
	.co_get_super		= ocfs2_refcount_cache_get_super,
	.co_owner		= ocfs2_refcount_cache_owner,
	/* rf_lock spinlock */
	.co_cache_lock		= ocfs2_refcount_cache_lock,
	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
	/* rf_io_mutex */
	.co_io_lock		= ocfs2_refcount_cache_io_lock,
	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
};
2009-08-24 07:13:37 +04:00
/*
 * Search osb->osb_rf_lock_tree, which is keyed by rf_blkno, for the tree
 * whose root block is blkno.  Returns the tree, or NULL when there is no
 * such entry.  No locking is done here.
 */
static struct ocfs2_refcount_tree *
ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
{
	struct rb_node *node = osb->osb_rf_lock_tree.rb_node;

	while (node) {
		struct ocfs2_refcount_tree *cur =
			rb_entry(node, struct ocfs2_refcount_tree, rf_node);

		if (blkno == cur->rf_blkno)
			return cur;

		node = (blkno < cur->rf_blkno) ? node->rb_left :
						 node->rb_right;
	}

	return NULL;
}
/* osb_lock is already locked. */
static void ocfs2_insert_refcount_tree ( struct ocfs2_super * osb ,
struct ocfs2_refcount_tree * new )
{
u64 rf_blkno = new - > rf_blkno ;
struct rb_node * parent = NULL ;
struct rb_node * * p = & osb - > osb_rf_lock_tree . rb_node ;
struct ocfs2_refcount_tree * tmp ;
while ( * p ) {
parent = * p ;
tmp = rb_entry ( parent , struct ocfs2_refcount_tree ,
rf_node ) ;
if ( rf_blkno < tmp - > rf_blkno )
p = & ( * p ) - > rb_left ;
else if ( rf_blkno > tmp - > rf_blkno )
p = & ( * p ) - > rb_right ;
else {
/* This should never happen! */
mlog ( ML_ERROR , " Duplicate refcount block %llu found! \n " ,
( unsigned long long ) rf_blkno ) ;
BUG ( ) ;
}
}
rb_link_node ( & new - > rf_node , parent , p ) ;
rb_insert_color ( & new - > rf_node , & osb - > osb_rf_lock_tree ) ;
}
static void ocfs2_free_refcount_tree ( struct ocfs2_refcount_tree * tree )
{
ocfs2_metadata_cache_exit ( & tree - > rf_ci ) ;
ocfs2_simple_drop_lockres ( OCFS2_SB ( tree - > rf_sb ) , & tree - > rf_lockres ) ;
ocfs2_lock_res_free ( & tree - > rf_lockres ) ;
kfree ( tree ) ;
}
static inline void
ocfs2_erase_refcount_tree_from_list_no_lock ( struct ocfs2_super * osb ,
struct ocfs2_refcount_tree * tree )
{
rb_erase ( & tree - > rf_node , & osb - > osb_rf_lock_tree ) ;
if ( osb - > osb_ref_tree_lru & & osb - > osb_ref_tree_lru = = tree )
osb - > osb_ref_tree_lru = NULL ;
}
static void ocfs2_erase_refcount_tree_from_list ( struct ocfs2_super * osb ,
struct ocfs2_refcount_tree * tree )
{
spin_lock ( & osb - > osb_lock ) ;
ocfs2_erase_refcount_tree_from_list_no_lock ( osb , tree ) ;
spin_unlock ( & osb - > osb_lock ) ;
}
2009-11-30 10:08:40 +03:00
static void ocfs2_kref_remove_refcount_tree ( struct kref * kref )
2009-08-24 07:13:37 +04:00
{
struct ocfs2_refcount_tree * tree =
container_of ( kref , struct ocfs2_refcount_tree , rf_getcnt ) ;
ocfs2_free_refcount_tree ( tree ) ;
}
static inline void
ocfs2_refcount_tree_get ( struct ocfs2_refcount_tree * tree )
{
kref_get ( & tree - > rf_getcnt ) ;
}
static inline void
ocfs2_refcount_tree_put ( struct ocfs2_refcount_tree * tree )
{
kref_put ( & tree - > rf_getcnt , ocfs2_kref_remove_refcount_tree ) ;
}
static inline void ocfs2_init_refcount_tree_ci ( struct ocfs2_refcount_tree * new ,
struct super_block * sb )
{
ocfs2_metadata_cache_init ( & new - > rf_ci , & ocfs2_refcount_caching_ops ) ;
mutex_init ( & new - > rf_io_mutex ) ;
new - > rf_sb = sb ;
spin_lock_init ( & new - > rf_lock ) ;
}
/*
 * Initialise the locking side of a new tree: the rf_sem rw-semaphore and
 * the lock resource, which is set up from the root block number and the
 * tree generation.
 */
static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
					struct ocfs2_refcount_tree *new,
					u64 rf_blkno, u32 generation)
{
	struct ocfs2_lock_res *lockres = &new->rf_lockres;

	init_rwsem(&new->rf_sem);
	ocfs2_refcount_lock_res_init(lockres, osb, rf_blkno, generation);
}
2009-08-24 07:12:02 +04:00
/*
 * Allocate a zeroed refcount tree for root block rf_blkno, with its kref
 * initialised and its caching info set up.  The lock resource is NOT
 * initialised here.  Returns NULL on allocation failure.
 */
static struct ocfs2_refcount_tree *
ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
{
	struct ocfs2_refcount_tree *tree;

	tree = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
	if (tree) {
		tree->rf_blkno = rf_blkno;
		kref_init(&tree->rf_getcnt);
		ocfs2_init_refcount_tree_ci(tree, osb->sb);
	}

	return tree;
}
2009-08-24 07:13:37 +04:00
/*
 * Find, or create and index, the in-memory refcount tree for root block
 * rf_blkno and return it in *ret_tree.
 *
 * The cached osb_ref_tree_lru entry is tried first, then the rb-tree.
 * On a miss a new tree is built outside osb_lock; if another tree for
 * the same block was inserted in the meantime, that one is returned and
 * the new one is freed.  The returned tree also becomes the new
 * osb_ref_tree_lru.  No kref is taken on behalf of the caller.
 *
 * Returns 0 on success, -ENOMEM or the block read error otherwise.
 */
static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
				   struct ocfs2_refcount_tree **ret_tree)
{
	int status = 0;
	struct ocfs2_refcount_tree *found, *fresh = NULL;
	struct buffer_head *root_bh = NULL;
	struct ocfs2_refcount_block *root_rb;

	spin_lock(&osb->osb_lock);
	if (osb->osb_ref_tree_lru &&
	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
		found = osb->osb_ref_tree_lru;
	else
		found = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (found)
		goto publish;
	spin_unlock(&osb->osb_lock);

	fresh = ocfs2_allocate_refcount_tree(osb, rf_blkno);
	if (!fresh) {
		status = -ENOMEM;
		mlog_errno(status);
		return status;
	}

	/*
	 * The generation is needed to create the refcount tree lock.  It
	 * does not change while the tree is modified, so reading it
	 * without protection is safe.  Once the lock exists the cache is
	 * purged: the refcount block may hold stale data and can only be
	 * trusted while the refcount lock is held.
	 */
	status = ocfs2_read_refcount_block(&fresh->rf_ci, rf_blkno, &root_bh);
	if (status) {
		mlog_errno(status);
		/* No lock resource yet, so undo only what allocation did. */
		ocfs2_metadata_cache_exit(&fresh->rf_ci);
		kfree(fresh);
		return status;
	}

	root_rb = (struct ocfs2_refcount_block *)root_bh->b_data;
	fresh->rf_generation = le32_to_cpu(root_rb->rf_generation);
	ocfs2_init_refcount_tree_lock(osb, fresh, rf_blkno,
				      fresh->rf_generation);
	ocfs2_metadata_cache_purge(&fresh->rf_ci);

	spin_lock(&osb->osb_lock);
	found = ocfs2_find_refcount_tree(osb, rf_blkno);
	if (!found) {
		ocfs2_insert_refcount_tree(osb, fresh);
		found = fresh;
		fresh = NULL;
	}

publish:
	*ret_tree = found;
	osb->osb_ref_tree_lru = found;
	spin_unlock(&osb->osb_lock);

	/* Lost the insertion race: discard our unused tree. */
	if (fresh)
		ocfs2_free_refcount_tree(fresh);

	brelse(root_bh);
	return status;
}
/*
 * Read the inode block and return its i_refcount_loc in *ref_blkno.
 * The inode must have OCFS2_HAS_REFCOUNT_FL set (BUG otherwise).
 * Returns 0 on success or the inode block read error.
 */
static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
{
	struct buffer_head *inode_bh = NULL;
	struct ocfs2_dinode *dinode;
	int status;

	status = ocfs2_read_inode_block(inode, &inode_bh);
	if (status) {
		mlog_errno(status);
		return status;
	}

	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	dinode = (struct ocfs2_dinode *)inode_bh->b_data;
	*ref_blkno = le64_to_cpu(dinode->i_refcount_loc);
	brelse(inode_bh);

	return status;
}
static int __ocfs2_lock_refcount_tree ( struct ocfs2_super * osb ,
struct ocfs2_refcount_tree * tree , int rw )
{
int ret ;
ret = ocfs2_refcount_lock ( tree , rw ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
if ( rw )
down_write ( & tree - > rf_sem ) ;
else
down_read ( & tree - > rf_sem ) ;
out :
return ret ;
}
/*
 * Lock the refcount tree pointed to by ref_blkno and return the tree.
 * In most cases the caller locks the tree and then reads the refcount
 * block, so the root block is read here and handed back through ref_bh
 * when ref_bh is non-NULL.
 *
 * If the tree has been re-created by another node (the on-disk
 * generation no longer matches), the stale in-memory tree is dropped and
 * the lookup is retried.
 *
 * On success the caller holds one kref on the tree plus the locks taken
 * by __ocfs2_lock_refcount_tree(); ocfs2_unlock_refcount_tree() releases
 * all of them.  Returns 0 on success or a negative errno, in which case
 * nothing is held and *ret_tree / *ref_bh are not written.
 */
int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
			     u64 ref_blkno, int rw,
			     struct ocfs2_refcount_tree **ret_tree,
			     struct buffer_head **ref_bh)
{
	int ret, delete_tree;
	struct ocfs2_refcount_tree *tree = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_refcount_block *rb;

again:
	/*
	 * Must be reset on every attempt: a value left over from a
	 * previous iteration would make us drop a creation reference that
	 * somebody else has already dropped.
	 */
	delete_tree = 0;

	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	ocfs2_refcount_tree_get(tree);

	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
	if (ret) {
		mlog_errno(ret);
		ocfs2_refcount_tree_put(tree);
		goto out;
	}

	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
					&ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		/*
		 * ocfs2_unlock_refcount_tree() already drops the reference
		 * taken above; a second put here would release the
		 * reference owned by the rb-tree and leave a dangling
		 * entry behind.
		 */
		ocfs2_unlock_refcount_tree(osb, tree, rw);
		goto out;
	}

	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	/*
	 * If the refcount block has been freed and re-created, we may need
	 * to recreate the refcount tree also.
	 *
	 * Here we just remove the tree from the rb-tree, and the last
	 * kref holder will unlock and delete this refcount_tree.
	 * Then we goto "again" and ocfs2_get_refcount_tree will create
	 * the new refcount tree for us.
	 */
	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
		if (!tree->rf_removed) {
			ocfs2_erase_refcount_tree_from_list(osb, tree);
			tree->rf_removed = 1;
			delete_tree = 1;
		}

		ocfs2_unlock_refcount_tree(osb, tree, rw);
		/*
		 * We get an extra reference when we create the refcount
		 * tree, so another put will destroy it.
		 */
		if (delete_tree)
			ocfs2_refcount_tree_put(tree);
		brelse(ref_root_bh);
		ref_root_bh = NULL;
		goto again;
	}

	*ret_tree = tree;
	if (ref_bh) {
		/* Ownership of the buffer moves to the caller. */
		*ref_bh = ref_root_bh;
		ref_root_bh = NULL;
	}
out:
	brelse(ref_root_bh);
	return ret;
}
void ocfs2_unlock_refcount_tree ( struct ocfs2_super * osb ,
struct ocfs2_refcount_tree * tree , int rw )
{
if ( rw )
up_write ( & tree - > rf_sem ) ;
else
up_read ( & tree - > rf_sem ) ;
ocfs2_refcount_unlock ( tree , rw ) ;
ocfs2_refcount_tree_put ( tree ) ;
}
void ocfs2_purge_refcount_trees ( struct ocfs2_super * osb )
{
struct rb_node * node ;
struct ocfs2_refcount_tree * tree ;
struct rb_root * root = & osb - > osb_rf_lock_tree ;
while ( ( node = rb_last ( root ) ) ! = NULL ) {
tree = rb_entry ( node , struct ocfs2_refcount_tree , rf_node ) ;
mlog ( 0 , " Purge tree %llu \n " ,
( unsigned long long ) tree - > rf_blkno ) ;
rb_erase ( & tree - > rf_node , root ) ;
ocfs2_free_refcount_tree ( tree ) ;
}
}
2009-08-24 07:12:02 +04:00
/*
* Create a refcount tree for an inode .
* We take for granted that the inode is already locked .
*/
static int ocfs2_create_refcount_tree ( struct inode * inode ,
struct buffer_head * di_bh )
{
int ret ;
handle_t * handle = NULL ;
struct ocfs2_alloc_context * meta_ac = NULL ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_super * osb = OCFS2_SB ( inode - > i_sb ) ;
struct buffer_head * new_bh = NULL ;
struct ocfs2_refcount_block * rb ;
struct ocfs2_refcount_tree * new_tree = NULL , * tree = NULL ;
u16 suballoc_bit_start ;
u32 num_got ;
u64 first_blkno ;
BUG_ON ( oi - > ip_dyn_features & OCFS2_HAS_REFCOUNT_FL ) ;
mlog ( 0 , " create tree for inode %lu \n " , inode - > i_ino ) ;
ret = ocfs2_reserve_new_metadata_blocks ( osb , 1 , & meta_ac ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
handle = ocfs2_start_trans ( osb , OCFS2_REFCOUNT_TREE_CREATE_CREDITS ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
mlog_errno ( ret ) ;
goto out ;
}
ret = ocfs2_journal_access_di ( handle , INODE_CACHE ( inode ) , di_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
ret = ocfs2_claim_metadata ( osb , handle , meta_ac , 1 ,
& suballoc_bit_start , & num_got ,
& first_blkno ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
new_tree = ocfs2_allocate_refcount_tree ( osb , first_blkno ) ;
if ( ! new_tree ) {
ret = - ENOMEM ;
mlog_errno ( ret ) ;
goto out_commit ;
}
new_bh = sb_getblk ( inode - > i_sb , first_blkno ) ;
ocfs2_set_new_buffer_uptodate ( & new_tree - > rf_ci , new_bh ) ;
ret = ocfs2_journal_access_rb ( handle , & new_tree - > rf_ci , new_bh ,
OCFS2_JOURNAL_ACCESS_CREATE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
/* Initialize ocfs2_refcount_block. */
rb = ( struct ocfs2_refcount_block * ) new_bh - > b_data ;
memset ( rb , 0 , inode - > i_sb - > s_blocksize ) ;
strcpy ( ( void * ) rb , OCFS2_REFCOUNT_BLOCK_SIGNATURE ) ;
rb - > rf_suballoc_slot = cpu_to_le16 ( osb - > slot_num ) ;
rb - > rf_suballoc_bit = cpu_to_le16 ( suballoc_bit_start ) ;
rb - > rf_fs_generation = cpu_to_le32 ( osb - > fs_generation ) ;
rb - > rf_blkno = cpu_to_le64 ( first_blkno ) ;
rb - > rf_count = cpu_to_le32 ( 1 ) ;
rb - > rf_records . rl_count =
cpu_to_le16 ( ocfs2_refcount_recs_per_rb ( osb - > sb ) ) ;
spin_lock ( & osb - > osb_lock ) ;
rb - > rf_generation = osb - > s_next_generation + + ;
spin_unlock ( & osb - > osb_lock ) ;
ocfs2_journal_dirty ( handle , new_bh ) ;
spin_lock ( & oi - > ip_lock ) ;
oi - > ip_dyn_features | = OCFS2_HAS_REFCOUNT_FL ;
di - > i_dyn_features = cpu_to_le16 ( oi - > ip_dyn_features ) ;
di - > i_refcount_loc = cpu_to_le64 ( first_blkno ) ;
spin_unlock ( & oi - > ip_lock ) ;
mlog ( 0 , " created tree for inode %lu, refblock %llu \n " ,
inode - > i_ino , ( unsigned long long ) first_blkno ) ;
ocfs2_journal_dirty ( handle , di_bh ) ;
/*
* We have to init the tree lock here since it will use
* the generation number to create it .
*/
new_tree - > rf_generation = le32_to_cpu ( rb - > rf_generation ) ;
ocfs2_init_refcount_tree_lock ( osb , new_tree , first_blkno ,
new_tree - > rf_generation ) ;
spin_lock ( & osb - > osb_lock ) ;
tree = ocfs2_find_refcount_tree ( osb , first_blkno ) ;
/*
* We ' ve just created a new refcount tree in this block . If
* we found a refcount tree on the ocfs2_super , it must be
* one we just deleted . We free the old tree before
* inserting the new tree .
*/
BUG_ON ( tree & & tree - > rf_generation = = new_tree - > rf_generation ) ;
if ( tree )
ocfs2_erase_refcount_tree_from_list_no_lock ( osb , tree ) ;
ocfs2_insert_refcount_tree ( osb , new_tree ) ;
spin_unlock ( & osb - > osb_lock ) ;
new_tree = NULL ;
if ( tree )
ocfs2_refcount_tree_put ( tree ) ;
out_commit :
ocfs2_commit_trans ( osb , handle ) ;
out :
if ( new_tree ) {
ocfs2_metadata_cache_exit ( & new_tree - > rf_ci ) ;
kfree ( new_tree ) ;
}
brelse ( new_bh ) ;
if ( meta_ac )
ocfs2_free_alloc_context ( meta_ac ) ;
return ret ;
}
/*
 * Attach an inode to the existing refcount tree rooted at refcount_loc.
 *
 * With the tree write-locked and inside one transaction, the root
 * block's rf_count is incremented and the dinode gets
 * OCFS2_HAS_REFCOUNT_FL and i_refcount_loc.  The inode must not already
 * have a refcount tree (BUG otherwise).
 *
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_set_refcount_tree(struct inode *inode,
				   struct buffer_head *di_bh,
				   u64 refcount_loc)
{
	int status;
	handle_t *trans;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *dinode = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_refcount_tree *tree;
	struct ocfs2_refcount_block *root_rb;
	struct buffer_head *root_bh = NULL;

	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);

	status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
					  &tree, &root_bh);
	if (status) {
		mlog_errno(status);
		return status;
	}

	trans = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
	if (IS_ERR(trans)) {
		status = PTR_ERR(trans);
		mlog_errno(status);
		goto unlock_tree;
	}

	status = ocfs2_journal_access_di(trans, INODE_CACHE(inode), di_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto commit;
	}

	status = ocfs2_journal_access_rb(trans, &tree->rf_ci, root_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto commit;
	}

	/* One more inode shares this tree. */
	root_rb = (struct ocfs2_refcount_block *)root_bh->b_data;
	le32_add_cpu(&root_rb->rf_count, 1);

	ocfs2_journal_dirty(trans, root_bh);

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
	dinode->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	dinode->i_refcount_loc = cpu_to_le64(refcount_loc);
	spin_unlock(&oi->ip_lock);

	ocfs2_journal_dirty(trans, di_bh);

commit:
	ocfs2_commit_trans(osb, trans);
unlock_tree:
	ocfs2_unlock_refcount_tree(osb, tree, 1);
	brelse(root_bh);

	return status;
}
int ocfs2_remove_refcount_tree ( struct inode * inode , struct buffer_head * di_bh )
{
int ret , delete_tree = 0 ;
handle_t * handle = NULL ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_super * osb = OCFS2_SB ( inode - > i_sb ) ;
struct ocfs2_refcount_block * rb ;
struct inode * alloc_inode = NULL ;
struct buffer_head * alloc_bh = NULL ;
struct buffer_head * blk_bh = NULL ;
struct ocfs2_refcount_tree * ref_tree ;
int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS ;
u64 blk = 0 , bg_blkno = 0 , ref_blkno = le64_to_cpu ( di - > i_refcount_loc ) ;
u16 bit = 0 ;
if ( ! ( oi - > ip_dyn_features & OCFS2_HAS_REFCOUNT_FL ) )
return 0 ;
BUG_ON ( ! ref_blkno ) ;
ret = ocfs2_lock_refcount_tree ( osb , ref_blkno , 1 , & ref_tree , & blk_bh ) ;
if ( ret ) {
mlog_errno ( ret ) ;
return ret ;
}
rb = ( struct ocfs2_refcount_block * ) blk_bh - > b_data ;
/*
* If we are the last user , we need to free the block .
* So lock the allocator ahead .
*/
if ( le32_to_cpu ( rb - > rf_count ) = = 1 ) {
blk = le64_to_cpu ( rb - > rf_blkno ) ;
bit = le16_to_cpu ( rb - > rf_suballoc_bit ) ;
bg_blkno = ocfs2_which_suballoc_group ( blk , bit ) ;
alloc_inode = ocfs2_get_system_file_inode ( osb ,
EXTENT_ALLOC_SYSTEM_INODE ,
le16_to_cpu ( rb - > rf_suballoc_slot ) ) ;
if ( ! alloc_inode ) {
ret = - ENOMEM ;
mlog_errno ( ret ) ;
goto out ;
}
mutex_lock ( & alloc_inode - > i_mutex ) ;
ret = ocfs2_inode_lock ( alloc_inode , & alloc_bh , 1 ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_mutex ;
}
credits + = OCFS2_SUBALLOC_FREE ;
}
handle = ocfs2_start_trans ( osb , credits ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
mlog_errno ( ret ) ;
goto out_unlock ;
}
ret = ocfs2_journal_access_di ( handle , INODE_CACHE ( inode ) , di_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
ret = ocfs2_journal_access_rb ( handle , & ref_tree - > rf_ci , blk_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
spin_lock ( & oi - > ip_lock ) ;
oi - > ip_dyn_features & = ~ OCFS2_HAS_REFCOUNT_FL ;
di - > i_dyn_features = cpu_to_le16 ( oi - > ip_dyn_features ) ;
di - > i_refcount_loc = 0 ;
spin_unlock ( & oi - > ip_lock ) ;
ocfs2_journal_dirty ( handle , di_bh ) ;
le32_add_cpu ( & rb - > rf_count , - 1 ) ;
ocfs2_journal_dirty ( handle , blk_bh ) ;
if ( ! rb - > rf_count ) {
delete_tree = 1 ;
ocfs2_erase_refcount_tree_from_list ( osb , ref_tree ) ;
ret = ocfs2_free_suballoc_bits ( handle , alloc_inode ,
alloc_bh , bit , bg_blkno , 1 ) ;
if ( ret )
mlog_errno ( ret ) ;
}
out_commit :
ocfs2_commit_trans ( osb , handle ) ;
out_unlock :
if ( alloc_inode ) {
ocfs2_inode_unlock ( alloc_inode , 1 ) ;
brelse ( alloc_bh ) ;
}
out_mutex :
if ( alloc_inode ) {
mutex_unlock ( & alloc_inode - > i_mutex ) ;
iput ( alloc_inode ) ;
}
out :
ocfs2_unlock_refcount_tree ( osb , ref_tree , 1 ) ;
if ( delete_tree )
ocfs2_refcount_tree_put ( ref_tree ) ;
brelse ( blk_bh ) ;
return ret ;
}
2009-08-11 10:33:14 +04:00
/*
 * Scan the record list of one refcount block for the record covering
 * cpos.
 *
 * If a record contains cpos it is copied to *ret_rec (when ret_rec is
 * non-NULL) and *index is its position.  Otherwise cpos falls in a hole:
 * *index is the position of the first record starting after cpos (or
 * rl_used when there is none) and *ret_rec is filled with a fake record
 * starting at cpos with r_refcount 0, whose length is len, clipped to
 * the start of the next record when that record begins before
 * cpos + len.
 *
 * The ci argument is not used by this function.
 */
static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
					  struct buffer_head *ref_leaf_bh,
					  u64 cpos, unsigned int len,
					  struct ocfs2_refcount_rec *ret_rec,
					  int *index)
{
	struct ocfs2_refcount_block *leaf =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_rec *cur = NULL;
	u16 used = le16_to_cpu(leaf->rf_records.rl_used);
	int pos;

	for (pos = 0; pos < used; pos++) {
		u64 start;

		cur = &leaf->rf_records.rl_recs[pos];
		start = le64_to_cpu(cur->r_cpos);

		/* Record ends at or before cpos: keep looking. */
		if (start + le32_to_cpu(cur->r_clusters) <= cpos)
			continue;
		/* Record starts after cpos: cpos sits in a hole. */
		if (start > cpos)
			break;

		/* ok, cpos falls in this rec. Just return. */
		if (ret_rec)
			*ret_rec = *cur;
		goto done;
	}

	if (ret_rec) {
		/* We meet with a hole here, so fake the rec. */
		ret_rec->r_cpos = cpu_to_le64(cpos);
		ret_rec->r_refcount = 0;
		if (pos < used && le64_to_cpu(cur->r_cpos) < cpos + len)
			ret_rec->r_clusters =
				cpu_to_le32(le64_to_cpu(cur->r_cpos) - cpos);
		else
			ret_rec->r_clusters = cpu_to_le32(len);
	}

done:
	*index = pos;
}
2009-08-18 07:43:49 +04:00
/*
* Try to remove refcount tree . The mechanism is :
* 1 ) Check whether i_clusters = = 0 , if no , exit .
* 2 ) check whether we have i_xattr_loc in dinode . if yes , exit .
* 3 ) Check whether we have inline xattr stored outside , if yes , exit .
* 4 ) Remove the tree .
*/
int ocfs2_try_remove_refcount_tree ( struct inode * inode ,
struct buffer_head * di_bh )
{
int ret ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
down_write ( & oi - > ip_xattr_sem ) ;
down_write ( & oi - > ip_alloc_sem ) ;
if ( oi - > ip_clusters )
goto out ;
if ( ( oi - > ip_dyn_features & OCFS2_HAS_XATTR_FL ) & & di - > i_xattr_loc )
goto out ;
if ( oi - > ip_dyn_features & OCFS2_INLINE_XATTR_FL & &
ocfs2_has_inline_xattr_value_outside ( inode , di ) )
goto out ;
ret = ocfs2_remove_refcount_tree ( inode , di_bh ) ;
if ( ret )
mlog_errno ( ret ) ;
out :
up_write ( & oi - > ip_alloc_sem ) ;
up_write ( & oi - > ip_xattr_sem ) ;
return 0 ;
}
2009-11-30 09:32:19 +03:00
/*
 * Find the end of the cpos range for the leaf refcount block indicated
 * by el->l_recs[index].e_blkno and store it in *cpos_end.
 *
 * - If another extent record follows index in el, its e_cpos is the end.
 * - If el is the last leaf (no extent block, or no h_next_leaf_blk), the
 *   range is open ended and UINT_MAX is returned.
 * - Otherwise the end is taken from the subtree root shared by this leaf
 *   and its right neighbour.
 *
 * Returns 0 on success or a negative errno from the path lookups.
 */
static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
				       struct buffer_head *ref_root_bh,
				       struct ocfs2_extent_block *eb,
				       struct ocfs2_extent_list *el,
				       int index, u32 *cpos_end)
{
	int ret, i, subtree_root;
	u32 cpos;
	u64 blkno;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_path *left_path = NULL, *right_path = NULL;
	struct ocfs2_extent_tree et;
	struct ocfs2_extent_list *tmp_el;

	if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
		/*
		 * We have an extent rec after index, so just use the e_cpos
		 * of the next extent rec.
		 */
		*cpos_end = le32_to_cpu(el->l_recs[index + 1].e_cpos);
		return 0;
	}

	if (!eb || !eb->h_next_leaf_blk) {
		/*
		 * We are the last extent rec, so any high cpos should
		 * be stored in this leaf refcount block.
		 */
		*cpos_end = UINT_MAX;
		return 0;
	}

	/*
	 * If the extent block isn't the last one, we have to find
	 * the subtree root between this extent block and the next
	 * leaf extent block and get the corresponding e_cpos from
	 * the subroot. Otherwise we may corrupt the b-tree.
	 */
	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);

	left_path = ocfs2_new_path_from_et(&et);
	if (!left_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
	ret = ocfs2_find_path(ci, left_path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	right_path = ocfs2_new_path_from_path(left_path);
	if (!right_path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(ci, right_path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	subtree_root = ocfs2_find_subtree_root(&et, left_path,
					       right_path);

	tmp_el = left_path->p_node[subtree_root].el;
	blkno = left_path->p_node[subtree_root + 1].bh->b_blocknr;
	/*
	 * l_next_free_rec is a 16-bit little-endian field (see the
	 * le16_to_cpu() use above); converting it with le32_to_cpu()
	 * yields a bogus bound on big-endian hosts.
	 */
	for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) {
		if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
			/* The record after our branch starts the next range. */
			*cpos_end = le32_to_cpu(tmp_el->l_recs[i + 1].e_cpos);
			break;
		}
	}

	BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec));

out:
	ocfs2_free_path(left_path);
	ocfs2_free_path(right_path);
	return ret;
}
2009-08-11 10:33:14 +04:00
/*
 * Given a cpos and len, try to find the refcount record which contains
 * cpos.
 * 1. If cpos can be found in one refcount record, return the record.
 * 2. If cpos can't be found, return a fake record which starts at cpos
 *    and ends at the smaller of cpos + len and the start of the next
 *    record.  This fake record has r_refcount = 0.
 *
 * On success *ret_bh holds a referenced buffer for the block containing
 * the record (the root block itself when the tree has no
 * OCFS2_REFCOUNT_TREE_FL) and *index is the record position in it.
 *
 * Returns 0 on success, -EROFS when the tree is found to be malformed,
 * or a negative errno from the block lookups.
 */
static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
				  struct buffer_head *ref_root_bh,
				  u64 cpos, unsigned int len,
				  struct ocfs2_refcount_rec *ret_rec,
				  int *index,
				  struct buffer_head **ret_bh)
{
	int ret = 0, i, found;
	u32 low_cpos, uninitialized_var(cpos_end);
	struct ocfs2_extent_list *el;
	struct ocfs2_extent_rec *rec = NULL;
	struct ocfs2_extent_block *eb = NULL;
	struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *rb =
			(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
		/* Records live directly in the root block. */
		ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
					      ret_rec, index);
		*ret_bh = ref_root_bh;
		get_bh(ref_root_bh);
		return 0;
	}

	el = &rb->rf_list;
	low_cpos = cpos & OCFS2_32BIT_POS_MASK;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *)eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(sb,
			" refcount tree %llu has non zero tree "
			" depth in leaf btree tree block %llu \n ",
			(unsigned long long)ocfs2_metadata_cache_owner(ci),
			(unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	/* Find the last extent record starting at or before low_cpos. */
	found = 0;
	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
		rec = &el->l_recs[i];

		if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
			found = 1;
			break;
		}
	}

	if (!rec) {
		/*
		 * The extent list holds no records at all, so there is no
		 * leaf block to read; fail instead of dereferencing NULL.
		 */
		ocfs2_error(sb,
			    "refcount tree %llu has an empty extent list\n",
			    (unsigned long long)ocfs2_metadata_cache_owner(ci));
		ret = -EROFS;
		goto out;
	}

	if (found) {
		ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
						  eb, el, i, &cpos_end);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Do not let the range run past this leaf block. */
		if (cpos_end < low_cpos + len)
			len = cpos_end - low_cpos;
	}

	/*
	 * When nothing was found, rec is the first record of the list and
	 * len is used unclipped.
	 */
	ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
					&ref_leaf_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
				      ret_rec, index);
	*ret_bh = ref_leaf_bh;
out:
	brelse(eb_bh);
	return ret;
}
/* How a refcount record relates to its neighbours for merging. */
enum ocfs2_ref_rec_contig {
	REF_CONTIG_NONE		= 0,	/* no mergeable neighbour */
	REF_CONTIG_LEFT		= 1,	/* mergeable with the previous record */
	REF_CONTIG_RIGHT	= 2,	/* mergeable with the next record */
	REF_CONTIG_LEFTRIGHT	= 3,	/* mergeable with both neighbours */
};
static enum ocfs2_ref_rec_contig
ocfs2_refcount_rec_adjacent ( struct ocfs2_refcount_block * rb ,
int index )
{
if ( ( rb - > rf_records . rl_recs [ index ] . r_refcount = =
rb - > rf_records . rl_recs [ index + 1 ] . r_refcount ) & &
( le64_to_cpu ( rb - > rf_records . rl_recs [ index ] . r_cpos ) +
le32_to_cpu ( rb - > rf_records . rl_recs [ index ] . r_clusters ) = =
le64_to_cpu ( rb - > rf_records . rl_recs [ index + 1 ] . r_cpos ) ) )
return REF_CONTIG_RIGHT ;
return REF_CONTIG_NONE ;
}
static enum ocfs2_ref_rec_contig
ocfs2_refcount_rec_contig ( struct ocfs2_refcount_block * rb ,
int index )
{
enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE ;
if ( index < le16_to_cpu ( rb - > rf_records . rl_used ) - 1 )
ret = ocfs2_refcount_rec_adjacent ( rb , index ) ;
if ( index > 0 ) {
enum ocfs2_ref_rec_contig tmp ;
tmp = ocfs2_refcount_rec_adjacent ( rb , index - 1 ) ;
if ( tmp = = REF_CONTIG_RIGHT ) {
if ( ret = = REF_CONTIG_RIGHT )
ret = REF_CONTIG_LEFTRIGHT ;
else
ret = REF_CONTIG_LEFT ;
}
}
return ret ;
}
static void ocfs2_rotate_refcount_rec_left ( struct ocfs2_refcount_block * rb ,
int index )
{
BUG_ON ( rb - > rf_records . rl_recs [ index ] . r_refcount ! =
rb - > rf_records . rl_recs [ index + 1 ] . r_refcount ) ;
le32_add_cpu ( & rb - > rf_records . rl_recs [ index ] . r_clusters ,
le32_to_cpu ( rb - > rf_records . rl_recs [ index + 1 ] . r_clusters ) ) ;
if ( index < le16_to_cpu ( rb - > rf_records . rl_used ) - 2 )
memmove ( & rb - > rf_records . rl_recs [ index + 1 ] ,
& rb - > rf_records . rl_recs [ index + 2 ] ,
sizeof ( struct ocfs2_refcount_rec ) *
( le16_to_cpu ( rb - > rf_records . rl_used ) - index - 2 ) ) ;
memset ( & rb - > rf_records . rl_recs [ le16_to_cpu ( rb - > rf_records . rl_used ) - 1 ] ,
0 , sizeof ( struct ocfs2_refcount_rec ) ) ;
le16_add_cpu ( & rb - > rf_records . rl_used , - 1 ) ;
}
/*
* Merge the refcount rec if we are contiguous with the adjacent recs .
*/
static void ocfs2_refcount_rec_merge ( struct ocfs2_refcount_block * rb ,
int index )
{
enum ocfs2_ref_rec_contig contig =
ocfs2_refcount_rec_contig ( rb , index ) ;
if ( contig = = REF_CONTIG_NONE )
return ;
if ( contig = = REF_CONTIG_LEFT | | contig = = REF_CONTIG_LEFTRIGHT ) {
BUG_ON ( index = = 0 ) ;
index - - ;
}
ocfs2_rotate_refcount_rec_left ( rb , index ) ;
if ( contig = = REF_CONTIG_LEFTRIGHT )
ocfs2_rotate_refcount_rec_left ( rb , index ) ;
}
2009-08-18 07:24:49 +04:00
/*
* Change the refcount indexed by " index " in ref_bh .
* If refcount reaches 0 , remove it .
*/
2009-08-11 10:33:14 +04:00
static int ocfs2_change_refcount_rec ( handle_t * handle ,
struct ocfs2_caching_info * ci ,
struct buffer_head * ref_leaf_bh ,
2009-08-18 07:44:03 +04:00
int index , int merge , int change )
2009-08-11 10:33:14 +04:00
{
int ret ;
struct ocfs2_refcount_block * rb =
( struct ocfs2_refcount_block * ) ref_leaf_bh - > b_data ;
2009-08-18 07:24:49 +04:00
struct ocfs2_refcount_list * rl = & rb - > rf_records ;
struct ocfs2_refcount_rec * rec = & rl - > rl_recs [ index ] ;
2009-08-11 10:33:14 +04:00
ret = ocfs2_journal_access_rb ( handle , ci , ref_leaf_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
mlog ( 0 , " change index %d, old count %u, change %d \n " , index ,
le32_to_cpu ( rec - > r_refcount ) , change ) ;
le32_add_cpu ( & rec - > r_refcount , change ) ;
2009-08-18 07:24:49 +04:00
if ( ! rec - > r_refcount ) {
if ( index ! = le16_to_cpu ( rl - > rl_used ) - 1 ) {
memmove ( rec , rec + 1 ,
( le16_to_cpu ( rl - > rl_used ) - index - 1 ) *
sizeof ( struct ocfs2_refcount_rec ) ) ;
memset ( & rl - > rl_recs [ le16_to_cpu ( rl - > rl_used ) - 1 ] ,
0 , sizeof ( struct ocfs2_refcount_rec ) ) ;
}
le16_add_cpu ( & rl - > rl_used , - 1 ) ;
2009-08-18 07:44:03 +04:00
} else if ( merge )
2009-08-18 07:24:49 +04:00
ocfs2_refcount_rec_merge ( rb , index ) ;
2009-08-11 10:33:14 +04:00
ret = ocfs2_journal_dirty ( handle , ref_leaf_bh ) ;
if ( ret )
mlog_errno ( ret ) ;
out :
return ret ;
}
/*
 * Turn a root refcount block that stores its records inline into a tree
 * root with exactly one leaf.
 *
 * One metadata block is claimed from meta_ac and filled with a copy of
 * the current root, then re-labelled as a leaf.  The root's payload is
 * wiped and replaced by an extent list whose single record points at the
 * new leaf.
 *
 * On success *ref_leaf_bh is set to the new leaf's buffer head; the
 * reference obtained from sb_getblk() is handed to the caller.
 * Returns 0 or a negative error code.
 */
static int ocfs2_expand_inline_ref_root(handle_t *handle,
					struct ocfs2_caching_info *ci,
					struct buffer_head *ref_root_bh,
					struct buffer_head **ref_leaf_bh,
					struct ocfs2_alloc_context *meta_ac)
{
	int status;
	u16 alloc_bit;
	u32 claimed;
	u64 leaf_blkno;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct buffer_head *leaf_bh = NULL;
	struct ocfs2_refcount_block *leaf_rb;
	struct ocfs2_refcount_block *root_rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;

	status = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
				      &alloc_bit, &claimed,
				      &leaf_blkno);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	leaf_bh = sb_getblk(sb, leaf_blkno);
	if (!leaf_bh) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}
	ocfs2_set_new_buffer_uptodate(ci, leaf_bh);

	status = ocfs2_journal_access_rb(handle, ci, leaf_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/*
	 * The new leaf starts out as a byte-for-byte copy of the old
	 * root; only the fields that identify the block are rewritten.
	 */
	memcpy(leaf_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);

	leaf_rb = (struct ocfs2_refcount_block *)leaf_bh->b_data;
	leaf_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
	leaf_rb->rf_suballoc_bit = cpu_to_le16(alloc_bit);
	leaf_rb->rf_blkno = cpu_to_le64(leaf_blkno);
	leaf_rb->rf_cpos = cpu_to_le32(0);
	leaf_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
	leaf_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);

	ocfs2_journal_dirty(handle, leaf_bh);

	/* Rebuild the root as an extent list with one record. */
	memset(&root_rb->rf_list, 0, sb->s_blocksize -
	       offsetof(struct ocfs2_refcount_block, rf_list));
	root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
	root_rb->rf_clusters = cpu_to_le32(1);
	root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
	root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(leaf_blkno);
	root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
	root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);

	ocfs2_journal_dirty(handle, ref_root_bh);

	mlog(0, " new leaf block %llu, used %u \n ",
	     (unsigned long long)leaf_blkno,
	     le16_to_cpu(leaf_rb->rf_records.rl_used));

	/* Hand the buffer reference over to the caller. */
	*ref_leaf_bh = leaf_bh;
	leaf_bh = NULL;

bail:
	brelse(leaf_bh);
	return status;
}
static int ocfs2_refcount_rec_no_intersect ( struct ocfs2_refcount_rec * prev ,
struct ocfs2_refcount_rec * next )
{
if ( ocfs2_get_ref_rec_low_cpos ( prev ) + le32_to_cpu ( prev - > r_clusters ) < =
ocfs2_get_ref_rec_low_cpos ( next ) )
return 1 ;
return 0 ;
}
/*
 * sort() comparator: order refcount records by the low 32 bits of their
 * cpos.  Returns -1, 0 or 1.
 */
static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b)
{
	const struct ocfs2_refcount_rec *lhs = a;
	const struct ocfs2_refcount_rec *rhs = b;
	u32 lhs_cpos = ocfs2_get_ref_rec_low_cpos(lhs);
	u32 rhs_cpos = ocfs2_get_ref_rec_low_cpos(rhs);

	if (lhs_cpos == rhs_cpos)
		return 0;

	return lhs_cpos > rhs_cpos ? 1 : -1;
}
static int cmp_refcount_rec_by_cpos ( const void * a , const void * b )
{
const struct ocfs2_refcount_rec * l = a , * r = b ;
u64 l_cpos = le64_to_cpu ( l - > r_cpos ) ;
u64 r_cpos = le64_to_cpu ( r - > r_cpos ) ;
if ( l_cpos > r_cpos )
return 1 ;
if ( l_cpos < r_cpos )
return - 1 ;
return 0 ;
}
static void swap_refcount_rec ( void * a , void * b , int size )
{
struct ocfs2_refcount_rec * l = a , * r = b , tmp ;
tmp = * ( struct ocfs2_refcount_rec * ) l ;
* ( struct ocfs2_refcount_rec * ) l =
* ( struct ocfs2_refcount_rec * ) r ;
* ( struct ocfs2_refcount_rec * ) r = tmp ;
}
/*
 * Find the position at which a leaf's record list can be cut in two.
 *
 * Records are ordered by their 64 bit cpos, but the b-tree keys leaves by
 * the low 32 bits only, so the cut must fall between two records whose
 * low 32 bit ranges do not intersect.
 *
 * The list has already been sorted by low 32 bit cpos.  Candidates are
 * probed starting at the middle and moving outwards, first towards the
 * front and then towards the back, and the first usable one wins.
 *
 * On success *split_index is the index of the first record of the second
 * half and *split_pos its low 32 bit cpos.  Returns -ENOSPC when no
 * candidate fits; the outputs are left untouched in that case.
 */
static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
					 u32 *split_pos, int *split_index)
{
	int num_used = le16_to_cpu(rl->rl_used);
	int middle = num_used / 2;
	int delta;

	for (delta = 0; delta < middle; delta++) {
		int front = middle - delta;
		int back = middle + delta + 1;

		/* Candidate "delta" slots before the middle. */
		if (ocfs2_refcount_rec_no_intersect(&rl->rl_recs[front - 1],
						    &rl->rl_recs[front])) {
			*split_index = front;
			goto found;
		}

		/*
		 * Candidate "delta" slots past the middle, skipped when
		 * it would read beyond the last used record.
		 */
		if (back != num_used &&
		    ocfs2_refcount_rec_no_intersect(&rl->rl_recs[back - 1],
						    &rl->rl_recs[back])) {
			*split_index = back;
			goto found;
		}
	}

	return -ENOSPC;

found:
	*split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
	return 0;
}
/*
 * Move the upper part of the records of a leaf refcount block into a new
 * block.
 *
 * ref_leaf_bh: leaf block to be divided.
 * new_bh:      block receiving the records from the split point onwards;
 *              its rf_cpos and rl_used are set here.
 * split_cpos:  on success, the low 32 bit cpos of the first moved record.
 *
 * Returns 0 on success or the error of ocfs2_find_refcount_split_pos().
 * Note that on that error path the records of ref_leaf_bh stay sorted by
 * their low 32 bit cpos.
 */
static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
					    struct buffer_head *new_bh,
					    u32 *split_cpos)
{
	int split_index = 0, num_moved, ret;
	u32 cpos = 0;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_list *rl = &rb->rf_records;
	struct ocfs2_refcount_block *new_rb =
		(struct ocfs2_refcount_block *)new_bh->b_data;
	struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;

	/* rl_count and rl_used are 16 bit little endian fields. */
	mlog(0, " split old leaf refcount block %llu, count = %u, used = %u \n ",
	     (unsigned long long)ref_leaf_bh->b_blocknr,
	     le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));

	/*
	 * XXX: Improvement later.
	 * If we know all the high 32 bit cpos is the same, no need to sort.
	 *
	 * In order to make the whole process safe, we do:
	 * 1. sort the entries by their low 32 bit cpos first so that we can
	 *    find the split cpos easily.
	 * 2. move the refcount rec to the new block.
	 * 3. sort the entries of both blocks by their 64 bit cpos again.
	 * Dirtying the blocks and inserting the new block into the tree
	 * is not done in this function.
	 */
	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
	     sizeof(struct ocfs2_refcount_rec),
	     cmp_refcount_rec_by_low_cpos, swap_refcount_rec);

	ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	new_rb->rf_cpos = cpu_to_le32(cpos);

	/* move refcount records starting from split_index to the new block. */
	num_moved = le16_to_cpu(rl->rl_used) - split_index;
	memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
	       num_moved * sizeof(struct ocfs2_refcount_rec));

	/* ok, remove the entries we just moved over to the other block. */
	memset(&rl->rl_recs[split_index], 0,
	       num_moved * sizeof(struct ocfs2_refcount_rec));

	/* change old and new rl_used accordingly. */
	le16_add_cpu(&rl->rl_used, -num_moved);
	new_rl->rl_used = cpu_to_le16(num_moved);

	/* Restore the 64 bit cpos ordering in both blocks. */
	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
	     sizeof(struct ocfs2_refcount_rec),
	     cmp_refcount_rec_by_cpos, swap_refcount_rec);

	sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
	     sizeof(struct ocfs2_refcount_rec),
	     cmp_refcount_rec_by_cpos, swap_refcount_rec);

	*split_cpos = cpos;
	return 0;
}
/*
 * Add one more leaf to a refcount tree by splitting ref_leaf_bh.
 *
 * A metadata block is claimed from meta_ac and initialised as an empty
 * leaf refcount block.  ocfs2_divide_leaf_refcount_block() then moves
 * part of the records of ref_leaf_bh into it, and the new leaf is
 * inserted into the extent tree rooted at ref_root_bh at the cpos
 * reported by the divide step.
 *
 * The root must already be a tree root (BUG_ON otherwise).
 * Returns 0 or a negative error code.
 */
static int ocfs2_new_leaf_refcount_block(handle_t *handle,
					 struct ocfs2_caching_info *ci,
					 struct buffer_head *ref_root_bh,
					 struct buffer_head *ref_leaf_bh,
					 struct ocfs2_alloc_context *meta_ac)
{
	int status;
	u16 alloc_bit;
	u32 claimed, split_cpos;
	u64 leaf_blkno;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *root_rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
	struct buffer_head *leaf_bh = NULL;
	struct ocfs2_refcount_block *leaf_rb;
	struct ocfs2_extent_tree ref_et;

	BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));

	status = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
				      &alloc_bit, &claimed,
				      &leaf_blkno);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	leaf_bh = sb_getblk(sb, leaf_blkno);
	if (!leaf_bh) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}
	ocfs2_set_new_buffer_uptodate(ci, leaf_bh);

	status = ocfs2_journal_access_rb(handle, ci, leaf_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/* Start from an all-zero block and fill in the header. */
	leaf_rb = (struct ocfs2_refcount_block *)leaf_bh->b_data;
	memset(leaf_rb, 0, sb->s_blocksize);
	strcpy((void *)leaf_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
	leaf_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
	leaf_rb->rf_suballoc_bit = cpu_to_le16(alloc_bit);
	leaf_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
	leaf_rb->rf_blkno = cpu_to_le64(leaf_blkno);
	leaf_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
	leaf_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
	leaf_rb->rf_records.rl_count =
				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
	/* The generation is copied from the root as an on-disk value. */
	leaf_rb->rf_generation = root_rb->rf_generation;

	status = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, leaf_bh,
						  &split_cpos);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_journal_dirty(handle, ref_leaf_bh);
	ocfs2_journal_dirty(handle, leaf_bh);

	ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);

	mlog(0, " insert new leaf block %llu at %u \n ",
	     (unsigned long long)leaf_bh->b_blocknr, split_cpos);

	/* Hook the new leaf into the tree at the split offset. */
	status = ocfs2_insert_extent(handle, &ref_et, split_cpos,
				     leaf_bh->b_blocknr, 1, 0, meta_ac);
	if (status)
		mlog_errno(status);

bail:
	brelse(leaf_bh);
	return status;
}
/*
 * Make room in a refcount tree by adding a new leaf block.
 *
 * When the leaf is the root itself, i.e. the records are still stored
 * inline, the root is first converted into a tree root with one leaf.
 * The leaf to be divided is then split through
 * ocfs2_new_leaf_refcount_block().  Returns 0 or a negative error code.
 */
static int ocfs2_expand_refcount_tree(handle_t *handle,
				      struct ocfs2_caching_info *ci,
				      struct buffer_head *ref_root_bh,
				      struct buffer_head *ref_leaf_bh,
				      struct ocfs2_alloc_context *meta_ac)
{
	int status;
	struct buffer_head *split_bh = NULL;

	if (ref_root_bh != ref_leaf_bh) {
		/* Take our own reference so the exit path can drop it. */
		split_bh = ref_leaf_bh;
		get_bh(split_bh);
	} else {
		/*
		 * The root has not been expanded to a b-tree yet, so
		 * do that first; split_bh becomes the fresh leaf.
		 */
		status = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
						      &split_bh, meta_ac);
		if (status) {
			mlog_errno(status);
			goto bail;
		}
	}

	/* Now add a new refcount block into the tree. */
	status = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
					       split_bh, meta_ac);
	if (status)
		mlog_errno(status);

bail:
	brelse(split_bh);
	return status;
}
/*
 * Fix up the extent record in the b-tree that represents ref_leaf_bh.
 *
 * Called after a refcount record was inserted at index 0 of the leaf:
 * if the low 32 bits of the new record's cpos are smaller than the
 * leaf's rf_cpos, both rf_cpos and the e_cpos of the matching extent
 * record must be lowered to that value.
 *
 * Nothing is done when the root is not a tree root or when the leaf's
 * rf_cpos is already small enough.  Returns 0 or a negative error code.
 */
static int ocfs2_adjust_refcount_rec(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     struct buffer_head *ref_leaf_bh,
				     struct ocfs2_refcount_rec *rec)
{
	int ret = 0, i, next_free;
	u32 new_cpos, old_cpos;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_tree et;
	struct ocfs2_refcount_block *root_rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
	struct ocfs2_refcount_block *leaf_rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_extent_list *el;

	if (!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
		goto out;

	old_cpos = le32_to_cpu(leaf_rb->rf_cpos);
	new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
	if (new_cpos >= old_cpos)
		goto out;

	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);

	path = ocfs2_new_path_from_et(&et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(ci, path, old_cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * 2 more credits, one for the leaf refcount block, one for
	 * the extent block that contains the extent rec.
	 */
	ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Change the leaf extent block first. */
	el = path_leaf_el(path);
	next_free = le16_to_cpu(el->l_next_free_rec);

	i = 0;
	while (i < next_free &&
	       le32_to_cpu(el->l_recs[i].e_cpos) != old_cpos)
		i++;

	/* The extent record for this leaf has to exist. */
	BUG_ON(i == le16_to_cpu(el->l_next_free_rec));

	el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);

	/* Then change rf_cpos in the leaf refcount block. */
	leaf_rb->rf_cpos = cpu_to_le32(new_cpos);

	ocfs2_journal_dirty(handle, path_leaf_bh(path));
	ocfs2_journal_dirty(handle, ref_leaf_bh);

out:
	ocfs2_free_path(path);
	return ret;
}
/*
 * Insert "rec" at position "index" of the leaf ref_leaf_bh.
 *
 * If the leaf is full the tree is expanded first and the target leaf and
 * index are looked up again, since the position may now live in another
 * block.  With "merge" set the new record is merged with contiguous
 * neighbours.  When the record lands at index 0 the extent record that
 * describes the leaf is adjusted as well.
 *
 * Returns 0 or a negative error code.
 */
static int ocfs2_insert_refcount_rec(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     struct buffer_head *ref_leaf_bh,
				     struct ocfs2_refcount_rec *rec,
				     int index, int merge,
				     struct ocfs2_alloc_context *meta_ac)
{
	int status, used;
	struct ocfs2_refcount_block *leaf_rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_list *list = &leaf_rb->rf_records;
	struct buffer_head *relookup_bh = NULL;

	BUG_ON(le32_to_cpu(leaf_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);

	/* Both counters are on-disk values, equality needs no swap. */
	if (list->rl_used == list->rl_count) {
		u64 rec_cpos = le64_to_cpu(rec->r_cpos);
		u32 rec_len = le32_to_cpu(rec->r_clusters);

		status = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
						    ref_leaf_bh, meta_ac);
		if (status) {
			mlog_errno(status);
			goto bail;
		}

		status = ocfs2_get_refcount_rec(ci, ref_root_bh,
						rec_cpos, rec_len, NULL,
						&index, &relookup_bh);
		if (status) {
			mlog_errno(status);
			goto bail;
		}

		/* Continue with the leaf that now covers the record. */
		ref_leaf_bh = relookup_bh;
		leaf_rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
		list = &leaf_rb->rf_records;
	}

	status = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/* Open a slot at "index" unless we append at the end. */
	used = le16_to_cpu(list->rl_used);
	if (index < used)
		memmove(&list->rl_recs[index + 1],
			&list->rl_recs[index],
			(used - index) *
			sizeof(struct ocfs2_refcount_rec));

	mlog(0, " insert refcount record start %llu, len %u, count %u "
	     " to leaf block %llu at index %d \n ",
	     (unsigned long long)le64_to_cpu(rec->r_cpos),
	     le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
	     (unsigned long long)ref_leaf_bh->b_blocknr, index);

	list->rl_recs[index] = *rec;
	le16_add_cpu(&list->rl_used, 1);

	if (merge)
		ocfs2_refcount_rec_merge(leaf_rb, index);

	status = ocfs2_journal_dirty(handle, ref_leaf_bh);
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	if (index == 0) {
		status = ocfs2_adjust_refcount_rec(handle, ci,
						   ref_root_bh,
						   ref_leaf_bh, rec);
		if (status)
			mlog_errno(status);
	}

bail:
	brelse(relookup_bh);
	return status;
}
/*
 * Split the refcount record at "index" of ref_leaf_bh around split_rec.
 * This is much simpler than the b-tree code.
 *
 * split_rec describes the affected cluster range and the refcount that
 * range must end up with:
 *  - split_rec->r_refcount > 0: the range stays recorded with the new
 *    count (a refcount is increased, or decreased to non-zero);
 *  - split_rec->r_refcount == 0: the range is punched out of the
 *    original record (a refcount is decreased to zero).
 *
 * The tree is expanded when the leaf cannot hold the additional records.
 * "dealloc" is not used by this function.
 * Returns 0 or a negative error code.
 */
static int ocfs2_split_refcount_rec(handle_t *handle,
				    struct ocfs2_caching_info *ci,
				    struct buffer_head *ref_root_bh,
				    struct buffer_head *ref_leaf_bh,
				    struct ocfs2_refcount_rec *split_rec,
				    int index, int merge,
				    struct ocfs2_alloc_context *meta_ac,
				    struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, recs_need, at_edge, used;
	u32 len;
	struct ocfs2_refcount_block *rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
	struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index];
	struct ocfs2_refcount_rec *tail_rec = NULL;
	struct buffer_head *new_bh = NULL;

	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);

	mlog(0, " original r_pos %llu, cluster %u, split %llu, cluster %u \n ",
	     le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters),
	     le64_to_cpu(split_rec->r_cpos),
	     le32_to_cpu(split_rec->r_clusters));

	/* Does the split range share its start or its end with the original? */
	at_edge = split_rec->r_cpos == orig_rec->r_cpos ||
		  le64_to_cpu(split_rec->r_cpos) +
		  le32_to_cpu(split_rec->r_clusters) ==
		  le64_to_cpu(orig_rec->r_cpos) +
		  le32_to_cpu(orig_rec->r_clusters);

	/*
	 * Punching a hole at the head or tail needs no new record, a
	 * hole in the middle needs one.  Keeping the range with a new
	 * refcount needs one record at an edge and two in the middle.
	 */
	if (split_rec->r_refcount)
		recs_need = at_edge ? 1 : 2;
	else
		recs_need = at_edge ? 0 : 1;

	/* If the leaf block doesn't have enough free records, expand it. */
	if (le16_to_cpu(rf_list->rl_used) + recs_need >
	    le16_to_cpu(rf_list->rl_count)) {
		struct ocfs2_refcount_rec tmp_rec;
		u64 orig_cpos = le64_to_cpu(orig_rec->r_cpos);
		u32 orig_len = le32_to_cpu(orig_rec->r_clusters);

		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
						 ref_leaf_bh, meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * We have to re-get it since now cpos may be moved to
		 * another leaf block.
		 */
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     orig_cpos, orig_len,
					     &tmp_rec, &index,
					     &new_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ref_leaf_bh = new_bh;
		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
		rf_list = &rb->rf_records;
		orig_rec = &rf_list->rl_recs[index];
	}

	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * recs_need tells how many additional slots are required, so
	 * make room by moving the records after "index" towards the end.
	 */
	used = le16_to_cpu(rf_list->rl_used);
	if (index != used - 1)
		memmove(&rf_list->rl_recs[index + 1 + recs_need],
			&rf_list->rl_recs[index + 1],
			(used - index - 1) *
			sizeof(struct ocfs2_refcount_rec));

	/* Clusters of the original record that lie behind the split. */
	len = (le64_to_cpu(orig_rec->r_cpos) +
	       le32_to_cpu(orig_rec->r_clusters)) -
	      (le64_to_cpu(split_rec->r_cpos) +
	       le32_to_cpu(split_rec->r_clusters));

	/*
	 * A non-zero "len" means there is a tail piece: it goes to the
	 * end of the space we have just spared.
	 */
	if (len) {
		tail_rec = &rf_list->rl_recs[index + recs_need];

		memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
		le64_add_cpu(&tail_rec->r_cpos,
			     le32_to_cpu(tail_rec->r_clusters) - len);
		tail_rec->r_clusters = cpu_to_le32(len);
	}

	/*
	 * If the split pos isn't the same as the original one, we need to
	 * split in the head.
	 *
	 * Note: with split_rec.r_refcount = 0, recs_need = 0 and len > 0
	 * we have just cut the head from orig_rec, i.e. tail_rec is
	 * orig_rec and it was modified above, so the r_cpos comparison
	 * alone would be misleading; hence the extra pointer check.
	 */
	if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
		len = le64_to_cpu(split_rec->r_cpos) -
		      le64_to_cpu(orig_rec->r_cpos);
		orig_rec->r_clusters = cpu_to_le32(len);
		index++;
	}

	le16_add_cpu(&rf_list->rl_used, recs_need);

	if (split_rec->r_refcount) {
		rf_list->rl_recs[index] = *split_rec;
		mlog(0, " insert refcount record start %llu, len %u, count %u "
		     " to leaf block %llu at index %d \n ",
		     (unsigned long long)le64_to_cpu(split_rec->r_cpos),
		     le32_to_cpu(split_rec->r_clusters),
		     le32_to_cpu(split_rec->r_refcount),
		     (unsigned long long)ref_leaf_bh->b_blocknr, index);

		if (merge)
			ocfs2_refcount_rec_merge(rb, index);
	}

	ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
	if (ret)
		mlog_errno(ret);

out:
	brelse(new_bh);
	return ret;
}
2009-08-18 07:44:03 +04:00
/*
 * Increase the refcount of the clusters [cpos, cpos + len) by one.
 *
 * The range is processed piece by piece; each piece is whatever
 * ocfs2_get_refcount_rec() reports for the current position:
 *  - a hole (refcount 0): a new record with refcount 1 is inserted;
 *  - an existing record that starts at cpos and does not reach beyond
 *    the requested range: its refcount is bumped in place;
 *  - anything else: the existing record is split and the overlapping
 *    part gets the increased refcount.
 *
 * "merge" is passed on to the helpers and controls whether resulting
 * records are merged with contiguous neighbours.
 * Returns 0 or a negative error code.
 */
static int __ocfs2_increase_refcount(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     u64 cpos, u32 len, int merge,
				     struct ocfs2_alloc_context *meta_ac,
				     struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret = 0, index;
	struct buffer_head *ref_leaf_bh = NULL;
	struct ocfs2_refcount_rec rec;
	unsigned int set_len = 0;

	mlog(0, " Tree owner %llu, add refcount start %llu, len %u \n ",
	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
	     (unsigned long long)cpos, len);

	while (len) {
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, len, &rec, &index,
					     &ref_leaf_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		set_len = le32_to_cpu(rec.r_clusters);

		if (!rec.r_refcount) {
			/* A hole: record it with a refcount of one. */
			rec.r_refcount = cpu_to_le32(1);

			mlog(0, " insert refcount rec, start %llu, len %u \n ",
			     (unsigned long long)le64_to_cpu(rec.r_cpos),
			     set_len);

			ret = ocfs2_insert_refcount_rec(handle, ci,
							ref_root_bh,
							ref_leaf_bh,
							&rec, index,
							merge, meta_ac);
		} else if (le64_to_cpu(rec.r_cpos) == cpos &&
			   set_len <= len) {
			/* The whole record is covered: bump it in place. */
			mlog(0, " increase refcount rec, start %llu, len %u, "
			     " count %u \n ", (unsigned long long)cpos, set_len,
			     le32_to_cpu(rec.r_refcount));

			ret = ocfs2_change_refcount_rec(handle, ci,
							ref_leaf_bh, index,
							merge, 1);
		} else {
			/* Only part of the record is covered: split it. */
			set_len = min((u64)(cpos + len),
				      le64_to_cpu(rec.r_cpos) + set_len) - cpos;
			rec.r_cpos = cpu_to_le64(cpos);
			rec.r_clusters = cpu_to_le32(set_len);
			le32_add_cpu(&rec.r_refcount, 1);

			mlog(0, " split refcount rec, start %llu, "
			     " len %u, count %u \n ",
			     (unsigned long long)le64_to_cpu(rec.r_cpos),
			     set_len, le32_to_cpu(rec.r_refcount));

			ret = ocfs2_split_refcount_rec(handle, ci,
						       ref_root_bh,
						       ref_leaf_bh,
						       &rec, index, merge,
						       meta_ac, dealloc);
		}

		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		cpos += set_len;
		len -= set_len;

		brelse(ref_leaf_bh);
		ref_leaf_bh = NULL;
	}

out:
	brelse(ref_leaf_bh);
	return ret;
}
2009-08-18 07:24:49 +04:00
/*
 * Remove an empty leaf refcount block from the tree.
 *
 * The extent record for the leaf is removed from the tree rooted at
 * ref_root_bh, the leaf is dropped from the metadata cache and queued on
 * "dealloc" so its block is freed when the dealloc context is run.  The
 * root's cluster count is decremented, and when no extent record is
 * left the root is reset to an empty block that stores records inline.
 *
 * The leaf must not hold any record (BUG_ON otherwise).
 * Returns 0 or a negative error code.
 */
static int ocfs2_remove_refcount_extent(handle_t *handle,
					struct ocfs2_caching_info *ci,
					struct buffer_head *ref_root_bh,
					struct buffer_head *ref_leaf_bh,
					struct ocfs2_alloc_context *meta_ac,
					struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int status;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct ocfs2_refcount_block *leaf_rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_block *root_rb =
		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
	struct ocfs2_extent_tree et;

	BUG_ON(leaf_rb->rf_records.rl_used);

	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
	status = ocfs2_remove_extent(handle, &et,
				     le32_to_cpu(leaf_rb->rf_cpos),
				     1, meta_ac, dealloc);
	if (status) {
		mlog_errno(status);
		return status;
	}

	ocfs2_remove_from_cache(ci, ref_leaf_bh);

	/*
	 * Add the freed block to the dealloc so that it will be freed
	 * when we run dealloc.
	 */
	status = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
					   le16_to_cpu(leaf_rb->rf_suballoc_slot),
					   le64_to_cpu(leaf_rb->rf_blkno),
					   le16_to_cpu(leaf_rb->rf_suballoc_bit));
	if (status) {
		mlog_errno(status);
		return status;
	}

	status = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		return status;
	}

	le32_add_cpu(&root_rb->rf_clusters, -1);

	/*
	 * Check whether we need to restore the root refcount block if
	 * there is no leaf extent block at all.
	 */
	if (!root_rb->rf_list.l_next_free_rec) {
		BUG_ON(root_rb->rf_clusters);

		mlog(0, " reset refcount tree root %llu to be a record block. \n ",
		     (unsigned long long)ref_root_bh->b_blocknr);

		root_rb->rf_flags = 0;
		root_rb->rf_parent = 0;
		root_rb->rf_cpos = 0;
		memset(&root_rb->rf_records, 0, sb->s_blocksize -
		       offsetof(struct ocfs2_refcount_block, rf_records));
		root_rb->rf_records.rl_count =
				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
	}

	ocfs2_journal_dirty(handle, ref_root_bh);

	return status;
}
2009-08-18 07:44:03 +04:00
/*
 * Increase the refcount of the clusters [cpos, cpos + len) by one, with
 * merging of contiguous records enabled.  Thin wrapper around
 * __ocfs2_increase_refcount(); returns whatever that function returns.
 */
int ocfs2_increase_refcount(handle_t *handle,
			    struct ocfs2_caching_info *ci,
			    struct buffer_head *ref_root_bh,
			    u64 cpos, u32 len,
			    struct ocfs2_alloc_context *meta_ac,
			    struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int status;

	status = __ocfs2_increase_refcount(handle, ci, ref_root_bh,
					   cpos, len, 1,
					   meta_ac, dealloc);
	return status;
}
2009-08-18 07:24:49 +04:00
/*
 * Decrease by one the refcount of [cpos, cpos + len), which must lie
 * completely inside the record at "index" of ref_leaf_bh (BUG_ON
 * otherwise).
 *
 * If the range covers the whole record its refcount is changed in place,
 * otherwise the record is split.  Afterwards, a non-root leaf that has
 * no record left is removed from the tree.
 * Returns 0 or a negative error code.
 */
static int ocfs2_decrease_refcount_rec(handle_t *handle,
				struct ocfs2_caching_info *ci,
				struct buffer_head *ref_root_bh,
				struct buffer_head *ref_leaf_bh,
				int index, u64 cpos, unsigned int len,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int status;
	struct ocfs2_refcount_block *leaf_rb =
		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
	struct ocfs2_refcount_rec *target = &leaf_rb->rf_records.rl_recs[index];
	u64 rec_start = le64_to_cpu(target->r_cpos);
	u32 rec_len = le32_to_cpu(target->r_clusters);

	BUG_ON(cpos < rec_start);
	BUG_ON(cpos + len > rec_start + rec_len);

	if (cpos != rec_start || len != rec_len) {
		/* Only part of the record is affected. */
		struct ocfs2_refcount_rec split = *target;

		split.r_cpos = cpu_to_le64(cpos);
		split.r_clusters = cpu_to_le32(len);
		le32_add_cpu(&split.r_refcount, -1);

		mlog(0, " split refcount rec, start %llu, "
		     " len %u, count %u, original start %llu, len %u \n ",
		     (unsigned long long)le64_to_cpu(split.r_cpos),
		     len, le32_to_cpu(split.r_refcount),
		     (unsigned long long)rec_start,
		     rec_len);

		status = ocfs2_split_refcount_rec(handle, ci,
						  ref_root_bh, ref_leaf_bh,
						  &split, index, 1,
						  meta_ac, dealloc);
	} else {
		status = ocfs2_change_refcount_rec(handle, ci,
						   ref_leaf_bh, index, 1, -1);
	}

	if (status) {
		mlog_errno(status);
		return status;
	}

	/* Remove the leaf refcount block if it contains no refcount record. */
	if (!leaf_rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
		status = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
						      ref_leaf_bh, meta_ac,
						      dealloc);
		if (status)
			mlog_errno(status);
	}

	return status;
}
/*
 * Decrease by one the refcount of the clusters [cpos, cpos + len).
 *
 * The range is walked record by record.  Every cluster in it must have a
 * refcount of at least one, and when "delete" is zero the refcount must
 * be exactly one (both enforced with BUG_ON).  With "delete" set, the
 * clusters of a piece whose refcount was one before the decrease are
 * queued on "dealloc" to be freed.
 *
 * Returns 0 or a negative error code.
 */
static int __ocfs2_decrease_refcount(handle_t *handle,
				     struct ocfs2_caching_info *ci,
				     struct buffer_head *ref_root_bh,
				     u64 cpos, u32 len,
				     struct ocfs2_alloc_context *meta_ac,
				     struct ocfs2_cached_dealloc_ctxt *dealloc,
				     int delete)
{
	int ret = 0, index = 0;
	struct ocfs2_refcount_rec rec;
	unsigned int r_count = 0, r_len;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	struct buffer_head *ref_leaf_bh = NULL;

	mlog(0, " Tree owner %llu, decrease refcount start %llu, "
	     " len %u, delete %u \n ",
	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
	     (unsigned long long)cpos, len, delete);

	while (len) {
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, len, &rec, &index,
					     &ref_leaf_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* "rec" is a copy, so r_count is the count before the decrease. */
		r_count = le32_to_cpu(rec.r_refcount);
		BUG_ON(r_count == 0);
		BUG_ON(!delete && r_count > 1);

		/* Stay inside both the request and the found record. */
		r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
			    le32_to_cpu(rec.r_clusters)) - cpos;

		ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh,
						  ref_leaf_bh, index,
						  cpos, r_len,
						  meta_ac, dealloc);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* The last reference is gone: schedule the clusters for freeing. */
		if (r_count == 1 && delete) {
			ret = ocfs2_cache_cluster_dealloc(dealloc,
					  ocfs2_clusters_to_blocks(sb, cpos),
							  r_len);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
		}

		cpos += r_len;
		len -= r_len;

		brelse(ref_leaf_bh);
		ref_leaf_bh = NULL;
	}

out:
	brelse(ref_leaf_bh);
	return ret;
}
/*
 * Decrease the refcount of "len" clusters starting at "cpos", using the
 * refcount tree that "inode" points at.
 *
 * The inode must carry OCFS2_HAS_REFCOUNT_FL.  The tree's root block is
 * read here and released again before returning; the real work is done
 * by __ocfs2_decrease_refcount().
 *
 * Caller must hold refcount tree lock.
 */
int ocfs2_decrease_refcount(struct inode *inode,
			    handle_t *handle, u32 cpos, u32 len,
			    struct ocfs2_alloc_context *meta_ac,
			    struct ocfs2_cached_dealloc_ctxt *dealloc,
			    int delete)
{
	int ret;
	u64 root_blkno;
	struct buffer_head *root_bh = NULL;
	struct ocfs2_refcount_tree *rtree;

	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	ret = ocfs2_get_refcount_block(inode, &root_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), root_blkno,
				      &rtree);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_refcount_block(&rtree->rf_ci, rtree->rf_blkno,
					&root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = __ocfs2_decrease_refcount(handle, &rtree->rf_ci, root_bh,
					cpos, len, meta_ac, dealloc, delete);
	if (ret)
		mlog_errno(ret);

out:
	brelse(root_bh);
	return ret;
}
2009-08-18 07:28:39 +04:00
/*
 * Set the refcount extent flag on the already-existing extent at cpos,
 * for len clusters.
 *
 * If the existing extent is larger than the request, a split is
 * initiated, and an attempt is made to merge with adjacent extents.
 *
 * Fails with -EROFS (after ocfs2_error()) when the super block does not
 * have the refcount tree feature.  The caller is responsible for passing
 * down meta_ac if it will be needed.
 */
static int ocfs2_mark_extent_refcounted(struct inode *inode,
				struct ocfs2_extent_tree *et,
				handle_t *handle, u32 cpos,
				u32 len, u32 phys,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int status;

	mlog(0, " Inode %lu refcount tree cpos %u, len %u, phys cluster %u \n ",
	     inode->i_ino, cpos, len, phys);

	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
		ocfs2_error(inode->i_sb, " Inode %lu want to use refcount "
			    " tree, but the feature bit is not set in the "
			    " super block. ", inode->i_ino);
		return -EROFS;
	}

	status = ocfs2_change_extent_flag(handle, et, cpos,
					  len, phys, meta_ac, dealloc,
					  OCFS2_EXT_REFCOUNTED, 0);
	if (status)
		mlog_errno(status);

	return status;
}
2009-08-18 07:29:12 +04:00
/*
 * Given some contiguous physical clusters, calculate what we need
 * for modifying their refcount.
 *
 * The refcount records covering [start_cpos, start_cpos + clusters) are
 * walked one by one.  For every leaf block touched we count how many
 * records may have to be added to it; a leaf whose used count plus those
 * additions exceeds its capacity is counted as needing a new block.
 *
 * *meta_add is increased by the number of new metadata blocks estimated
 * and *credits by the journal credits estimated.  Neither is reset here,
 * so a caller can accumulate on top of its own numbers.
 *
 * Returns 0 on success or the error from ocfs2_get_refcount_rec().
 */
static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
					    struct ocfs2_caching_info *ci,
					    struct buffer_head *ref_root_bh,
					    u64 start_cpos,
					    u32 clusters,
					    int *meta_add,
					    int *credits)
{
	int ret = 0, index, ref_blocks = 0, recs_add = 0;
	u64 cpos = start_cpos;
	struct ocfs2_refcount_block *rb;
	struct ocfs2_refcount_rec rec;
	struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
	u32 len;

	mlog(0, " start_cpos %llu, clusters %u \n ",
	     (unsigned long long)start_cpos, clusters);

	while (clusters) {
		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
					     cpos, clusters, &rec,
					     &index, &ref_leaf_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (ref_leaf_bh != prev_bh) {
			/*
			 * Now we encounter a new leaf block, so calculate
			 * whether we need to extend the old leaf.
			 */
			if (prev_bh) {
				rb = (struct ocfs2_refcount_block *)
							prev_bh->b_data;

				/*
				 * rl_used is decoded with le16_to_cpu(), the
				 * same way as rl_count next to it; decoding
				 * it as a 64-bit value yields a wrong count
				 * on big-endian hosts.
				 * NOTE(review): assumes rl_used is a __le16
				 * field like rl_count - confirm against the
				 * on-disk structure definition.
				 */
				if (le16_to_cpu(rb->rf_records.rl_used) +
				    recs_add >
				    le16_to_cpu(rb->rf_records.rl_count))
					ref_blocks++;
			}

			recs_add = 0;
			*credits += 1;
			brelse(prev_bh);
			prev_bh = ref_leaf_bh;
			get_bh(prev_bh);
		}

		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;

		mlog(0, " recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu, "
		     " rec->r_clusters %u, rec->r_refcount %u, index %d \n ",
		     recs_add, (unsigned long long)cpos, clusters,
		     (unsigned long long)le64_to_cpu(rec.r_cpos),
		     le32_to_cpu(rec.r_clusters),
		     le32_to_cpu(rec.r_refcount), index);

		/* Only the part of the range inside this record. */
		len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
			  le32_to_cpu(rec.r_clusters)) - cpos;
		/*
		 * If the refcount rec already exist, cool. We just need
		 * to check whether there is a split. Otherwise we just need
		 * to increase the refcount.
		 * If we will insert one, increases recs_add.
		 *
		 * We record all the records which will be inserted to the
		 * same refcount block, so that we can tell exactly whether
		 * we need a new refcount block or not.
		 */
		if (rec.r_refcount) {
			/* Check whether we need a split at the beginning. */
			if (cpos == start_cpos &&
			    cpos != le64_to_cpu(rec.r_cpos))
				recs_add++;

			/* Check whether we need a split in the end. */
			if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
			    le32_to_cpu(rec.r_clusters))
				recs_add++;
		} else
			recs_add++;

		brelse(ref_leaf_bh);
		ref_leaf_bh = NULL;
		clusters -= len;
		cpos += len;
	}

	/* Account for the last leaf block visited by the loop. */
	if (prev_bh) {
		rb = (struct ocfs2_refcount_block *)prev_bh->b_data;

		/* Same 16-bit decoding of rl_used as in the loop above. */
		if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
		    le16_to_cpu(rb->rf_records.rl_count))
			ref_blocks++;

		*credits += 1;
	}

	if (!ref_blocks)
		goto out;

	mlog(0, " we need ref_blocks %d \n ", ref_blocks);
	*meta_add += ref_blocks;
	*credits += ref_blocks;

	/*
	 * So we may need ref_blocks to insert into the tree.
	 * That also means we need to change the b-tree and add that number
	 * of records since we never merge them.
	 * We need one more block for expansion since the new created leaf
	 * block is also full and needs split.
	 */
	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) {
		struct ocfs2_extent_tree et;

		ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
		*meta_add += ocfs2_extend_meta_needed(et.et_root_el);
		*credits += ocfs2_calc_extend_credits(sb,
						      et.et_root_el,
						      ref_blocks);
	} else {
		*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
		*meta_add += 1;
	}

out:
	brelse(ref_leaf_bh);
	brelse(prev_bh);
	return ret;
}
/*
 * We are about to decrease the refcount of some contiguous clusters, so
 * walk the refcount tree to see how many blocks will be touched and
 * whether new blocks have to be created.
 *
 * Normally the refcount blocks storing these refcounts are contiguous
 * too, so the number is easy to get.  As for meta_ac, at most 2 refcount
 * records are split and 2 more refcount blocks are added, so this is
 * only a rough check.
 *
 * *credits is increased by the credits estimated.  *meta_ac is reserved
 * only when new metadata blocks are needed.
 *
 * Caller must hold refcount tree lock.
 */
int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
					  struct buffer_head *di_bh,
					  u64 phys_blkno,
					  u32 clusters,
					  int *credits,
					  struct ocfs2_alloc_context **meta_ac)
{
	int ret, new_blocks = 0;
	struct ocfs2_dinode *dinode = (struct ocfs2_dinode *)di_bh->b_data;
	struct buffer_head *root_bh = NULL;
	struct ocfs2_refcount_tree *rtree;
	u64 first_cluster = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);

	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
		ocfs2_error(inode->i_sb, " Inode %lu want to use refcount "
			    " tree, but the feature bit is not set in the "
			    " super block. ", inode->i_ino);
		ret = -EROFS;
		goto out;
	}

	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
				      le64_to_cpu(dinode->i_refcount_loc),
				      &rtree);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_refcount_block(&rtree->rf_ci,
					le64_to_cpu(dinode->i_refcount_loc),
					&root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
					       &rtree->rf_ci,
					       root_bh,
					       first_cluster, clusters,
					       &new_blocks, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, " reserve new metadata %d, credits = %d \n ",
	     new_blocks, *credits);

	/* Nothing to reserve when no new metadata block is needed. */
	if (new_blocks) {
		ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
							new_blocks, meta_ac);
		if (ret)
			mlog_errno(ret);
	}

out:
	brelse(root_bh);
	return ret;
}
2009-08-25 04:05:12 +04:00
/* Byte size of the unit that CoW tries to keep contiguous. */
#define MAX_CONTIG_BYTES	1048576

/* MAX_CONTIG_BYTES expressed in clusters of the given super block. */
static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
{
	unsigned int nr_clusters;

	nr_clusters = ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES);
	return nr_clusters;
}
/*
 * Bit mask that clears the low bits of a cluster count, i.e. the
 * complement of (contig_clusters - 1).
 */
static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
{
	unsigned int low_bits = ocfs2_cow_contig_clusters(sb) - 1;

	return ~low_bits;
}
/*
 * Given an extent that starts at 'start' and an I/O that starts at 'cpos',
 * find an offset (start + (n * contig_clusters)) that is closest to cpos
 * while still being less than or equal to it.
 *
 * The goal is to break the extent at a multiple of contig_clusters.
 * 'start' must not be greater than 'cpos' (BUG_ON otherwise).
 */
static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
						 unsigned int start,
						 unsigned int cpos)
{
	unsigned int delta;

	BUG_ON(start > cpos);

	/* Distance from the extent start, with the low bits masked off. */
	delta = cpos - start;
	delta &= ocfs2_cow_contig_mask(sb);

	return start + delta;
}
/*
 * Given a cluster count of len, pad it out so that it is a multiple
 * of contig_clusters.  If the padding wraps around, UINT_MAX is
 * returned instead.
 */
static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
						  unsigned int len)
{
	unsigned int round_up = ocfs2_cow_contig_clusters(sb) - 1;
	unsigned int padded = (len + round_up) & ocfs2_cow_contig_mask(sb);

	/* Did we wrap? */
	return padded < len ? UINT_MAX : padded;
}
/*
 * Calculate the start and number of virtual clusters we need to CoW.
 *
 * cpos is the virtual start cluster position we want to do CoW in a
 * file and write_len is the cluster length.
 * max_cpos is the place where we want to stop CoW intentionally.
 *
 * Normally we will start CoW from the beginning of the extent record
 * containing cpos.  We try to break up extents on boundaries of
 * MAX_CONTIG_BYTES so that we get good I/O from the resulting extent tree.
 *
 * The result is stored in *cow_start and *cow_len.  Returns 0 on success,
 * or an error from finding/reading a leaf extent block, or -EROFS when
 * the leaf found has a non-zero tree depth.
 */
static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
					   struct ocfs2_extent_list *el,
					   u32 cpos,
					   u32 write_len,
					   u32 max_cpos,
					   u32 *cow_start,
					   u32 *cow_len)
{
	int ret = 0;
	int tree_height = le16_to_cpu(el->l_tree_depth), i;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb = NULL;
	struct ocfs2_extent_rec *rec;
	unsigned int want_clusters, rec_end = 0;
	int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
	int leaf_clusters;

	BUG_ON(cpos + write_len > max_cpos);

	/* For a real tree, start from the leaf block that covers cpos. */
	if (tree_height > 0) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ocfs2_error(inode->i_sb,
				    " Inode %lu has non zero tree depth in "
				    " leaf block %llu \n ", inode->i_ino,
				    (unsigned long long)eb_bh->b_blocknr);
			ret = -EROFS;
			goto out;
		}
	}

	*cow_len = 0;
	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
		rec = &el->l_recs[i];

		if (ocfs2_is_empty_extent(rec)) {
			mlog_bug_on_msg(i != 0, " Inode %lu has empty record in "
					" index %d \n ", inode->i_ino, i);
			continue;
		}

		/* Skip records that end at or before cpos. */
		if (le32_to_cpu(rec->e_cpos) +
		    le16_to_cpu(rec->e_leaf_clusters) <= cpos)
			continue;

		if (*cow_len == 0) {
			/*
			 * We should find a refcounted record in the
			 * first pass.
			 */
			BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
			*cow_start = le32_to_cpu(rec->e_cpos);
		}

		/*
		 * If we encounter a hole, a non-refcounted record or
		 * pass the max_cpos, stop the search.
		 */
		if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) ||
		    (max_cpos <= le32_to_cpu(rec->e_cpos)))
			break;

		leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
		rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
		if (rec_end > max_cpos) {
			rec_end = max_cpos;
			leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos);
		}

		/*
		 * How many clusters do we actually need from
		 * this extent?  First we see how many we actually
		 * need to complete the write.  If that's smaller
		 * than contig_clusters, we try for contig_clusters.
		 */
		if (!*cow_len)
			want_clusters = write_len;
		else
			want_clusters = (cpos + write_len) -
				(*cow_start + *cow_len);
		if (want_clusters < contig_clusters)
			want_clusters = contig_clusters;

		/*
		 * If the write does not cover the whole extent, we
		 * need to calculate how we're going to split the extent.
		 * We try to do it on contig_clusters boundaries.
		 *
		 * Any extent smaller than contig_clusters will be
		 * CoWed in its entirety.
		 */
		if (leaf_clusters <= contig_clusters)
			*cow_len += leaf_clusters;
		else if (*cow_len || (*cow_start == cpos)) {
			/*
			 * This extent needs to be CoW'd from its
			 * beginning, so all we have to do is compute
			 * how many clusters to grab.  We align
			 * want_clusters to the edge of contig_clusters
			 * to get better I/O.
			 */
			want_clusters = ocfs2_cow_align_length(inode->i_sb,
							       want_clusters);

			if (leaf_clusters < want_clusters)
				*cow_len += leaf_clusters;
			else
				*cow_len += want_clusters;
		} else if ((*cow_start + contig_clusters) >=
			   (cpos + write_len)) {
			/*
			 * Breaking off contig_clusters at the front
			 * of the extent will cover our write.  That's
			 * easy.
			 */
			*cow_len = contig_clusters;
		} else if ((rec_end - cpos) <= contig_clusters) {
			/*
			 * Breaking off contig_clusters at the tail of
			 * this extent will cover cpos.
			 */
			*cow_start = rec_end - contig_clusters;
			*cow_len = contig_clusters;
		} else if ((rec_end - cpos) <= want_clusters) {
			/*
			 * While we can't fit the entire write in this
			 * extent, we know that the write goes from cpos
			 * to the end of the extent.  Break that off.
			 * We try to break it at some multiple of
			 * contig_clusters from the front of the extent.
			 * Failing that (ie, cpos is within
			 * contig_clusters of the front), we'll CoW the
			 * entire extent.
			 */
			*cow_start = ocfs2_cow_align_start(inode->i_sb,
							   *cow_start, cpos);
			*cow_len = rec_end - *cow_start;
		} else {
			/*
			 * Ok, the entire write lives in the middle of
			 * this extent.  Let's try to slice the extent up
			 * nicely.  Optimally, our CoW region starts at
			 * m*contig_clusters from the beginning of the
			 * extent and goes for n*contig_clusters,
			 * covering the entire write.
			 */
			*cow_start = ocfs2_cow_align_start(inode->i_sb,
							   *cow_start, cpos);

			want_clusters = (cpos + write_len) - *cow_start;
			want_clusters = ocfs2_cow_align_length(inode->i_sb,
							       want_clusters);
			if (*cow_start + want_clusters <= rec_end)
				*cow_len = want_clusters;
			else
				*cow_len = rec_end - *cow_start;
		}

		/* Have we covered our entire write yet? */
		if ((*cow_start + *cow_len) >= (cpos + write_len))
			break;

		/*
		 * If we reach the end of the extent block and don't get enough
		 * clusters, continue with the next extent block if possible.
		 */
		if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
		    eb && eb->h_next_leaf_blk) {
			/*
			 * eb points into eb_bh's data, so fetch the next
			 * leaf's block number before dropping our reference
			 * on eb_bh; it must not be read afterwards.
			 */
			u64 next_leaf_blk = le64_to_cpu(eb->h_next_leaf_blk);

			brelse(eb_bh);
			eb_bh = NULL;

			ret = ocfs2_read_extent_block(INODE_CACHE(inode),
						      next_leaf_blk,
						      &eb_bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			eb = (struct ocfs2_extent_block *) eb_bh->b_data;
			el = &eb->h_list;
			/* Restart at record 0 of the new leaf (i++ follows). */
			i = -1;
		}
	}

out:
	brelse(eb_bh);
	return ret;
}
/*
 * Prepare meta_ac, data_ac and calculate credits when we want to add some
 * num_clusters in data_tree "et" and change the refcount for the old
 * clusters (starting from p_cluster) in the refcount tree.
 *
 * Note:
 * 1. Since we may split the old tree, we will need at most
 *    num_clusters + 2 more new leaf records.
 * 2. In some cases we may not need to reserve new clusters (e.g. reflink),
 *    so just give data_ac = NULL.
 *
 * *credits is increased by the credits estimated.  On any failure a
 * non-NULL *meta_ac is freed and reset to NULL before returning.
 */
static int ocfs2_lock_refcount_allocators(struct super_block *sb,
					u32 p_cluster, u32 num_clusters,
					struct ocfs2_extent_tree *et,
					struct ocfs2_caching_info *ref_ci,
					struct buffer_head *ref_root_bh,
					struct ocfs2_alloc_context **meta_ac,
					struct ocfs2_alloc_context **data_ac,
					int *credits)
{
	int ret = 0, meta_add = 0;
	u32 recs_needed = num_clusters + 2;
	int free_recs = ocfs2_num_free_extents(OCFS2_SB(sb), et);

	if (free_recs < 0) {
		ret = free_recs;
		mlog_errno(ret);
		goto out;
	}

	/* Not enough free records: the data tree has to grow. */
	if (free_recs < recs_needed)
		meta_add = ocfs2_extend_meta_needed(et->et_root_el);

	*credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
					      recs_needed);

	ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
					       p_cluster, num_clusters,
					       &meta_add, credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, " reserve new metadata %d, clusters %u, credits = %d \n ",
	     meta_add, num_clusters, *credits);

	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
						meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (data_ac) {
		ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
					     data_ac);
		if (ret)
			mlog_errno(ret);
	}

out:
	if (ret && *meta_ac) {
		ocfs2_free_alloc_context(*meta_ac);
		*meta_ac = NULL;
	}

	return ret;
}
/*
 * Per-buffer callback handed to walk_page_buffers() while duplicating
 * clusters: clear the buffer's mapped state.  The buffer must not be
 * dirty.  "handle" is unused.  Always returns 0.
 */
static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
{
	BUG_ON(buffer_dirty(bh));

	clear_buffer_mapped(bh);

	return 0;
}
2009-08-24 10:31:03 +04:00
/*
 * Duplicate new_len clusters of file data through the page cache.
 *
 * The byte range starting at virtual cluster cpos is walked page by
 * page.  Each page is brought up to date if needed, the mapped state of
 * its buffers in the range is cleared, and the page is then mapped to
 * the blocks starting at new_cluster and dirtied through
 * ocfs2_map_and_dirty_page().
 *
 * old_cluster is only used for logging here.
 *
 * Returns 0 on success, -ENOMEM when a page cannot be obtained from the
 * page cache, or the error from reading/walking the page.
 */
static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
					    struct ocfs2_cow_context *context,
					    u32 cpos, u32 old_cluster,
					    u32 new_cluster, u32 new_len)
{
	int ret = 0, partial;
	struct ocfs2_caching_info *ci = context->data_et.et_ci;
	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
	struct page *page;
	pgoff_t page_index;
	unsigned int from, to;
	loff_t offset, end, map_end;
	struct address_space *mapping = context->inode->i_mapping;

	mlog(0, " old_cluster %u, new %u, len %u at offset %u \n ", old_cluster,
	     new_cluster, new_len, cpos);

	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
	/*
	 * Widen new_len before shifting: a 32-bit shift would wrap once
	 * the byte length reaches 4GB.
	 */
	end = offset + (((loff_t)new_len) << OCFS2_SB(sb)->s_clustersize_bits);

	while (offset < end) {
		page_index = offset >> PAGE_CACHE_SHIFT;
		map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
		if (map_end > end)
			map_end = end;

		/* from, to is the offset within the page. */
		from = offset & (PAGE_CACHE_SIZE - 1);
		to = PAGE_CACHE_SIZE;
		if (map_end & (PAGE_CACHE_SIZE - 1))
			to = map_end & (PAGE_CACHE_SIZE - 1);

		page = grab_cache_page(mapping, page_index);
		if (!page) {
			/* No page obtained: nothing to unlock or release. */
			ret = -ENOMEM;
			mlog_errno(ret);
			break;
		}

		/*
		 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
		 * can't be dirtied before we CoW it out.
		 */
		if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
			BUG_ON(PageDirty(page));

		if (!PageUptodate(page)) {
			ret = block_read_full_page(page, ocfs2_get_block);
			if (ret) {
				mlog_errno(ret);
				goto unlock;
			}
			lock_page(page);
		}

		if (page_has_buffers(page)) {
			ret = walk_page_buffers(handle, page_buffers(page),
						from, to, &partial,
						ocfs2_clear_cow_buffer);
			if (ret) {
				mlog_errno(ret);
				goto unlock;
			}
		}

		ocfs2_map_and_dirty_page(context->inode,
					 handle, from, to,
					 page, 0, &new_block);
		mark_page_accessed(page);
unlock:
		unlock_page(page);
		page_cache_release(page);
		page = NULL;
		offset = map_end;
		if (ret)
			break;
	}

	return ret;
}
2009-08-18 07:43:17 +04:00
/*
 * Duplicate new_len clusters block by block through the journal.
 *
 * For every block of the range, the old block is read, a buffer for the
 * new block is obtained and journalled with OCFS2_JOURNAL_ACCESS_CREATE,
 * the old contents are copied into it and it is marked journal-dirty.
 *
 * cpos is unused here.  Returns 0 on success, -EIO when a buffer for the
 * new block cannot be obtained, or the error from a read/journal helper.
 */
static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
					   struct ocfs2_cow_context *context,
					   u32 cpos, u32 old_cluster,
					   u32 new_cluster, u32 new_len)
{
	int ret = 0;
	struct super_block *sb = context->inode->i_sb;
	struct ocfs2_caching_info *ci = context->data_et.et_ci;
	int done, nr_blocks = ocfs2_clusters_to_blocks(sb, new_len);
	u64 src_block = ocfs2_clusters_to_blocks(sb, old_cluster);
	u64 dst_block = ocfs2_clusters_to_blocks(sb, new_cluster);
	struct ocfs2_super *osb = OCFS2_SB(sb);
	struct buffer_head *src_bh = NULL;
	struct buffer_head *dst_bh = NULL;

	mlog(0, " old_cluster %u, new %u, len %u \n ", old_cluster,
	     new_cluster, new_len);

	for (done = 0; done < nr_blocks; done++, src_block++, dst_block++) {
		dst_bh = sb_getblk(osb->sb, dst_block);
		if (dst_bh == NULL) {
			ret = -EIO;
			mlog_errno(ret);
			break;
		}

		ocfs2_set_new_buffer_uptodate(ci, dst_bh);

		ret = ocfs2_read_block(ci, src_block, &src_bh, NULL);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_journal_access(handle, ci, dst_bh,
					   OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		memcpy(dst_bh->b_data, src_bh->b_data, sb->s_blocksize);

		ret = ocfs2_journal_dirty(handle, dst_bh);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		brelse(dst_bh);
		brelse(src_bh);
		dst_bh = NULL;
		src_bh = NULL;
	}

	/* Still set only when the loop was left early. */
	brelse(dst_bh);
	brelse(src_bh);
	return ret;
}
2009-08-25 04:05:12 +04:00
/*
 * Replace the part of an extent at [cpos, cpos + len) in "et" with a
 * record that points at p_cluster and carries ext_flags without
 * OCFS2_EXT_REFCOUNTED.
 *
 * The replacement record is built here and handed to
 * ocfs2_split_extent().  Returns -ENOMEM when no path can be allocated,
 * -EROFS (after ocfs2_error()) when no extent is found at cpos, or the
 * error from the path lookup / split.
 */
static int ocfs2_clear_ext_refcount(handle_t *handle,
				    struct ocfs2_extent_tree *et,
				    u32 cpos, u32 p_cluster, u32 len,
				    unsigned int ext_flags,
				    struct ocfs2_alloc_context *meta_ac,
				    struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret, slot;
	struct ocfs2_extent_rec new_rec;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_list *leaf_el;
	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
	u64 owner = ocfs2_metadata_cache_owner(et->et_ci);

	mlog(0, " inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u \n ",
	     (unsigned long long)owner, cpos, len, p_cluster, ext_flags);

	/* Build the record that will replace the refcounted range. */
	memset(&new_rec, 0, sizeof(new_rec));
	new_rec.e_cpos = cpu_to_le32(cpos);
	new_rec.e_leaf_clusters = cpu_to_le16(len);
	new_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
							       p_cluster));
	new_rec.e_flags = ext_flags;
	new_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;

	path = ocfs2_new_path_from_et(et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(et->et_ci, path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	leaf_el = path_leaf_el(path);

	slot = ocfs2_search_extent_list(leaf_el, cpos);
	if (slot == -1 || slot >= le16_to_cpu(leaf_el->l_next_free_rec)) {
		ocfs2_error(sb,
			    " Inode %llu has an extent at cpos %u which can no "
			    " longer be found. \n ",
			    (unsigned long long)owner, cpos);
		ret = -EROFS;
		goto out;
	}

	ret = ocfs2_split_extent(handle, et, path, slot,
				 &new_rec, meta_ac, dealloc);
	if (ret)
		mlog_errno(ret);

out:
	ocfs2_free_path(path);
	return ret;
}
/*
 * Make the extent at cpos point at the "new" clusters instead of "old".
 *
 * Unless the extent is unwritten, the data is first duplicated with the
 * context's cow_duplicate_clusters callback.  Then the refcounted flag
 * is cleared on the extent, which is re-pointed at "new".
 */
static int ocfs2_replace_clusters(handle_t *handle,
				  struct ocfs2_cow_context *context,
				  u32 cpos, u32 old,
				  u32 new, u32 len,
				  unsigned int ext_flags)
{
	int ret;
	struct ocfs2_caching_info *ci = context->data_et.et_ci;
	u64 owner = ocfs2_metadata_cache_owner(ci);

	mlog(0, " inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u \n ",
	     (unsigned long long)owner, cpos, old, new, len, ext_flags);

	/* If the old clusters are unwritten, no need to duplicate. */
	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
		ret = context->cow_duplicate_clusters(handle, context, cpos,
						      old, new, len);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}
	}

	ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
				       cpos, new, len, ext_flags,
				       context->meta_ac, &context->dealloc);
	if (ret)
		mlog_errno(ret);

	return ret;
}
/*
 * Start writeback for the byte range covered by num_clusters clusters
 * starting at virtual cluster cpos, then wait for writeback on every
 * page of that range.
 *
 * Nothing is done when ocfs2_should_order_data() is true for the inode,
 * or when the range is empty.
 *
 * Returns 0 on success, a negative error from starting writeback,
 * -ENOMEM when a page cannot be obtained from the page cache, or -EIO
 * when a page has its error flag set after writeback.
 */
static int ocfs2_cow_sync_writeback(struct super_block *sb,
				    struct ocfs2_cow_context *context,
				    u32 cpos, u32 num_clusters)
{
	int ret = 0;
	loff_t offset, end, map_end;
	pgoff_t page_index;
	struct page *page;

	if (ocfs2_should_order_data(context->inode))
		return 0;

	/* An empty range would make "end - 1" fall before "offset". */
	if (!num_clusters)
		return 0;

	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
	/*
	 * Widen num_clusters before shifting: a 32-bit shift would wrap
	 * once the byte length reaches 4GB.
	 */
	end = offset +
	      (((loff_t)num_clusters) << OCFS2_SB(sb)->s_clustersize_bits);

	ret = filemap_fdatawrite_range(context->inode->i_mapping,
				       offset, end - 1);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	while (offset < end) {
		page_index = offset >> PAGE_CACHE_SHIFT;
		map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
		if (map_end > end)
			map_end = end;

		page = grab_cache_page(context->inode->i_mapping, page_index);
		if (!page) {
			/* Report the failure instead of crashing. */
			ret = -ENOMEM;
			mlog_errno(ret);
			break;
		}

		wait_on_page_writeback(page);
		if (PageError(page)) {
			ret = -EIO;
			mlog_errno(ret);
		} else
			mark_page_accessed(page);

		unlock_page(page);
		page_cache_release(page);
		page = NULL;
		offset = map_end;
		if (ret)
			break;
	}

	return ret;
}
2009-08-24 10:31:03 +04:00
/*
 * Cluster lookup used as a CoW context's get_clusters callback for
 * inode data: forwards to ocfs2_get_clusters() on context->inode.
 */
static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context,
				 u32 v_cluster, u32 *p_cluster,
				 u32 *num_clusters,
				 unsigned int *extent_flags)
{
	struct inode *inode = context->inode;

	return ocfs2_get_clusters(inode, v_cluster, p_cluster,
				  num_clusters, extent_flags);
}
2009-08-25 04:05:12 +04:00
/*
 * Make num_clusters clusters, starting at virtual cluster cpos and
 * physical cluster p_cluster, writable.
 *
 * Allocators and journal credits are reserved, a transaction is started,
 * and the range is processed one refcount record at a time:
 *  - refcount == 1: the refcounted flag is cleared on the extent, no
 *    data is copied;
 *  - refcount > 1: new clusters are claimed and the extent is replaced
 *    with them (copying the data unless the extent is unwritten).
 * In both cases the refcount of the old clusters is then decreased.
 *
 * Afterwards the optional post_refcount callback is run, and for inode
 * data (get_clusters == ocfs2_di_get_clusters) the range is passed to
 * ocfs2_cow_sync_writeback().
 *
 * context->data_ac and context->meta_ac are always freed and reset to
 * NULL before returning.  Returns 0 on success or the first error.
 */
static int ocfs2_make_clusters_writable(struct super_block *sb,
					struct ocfs2_cow_context *context,
					u32 cpos, u32 p_cluster,
					u32 num_clusters, unsigned int e_flags)
{
	int ret, delete, index, credits = 0;
	u32 new_bit, new_len;
	/*
	 * cpos and num_clusters are consumed by the loop below, so keep
	 * the original range for the writeback call after it.
	 */
	u32 orig_cpos = cpos, orig_num_clusters = num_clusters;
	unsigned int set_len;
	struct ocfs2_super *osb = OCFS2_SB(sb);
	handle_t *handle;
	struct buffer_head *ref_leaf_bh = NULL;
	struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
	struct ocfs2_refcount_rec rec;

	mlog(0, " cpos %u, p_cluster %u, num_clusters %u, e_flags %u \n ",
	     cpos, p_cluster, num_clusters, e_flags);

	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
					     &context->data_et,
					     ref_ci,
					     context->ref_root_bh,
					     &context->meta_ac,
					     &context->data_ac, &credits);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	if (context->post_refcount)
		credits += context->post_refcount->credits;

	credits += context->extra_credits;
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	while (num_clusters) {
		ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
					     p_cluster, num_clusters,
					     &rec, &index, &ref_leaf_bh);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		BUG_ON(!rec.r_refcount);
		/* Only the part of the range inside this record. */
		set_len = min((u64)p_cluster + num_clusters,
			      le64_to_cpu(rec.r_cpos) +
			      le32_to_cpu(rec.r_clusters)) - p_cluster;

		/*
		 * There are many different situation here.
		 * 1. If refcount == 1, remove the flag and don't COW.
		 * 2. If refcount > 1, allocate clusters.
		 *    Here we may not allocate r_len once at a time, so continue
		 *    until we reach num_clusters.
		 */
		if (le32_to_cpu(rec.r_refcount) == 1) {
			delete = 0;
			ret = ocfs2_clear_ext_refcount(handle,
						       &context->data_et,
						       cpos, p_cluster,
						       set_len, e_flags,
						       context->meta_ac,
						       &context->dealloc);
			if (ret) {
				mlog_errno(ret);
				goto out_commit;
			}
		} else {
			delete = 1;

			ret = __ocfs2_claim_clusters(osb, handle,
						     context->data_ac,
						     1, set_len,
						     &new_bit, &new_len);
			if (ret) {
				mlog_errno(ret);
				goto out_commit;
			}

			ret = ocfs2_replace_clusters(handle, context,
						     cpos, p_cluster, new_bit,
						     new_len, e_flags);
			if (ret) {
				mlog_errno(ret);
				goto out_commit;
			}
			/* Advance by what was actually claimed. */
			set_len = new_len;
		}

		ret = __ocfs2_decrease_refcount(handle, ref_ci,
						context->ref_root_bh,
						p_cluster, set_len,
						context->meta_ac,
						&context->dealloc, delete);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		cpos += set_len;
		p_cluster += set_len;
		num_clusters -= set_len;
		brelse(ref_leaf_bh);
		ref_leaf_bh = NULL;
	}

	/* handle any post_cow action. */
	if (context->post_refcount && context->post_refcount->func) {
		ret = context->post_refcount->func(context->inode, handle,
						context->post_refcount->para);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	/*
	 * Here we should write the new page out first if we are
	 * in write-back mode.  Use the range we were asked to CoW: by now
	 * cpos points past it and num_clusters is zero.
	 */
	if (context->get_clusters == ocfs2_di_get_clusters) {
		ret = ocfs2_cow_sync_writeback(sb, context, orig_cpos,
					       orig_num_clusters);
		if (ret)
			mlog_errno(ret);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	if (context->data_ac) {
		ocfs2_free_alloc_context(context->data_ac);
		context->data_ac = NULL;
	}
	if (context->meta_ac) {
		ocfs2_free_alloc_context(context->meta_ac);
		context->meta_ac = NULL;
	}
	brelse(ref_leaf_bh);

	return ret;
}
2009-08-24 10:31:03 +04:00
static int ocfs2_replace_cow ( struct ocfs2_cow_context * context )
2009-08-25 04:05:12 +04:00
{
int ret = 0 ;
2009-08-24 10:31:03 +04:00
struct inode * inode = context - > inode ;
u32 cow_start = context - > cow_start , cow_len = context - > cow_len ;
u32 p_cluster , num_clusters ;
2009-08-25 04:05:12 +04:00
unsigned int ext_flags ;
struct ocfs2_super * osb = OCFS2_SB ( inode - > i_sb ) ;
if ( ! ocfs2_refcount_tree ( OCFS2_SB ( inode - > i_sb ) ) ) {
ocfs2_error ( inode - > i_sb , " Inode %lu want to use refcount "
" tree, but the feature bit is not set in the "
" super block. " , inode - > i_ino ) ;
return - EROFS ;
}
ocfs2_init_dealloc_ctxt ( & context - > dealloc ) ;
while ( cow_len ) {
2009-08-24 10:31:03 +04:00
ret = context - > get_clusters ( context , cow_start , & p_cluster ,
& num_clusters , & ext_flags ) ;
2009-08-25 04:05:12 +04:00
if ( ret ) {
mlog_errno ( ret ) ;
break ;
}
BUG_ON ( ! ( ext_flags & OCFS2_EXT_REFCOUNTED ) ) ;
if ( cow_len < num_clusters )
num_clusters = cow_len ;
ret = ocfs2_make_clusters_writable ( inode - > i_sb , context ,
cow_start , p_cluster ,
num_clusters , ext_flags ) ;
if ( ret ) {
mlog_errno ( ret ) ;
break ;
}
cow_len - = num_clusters ;
cow_start + = num_clusters ;
}
if ( ocfs2_dealloc_has_cluster ( & context - > dealloc ) ) {
ocfs2_schedule_truncate_log_flush ( osb , 1 ) ;
ocfs2_run_deallocs ( osb , & context - > dealloc ) ;
}
return ret ;
}
/*
 * Starting at cpos, try to CoW write_len clusters.  Don't CoW
 * past max_cpos.  This will stop when it runs into a hole or an
 * unrefcounted extent.
 *
 * The actual range to copy (cow_start, cow_len) is computed by
 * ocfs2_refcount_cal_cow_clusters(); the copy itself is done by
 * ocfs2_replace_cow() while the refcount tree named by the dinode's
 * i_refcount_loc is locked.
 *
 * Returns 0 on success, -ENOMEM if the CoW context cannot be allocated,
 * or the error of whichever helper failed.
 */
static int ocfs2_refcount_cow_hunk(struct inode *inode,
				   struct buffer_head *di_bh,
				   u32 cpos, u32 write_len, u32 max_cpos)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_cow_context *context = NULL;
	struct ocfs2_refcount_tree *ref_tree;
	struct buffer_head *ref_root_bh = NULL;
	u32 cow_start = 0, cow_len = 0;
	int ret;

	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
					      cpos, write_len, max_cpos,
					      &cow_start, &cow_len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, " CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
	     " cow_len %u \n ", inode->i_ino,
	     cpos, write_len, cow_start, cow_len);

	BUG_ON(cow_len == 0);

	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
	if (!context) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
				       1, &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Describe a CoW of file data that lives in the dinode's extent tree. */
	context->inode = inode;
	context->cow_start = cow_start;
	context->cow_len = cow_len;
	context->ref_tree = ref_tree;
	context->ref_root_bh = ref_root_bh;
	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
	context->get_clusters = ocfs2_di_get_clusters;

	ocfs2_init_dinode_extent_tree(&context->data_et,
				      INODE_CACHE(inode), di_bh);

	ret = ocfs2_replace_cow(context);
	if (ret)
		mlog_errno(ret);

	/*
	 * truncate the extent map here since no matter whether we meet with
	 * any error during the action, we shouldn't trust cached extent map
	 * any more.
	 */
	ocfs2_extent_map_trunc(inode, cow_start);

	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	brelse(ref_root_bh);
out:
	/* context is NULL on the early error paths; kfree() is still called. */
	kfree(context);
	return ret;
}
/*
 * CoW any and all clusters between cpos and cpos + write_len.
 * Don't CoW past max_cpos.  If this returns successfully, all
 * clusters between cpos and cpos + write_len are safe to modify.
 *
 * The range is walked one extent at a time with ocfs2_get_clusters();
 * only extents flagged OCFS2_EXT_REFCOUNTED are handed to
 * ocfs2_refcount_cow_hunk(), everything else is stepped over.
 *
 * Returns 0 on success or the first error from either helper.
 */
int ocfs2_refcount_cow(struct inode *inode,
		       struct buffer_head *di_bh,
		       u32 cpos, u32 write_len, u32 max_cpos)
{
	unsigned int flags;
	u32 phys, len;
	int status = 0;

	while (write_len) {
		status = ocfs2_get_clusters(inode, cpos, &phys,
					    &len, &flags);
		if (status) {
			mlog_errno(status);
			break;
		}

		/* Clamp the extent to what is left of the request. */
		if (len > write_len)
			len = write_len;

		if (flags & OCFS2_EXT_REFCOUNTED) {
			status = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
							 len, max_cpos);
			if (status) {
				mlog_errno(status);
				break;
			}
		}

		write_len -= len;
		cpos += len;
	}

	return status;
}
2009-08-22 19:54:27 +04:00
2009-08-18 07:43:17 +04:00
/*
 * get_clusters callback for CoW of an xattr value: context->cow_object is
 * used as the xattr value root and the lookup is forwarded to
 * ocfs2_xattr_get_clusters() on its extent list.  The return value is
 * whatever that helper returns.
 */
static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context,
					  u32 v_cluster, u32 *p_cluster,
					  u32 *num_clusters,
					  unsigned int *extent_flags)
{
	struct ocfs2_xattr_value_root *value_root = context->cow_object;

	return ocfs2_xattr_get_clusters(context->inode, v_cluster, p_cluster,
					num_clusters, &value_root->xr_list,
					extent_flags);
}
/*
* Given a xattr value root , calculate the most meta / credits we need for
* refcount tree change if we truncate it to 0.
*/
int ocfs2_refcounted_xattr_delete_need ( struct inode * inode ,
struct ocfs2_caching_info * ref_ci ,
struct buffer_head * ref_root_bh ,
struct ocfs2_xattr_value_root * xv ,
int * meta_add , int * credits )
{
int ret = 0 , index , ref_blocks = 0 ;
u32 p_cluster , num_clusters ;
u32 cpos = 0 , clusters = le32_to_cpu ( xv - > xr_clusters ) ;
struct ocfs2_refcount_block * rb ;
struct ocfs2_refcount_rec rec ;
struct buffer_head * ref_leaf_bh = NULL ;
while ( cpos < clusters ) {
ret = ocfs2_xattr_get_clusters ( inode , cpos , & p_cluster ,
& num_clusters , & xv - > xr_list ,
NULL ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
cpos + = num_clusters ;
while ( num_clusters ) {
ret = ocfs2_get_refcount_rec ( ref_ci , ref_root_bh ,
p_cluster , num_clusters ,
& rec , & index ,
& ref_leaf_bh ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
BUG_ON ( ! rec . r_refcount ) ;
rb = ( struct ocfs2_refcount_block * ) ref_leaf_bh - > b_data ;
/*
* We really don ' t know whether the other clusters is in
* this refcount block or not , so just take the worst
* case that all the clusters are in this block and each
* one will split a refcount rec , so totally we need
* clusters * 2 new refcount rec .
*/
if ( le64_to_cpu ( rb - > rf_records . rl_used ) + clusters * 2 >
le16_to_cpu ( rb - > rf_records . rl_count ) )
ref_blocks + + ;
* credits + = 1 ;
brelse ( ref_leaf_bh ) ;
ref_leaf_bh = NULL ;
if ( num_clusters < = le32_to_cpu ( rec . r_clusters ) )
break ;
else
num_clusters - = le32_to_cpu ( rec . r_clusters ) ;
p_cluster + = num_clusters ;
}
}
* meta_add + = ref_blocks ;
if ( ! ref_blocks )
goto out ;
rb = ( struct ocfs2_refcount_block * ) ref_root_bh - > b_data ;
if ( le32_to_cpu ( rb - > rf_flags ) & OCFS2_REFCOUNT_TREE_FL )
* credits + = OCFS2_EXPAND_REFCOUNT_TREE_CREDITS ;
else {
struct ocfs2_extent_tree et ;
ocfs2_init_refcount_extent_tree ( & et , ref_ci , ref_root_bh ) ;
* credits + = ocfs2_calc_extend_credits ( inode - > i_sb ,
et . et_root_el ,
ref_blocks ) ;
}
out :
brelse ( ref_leaf_bh ) ;
return ret ;
}
/*
 * Do CoW for xattr.
 *
 * Works out the range to copy from the value root in @vb (with no upper
 * limit: UINT_MAX is passed as the max cpos), builds a CoW context that
 * duplicates clusters through ocfs2_duplicate_clusters_by_jbd and runs
 * ocfs2_replace_cow() on it.  @ref_tree and @ref_root_bh are supplied by
 * the caller and are only stored in the context here.  @post, if given,
 * is stored as the context's post_refcount action.  @di is not used by
 * this function.
 *
 * Returns 0 on success, -ENOMEM if the context cannot be allocated, or
 * the error of the failing helper.
 */
int ocfs2_refcount_cow_xattr(struct inode *inode,
			     struct ocfs2_dinode *di,
			     struct ocfs2_xattr_value_buf *vb,
			     struct ocfs2_refcount_tree *ref_tree,
			     struct buffer_head *ref_root_bh,
			     u32 cpos, u32 write_len,
			     struct ocfs2_post_refcount *post)
{
	struct ocfs2_cow_context *context = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
	u32 cow_start, cow_len;
	int ret;

	BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));

	ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
					      cpos, write_len, UINT_MAX,
					      &cow_start, &cow_len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	BUG_ON(cow_len == 0);

	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
	if (!context) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	context->inode = inode;
	context->cow_start = cow_start;
	context->cow_len = cow_len;
	context->ref_tree = ref_tree;
	context->ref_root_bh = ref_root_bh;
	context->cow_object = xv;
	context->get_clusters = ocfs2_xattr_value_get_clusters;
	context->post_refcount = post;

	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
	/* We need the extra credits for duplicate_clusters by jbd. */
	context->extra_credits =
		ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len;

	ocfs2_init_xattr_value_extent_tree(&context->data_et,
					   INODE_CACHE(inode), vb);

	ret = ocfs2_replace_cow(context);
	if (ret)
		mlog_errno(ret);

out:
	kfree(context);
	return ret;
}
2009-08-22 19:54:27 +04:00
/*
 * Insert a new extent into refcount tree and mark a extent rec
 * as refcounted in the dinode tree.
 *
 * Metadata blocks and journal credits are sized with
 * ocfs2_calc_refcount_meta_credits(); if @post is given its credits are
 * added on top and post->func, when set, is called inside the same
 * transaction after the refcount has been increased.
 *
 * Returns 0 on success or the error of the first step that failed.  The
 * transaction is committed and the metadata reservation released on
 * every path that reached them.
 */
int ocfs2_add_refcount_flag(struct inode *inode,
			    struct ocfs2_extent_tree *data_et,
			    struct ocfs2_caching_info *ref_ci,
			    struct buffer_head *ref_root_bh,
			    u32 cpos, u32 p_cluster, u32 num_clusters,
			    struct ocfs2_cached_dealloc_ctxt *dealloc,
			    struct ocfs2_post_refcount *post)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_alloc_context *meta_ac = NULL;
	int credits = 1, ref_blocks = 0;
	handle_t *handle;
	int ret;

	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
					       ref_ci, ref_root_bh,
					       p_cluster, num_clusters,
					       &ref_blocks, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mlog(0, " reserve new metadata %d, credits = %d \n ",
	     ref_blocks, credits);

	if (ref_blocks) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, ref_blocks,
							&meta_ac);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* Leave room in the transaction for the caller's follow-up work. */
	if (post)
		credits += post->credits;

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_mark_extent_refcounted(inode, data_et, handle,
					   cpos, num_clusters, p_cluster,
					   meta_ac, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
					p_cluster, num_clusters, 0,
					meta_ac, dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (post && post->func) {
		ret = post->func(inode, handle, post->para);
		if (ret)
			mlog_errno(ret);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	return ret;
}
2009-08-18 07:40:59 +04:00
static int ocfs2_change_ctime ( struct inode * inode ,
struct buffer_head * di_bh )
{
int ret ;
handle_t * handle ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
handle = ocfs2_start_trans ( OCFS2_SB ( inode - > i_sb ) ,
OCFS2_INODE_UPDATE_CREDITS ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
mlog_errno ( ret ) ;
goto out ;
}
ret = ocfs2_journal_access_di ( handle , INODE_CACHE ( inode ) , di_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
inode - > i_ctime = CURRENT_TIME ;
di - > i_ctime = cpu_to_le64 ( inode - > i_ctime . tv_sec ) ;
di - > i_ctime_nsec = cpu_to_le32 ( inode - > i_ctime . tv_nsec ) ;
ocfs2_journal_dirty ( handle , di_bh ) ;
out_commit :
ocfs2_commit_trans ( OCFS2_SB ( inode - > i_sb ) , handle ) ;
out :
return ret ;
}
2009-08-22 19:54:27 +04:00
static int ocfs2_attach_refcount_tree ( struct inode * inode ,
struct buffer_head * di_bh )
{
2009-08-18 07:40:59 +04:00
int ret , data_changed = 0 ;
2009-08-22 19:54:27 +04:00
struct buffer_head * ref_root_bh = NULL ;
struct ocfs2_inode_info * oi = OCFS2_I ( inode ) ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) di_bh - > b_data ;
struct ocfs2_super * osb = OCFS2_SB ( inode - > i_sb ) ;
struct ocfs2_refcount_tree * ref_tree ;
unsigned int ext_flags ;
loff_t size ;
u32 cpos , num_clusters , clusters , p_cluster ;
struct ocfs2_cached_dealloc_ctxt dealloc ;
struct ocfs2_extent_tree di_et ;
ocfs2_init_dealloc_ctxt ( & dealloc ) ;
if ( ! ( oi - > ip_dyn_features & OCFS2_HAS_REFCOUNT_FL ) ) {
ret = ocfs2_create_refcount_tree ( inode , di_bh ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
}
BUG_ON ( ! di - > i_refcount_loc ) ;
ret = ocfs2_lock_refcount_tree ( osb ,
le64_to_cpu ( di - > i_refcount_loc ) , 1 ,
& ref_tree , & ref_root_bh ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
2009-10-15 07:10:49 +04:00
if ( oi - > ip_dyn_features & OCFS2_INLINE_DATA_FL )
goto attach_xattr ;
2009-08-22 19:54:27 +04:00
ocfs2_init_dinode_extent_tree ( & di_et , INODE_CACHE ( inode ) , di_bh ) ;
size = i_size_read ( inode ) ;
clusters = ocfs2_clusters_for_bytes ( inode - > i_sb , size ) ;
cpos = 0 ;
while ( cpos < clusters ) {
ret = ocfs2_get_clusters ( inode , cpos , & p_cluster ,
& num_clusters , & ext_flags ) ;
if ( p_cluster & & ! ( ext_flags & OCFS2_EXT_REFCOUNTED ) ) {
ret = ocfs2_add_refcount_flag ( inode , & di_et ,
& ref_tree - > rf_ci ,
ref_root_bh , cpos ,
p_cluster , num_clusters ,
2009-09-21 09:04:19 +04:00
& dealloc , NULL ) ;
2009-08-22 19:54:27 +04:00
if ( ret ) {
mlog_errno ( ret ) ;
2009-08-18 07:40:59 +04:00
goto unlock ;
2009-08-22 19:54:27 +04:00
}
2009-08-18 07:40:59 +04:00
data_changed = 1 ;
2009-08-22 19:54:27 +04:00
}
cpos + = num_clusters ;
}
2009-10-15 07:10:49 +04:00
attach_xattr :
2009-09-21 09:04:19 +04:00
if ( oi - > ip_dyn_features & OCFS2_HAS_XATTR_FL ) {
ret = ocfs2_xattr_attach_refcount_tree ( inode , di_bh ,
& ref_tree - > rf_ci ,
ref_root_bh ,
& dealloc ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto unlock ;
}
}
2009-08-18 07:40:59 +04:00
if ( data_changed ) {
ret = ocfs2_change_ctime ( inode , di_bh ) ;
if ( ret )
mlog_errno ( ret ) ;
}
unlock :
2009-08-22 19:54:27 +04:00
ocfs2_unlock_refcount_tree ( osb , ref_tree , 1 ) ;
brelse ( ref_root_bh ) ;
if ( ! ret & & ocfs2_dealloc_has_cluster ( & dealloc ) ) {
ocfs2_schedule_truncate_log_flush ( osb , 1 ) ;
ocfs2_run_deallocs ( osb , & dealloc ) ;
}
out :
/*
* Empty the extent map so that we may get the right extent
* record from the disk .
*/
ocfs2_extent_map_trunc ( inode , 0 ) ;
return ret ;
}
/*
 * Insert the extent (cpos, p_cluster, num_clusters, ext_flags) into the
 * extent tree @et and increase the refcount of its clusters in the
 * refcount tree, both inside one transaction.
 *
 * The metadata reservation and credit count come from
 * ocfs2_lock_refcount_allocators().  Returns 0 on success or the error
 * of the first failing step.
 */
static int ocfs2_add_refcounted_extent(struct inode *inode,
				       struct ocfs2_extent_tree *et,
				       struct ocfs2_caching_info *ref_ci,
				       struct buffer_head *ref_root_bh,
				       u32 cpos, u32 p_cluster, u32 num_clusters,
				       unsigned int ext_flags,
				       struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_alloc_context *meta_ac = NULL;
	handle_t *handle;
	int credits = 0;
	int ret;

	ret = ocfs2_lock_refcount_allocators(inode->i_sb,
					     p_cluster, num_clusters,
					     et, ref_ci,
					     ref_root_bh, &meta_ac,
					     NULL, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* The extent's start is passed as a block number, not a cluster. */
	ret = ocfs2_insert_extent(handle, et, cpos,
			ocfs2_clusters_to_blocks(inode->i_sb, p_cluster),
			num_clusters, ext_flags, meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
				      p_cluster, num_clusters,
				      meta_ac, dealloc);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	return ret;
}
2009-10-15 07:10:49 +04:00
static int ocfs2_duplicate_inline_data ( struct inode * s_inode ,
struct buffer_head * s_bh ,
struct inode * t_inode ,
struct buffer_head * t_bh )
{
int ret ;
handle_t * handle ;
struct ocfs2_super * osb = OCFS2_SB ( s_inode - > i_sb ) ;
struct ocfs2_dinode * s_di = ( struct ocfs2_dinode * ) s_bh - > b_data ;
struct ocfs2_dinode * t_di = ( struct ocfs2_dinode * ) t_bh - > b_data ;
BUG_ON ( ! ( OCFS2_I ( s_inode ) - > ip_dyn_features & OCFS2_INLINE_DATA_FL ) ) ;
handle = ocfs2_start_trans ( osb , OCFS2_INODE_UPDATE_CREDITS ) ;
if ( IS_ERR ( handle ) ) {
ret = PTR_ERR ( handle ) ;
mlog_errno ( ret ) ;
goto out ;
}
ret = ocfs2_journal_access_di ( handle , INODE_CACHE ( t_inode ) , t_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out_commit ;
}
t_di - > id2 . i_data . id_count = s_di - > id2 . i_data . id_count ;
memcpy ( t_di - > id2 . i_data . id_data , s_di - > id2 . i_data . id_data ,
le16_to_cpu ( s_di - > id2 . i_data . id_count ) ) ;
spin_lock ( & OCFS2_I ( t_inode ) - > ip_lock ) ;
OCFS2_I ( t_inode ) - > ip_dyn_features | = OCFS2_INLINE_DATA_FL ;
t_di - > i_dyn_features = cpu_to_le16 ( OCFS2_I ( t_inode ) - > ip_dyn_features ) ;
spin_unlock ( & OCFS2_I ( t_inode ) - > ip_lock ) ;
ocfs2_journal_dirty ( handle , t_bh ) ;
out_commit :
ocfs2_commit_trans ( osb , handle ) ;
out :
return ret ;
}
2009-08-22 19:54:27 +04:00
static int ocfs2_duplicate_extent_list ( struct inode * s_inode ,
struct inode * t_inode ,
struct buffer_head * t_bh ,
struct ocfs2_caching_info * ref_ci ,
struct buffer_head * ref_root_bh ,
struct ocfs2_cached_dealloc_ctxt * dealloc )
{
int ret = 0 ;
u32 p_cluster , num_clusters , clusters , cpos ;
loff_t size ;
unsigned int ext_flags ;
struct ocfs2_extent_tree et ;
ocfs2_init_dinode_extent_tree ( & et , INODE_CACHE ( t_inode ) , t_bh ) ;
size = i_size_read ( s_inode ) ;
clusters = ocfs2_clusters_for_bytes ( s_inode - > i_sb , size ) ;
cpos = 0 ;
while ( cpos < clusters ) {
ret = ocfs2_get_clusters ( s_inode , cpos , & p_cluster ,
& num_clusters , & ext_flags ) ;
if ( p_cluster ) {
ret = ocfs2_add_refcounted_extent ( t_inode , & et ,
ref_ci , ref_root_bh ,
cpos , p_cluster ,
num_clusters ,
ext_flags ,
dealloc ) ;
if ( ret ) {
mlog_errno ( ret ) ;
goto out ;
}
}
cpos + = num_clusters ;
}
out :
return ret ;
}
2009-08-18 07:40:59 +04:00
/*
 * change the new file's attributes to the src.
 *
 * reflink creates a snapshot of a file, that means the attributes
 * must be identical except for three exceptions - nlink, ino, and ctime.
 *
 * Size, cluster count, attr and dyn_features are always copied from the
 * source, in memory and in the target dinode.  When @preserve is true,
 * uid, gid, mode and mtime are copied as well and ctime is set to the
 * current time.
 *
 * Returns 0 on success or the error from starting the transaction or
 * from ocfs2_journal_access_di().
 */
static int ocfs2_complete_reflink(struct inode *s_inode,
				  struct buffer_head *s_bh,
				  struct inode *t_inode,
				  struct buffer_head *t_bh,
				  bool preserve)
{
	int ret;
	handle_t *handle;
	struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data;
	loff_t size = i_size_read(s_inode);

	handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		return ret;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* The ocfs2-private in-memory fields are changed under ip_lock. */
	spin_lock(&OCFS2_I(t_inode)->ip_lock);
	OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters;
	OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr;
	OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
	spin_unlock(&OCFS2_I(t_inode)->ip_lock);
	i_size_write(t_inode, size);

	di->i_xattr_inline_size = s_di->i_xattr_inline_size;
	di->i_clusters = s_di->i_clusters;
	di->i_size = s_di->i_size;
	di->i_dyn_features = s_di->i_dyn_features;
	di->i_attr = s_di->i_attr;

	if (preserve) {
		/*
		 * Keep the in-memory inode in step with the dinode, as is
		 * done for the times below; otherwise the in-memory inode
		 * would still carry the ids and mode it was created with.
		 */
		t_inode->i_uid = s_inode->i_uid;
		t_inode->i_gid = s_inode->i_gid;
		t_inode->i_mode = s_inode->i_mode;

		di->i_uid = s_di->i_uid;
		di->i_gid = s_di->i_gid;
		di->i_mode = s_di->i_mode;

		/*
		 * update time.
		 * we want mtime to appear identical to the source and
		 * update ctime.
		 */
		t_inode->i_ctime = CURRENT_TIME;

		di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
		di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);

		t_inode->i_mtime = s_inode->i_mtime;
		di->i_mtime = s_di->i_mtime;
		di->i_mtime_nsec = s_di->i_mtime_nsec;
	}

	ocfs2_journal_dirty(handle, t_bh);

out_commit:
	ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle);
	return ret;
}
2009-08-22 19:54:27 +04:00
/*
 * Make @t_inode share the data of @s_inode.
 *
 * The target is first pointed at the source's refcount tree.  An
 * inline-data source is then copied with ocfs2_duplicate_inline_data();
 * otherwise the refcount tree is locked and the source's extent list is
 * duplicated into the target.  Clusters queued for deallocation along the
 * way are released before returning.
 *
 * @preserve is not used by this function.
 *
 * Returns 0 on success or the error of the first failing step.
 */
static int ocfs2_create_reflink_node(struct inode *s_inode,
				     struct buffer_head *s_bh,
				     struct inode *t_inode,
				     struct buffer_head *t_bh,
				     bool preserve)
{
	int ret;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
	struct ocfs2_refcount_tree *ref_tree;

	ocfs2_init_dealloc_ctxt(&dealloc);

	ret = ocfs2_set_refcount_tree(t_inode, t_bh,
				      le64_to_cpu(di->i_refcount_loc));
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		/* No extents to share: the data lives in the dinode itself. */
		ret = ocfs2_duplicate_inline_data(s_inode, s_bh,
						  t_inode, t_bh);
		if (ret)
			mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
				       1, &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
					  &ref_tree->rf_ci, ref_root_bh,
					  &dealloc);
	if (ret)
		mlog_errno(ret);

	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	brelse(ref_root_bh);
out:
	if (ocfs2_dealloc_has_cluster(&dealloc)) {
		ocfs2_schedule_truncate_log_flush(osb, 1);
		ocfs2_run_deallocs(osb, &dealloc);
	}

	return ret;
}
2009-09-21 06:38:17 +04:00
/*
 * Do the reflink of the inode behind @old_dentry into @new_inode.
 *
 * Steps, in order: start writeback of the source's pages, attach the
 * source to a refcount tree, take i_mutex and the inode lock on the new
 * inode, share the data (ocfs2_create_reflink_node), reflink the xattrs
 * if the source has any, and finally copy the attributes
 * (ocfs2_complete_reflink).  On success the function waits for the
 * writeback started at the beginning.
 *
 * Returns 0 on success or the error of the first failing step.
 */
static int __ocfs2_reflink(struct dentry *old_dentry,
			   struct buffer_head *old_bh,
			   struct inode *new_inode,
			   bool preserve)
{
	struct inode *src = old_dentry->d_inode;
	struct buffer_head *new_bh = NULL;
	int status;

	status = filemap_fdatawrite(src->i_mapping);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	status = ocfs2_attach_refcount_tree(src, old_bh);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	mutex_lock(&new_inode->i_mutex);
	status = ocfs2_inode_lock(new_inode, &new_bh, 1);
	if (status) {
		mlog_errno(status);
		goto out_unlock;
	}

	status = ocfs2_create_reflink_node(src, old_bh,
					   new_inode, new_bh, preserve);
	if (status) {
		mlog_errno(status);
		goto inode_unlock;
	}

	if (OCFS2_I(src)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
		status = ocfs2_reflink_xattrs(src, old_bh,
					      new_inode, new_bh,
					      preserve);
		if (status) {
			mlog_errno(status);
			goto inode_unlock;
		}
	}

	status = ocfs2_complete_reflink(src, old_bh,
					new_inode, new_bh, preserve);
	if (status)
		mlog_errno(status);

inode_unlock:
	ocfs2_inode_unlock(new_inode, 1);
	brelse(new_bh);
out_unlock:
	mutex_unlock(&new_inode->i_mutex);
out:
	/* Only wait for the source's writeback when everything else worked. */
	if (!status) {
		status = filemap_fdatawait(src->i_mapping);
		if (status)
			mlog_errno(status);
	}
	return status;
}
/*
 * Reflink the inode behind @old_dentry to @new_dentry in directory @dir.
 *
 * The new inode is first created in the orphan directory, filled in by
 * __ocfs2_reflink() while the source's inode lock, ip_xattr_sem and
 * ip_alloc_sem are held, and only moved to @new_dentry once everything
 * succeeded.  When @preserve is false, security attributes and ACLs are
 * re-initialised from @dir.
 *
 * Returns 0 on success, -EOPNOTSUPP when the refcount feature check on
 * the super block fails, or the error of the first failing step.
 */
static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
			 struct dentry *new_dentry, bool preserve)
{
	struct inode *src = old_dentry->d_inode;
	struct inode *new_orphan_inode = NULL;
	struct buffer_head *old_bh = NULL;
	int error;

	if (!ocfs2_refcount_tree(OCFS2_SB(src->i_sb)))
		return -EOPNOTSUPP;

	error = ocfs2_create_inode_in_orphan(dir, src->i_mode,
					     &new_orphan_inode);
	if (error) {
		mlog_errno(error);
		goto out;
	}

	error = ocfs2_inode_lock(src, &old_bh, 1);
	if (error) {
		mlog_errno(error);
		goto out;
	}

	down_write(&OCFS2_I(src)->ip_xattr_sem);
	down_write(&OCFS2_I(src)->ip_alloc_sem);
	error = __ocfs2_reflink(old_dentry, old_bh,
				new_orphan_inode, preserve);
	up_write(&OCFS2_I(src)->ip_alloc_sem);
	up_write(&OCFS2_I(src)->ip_xattr_sem);

	ocfs2_inode_unlock(src, 1);
	brelse(old_bh);

	if (error) {
		mlog_errno(error);
		goto out;
	}

	/* If the security isn't preserved, we need to re-initialize them. */
	if (!preserve) {
		error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
		if (error)
			mlog_errno(error);
	}
out:
	if (!error) {
		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
						       new_dentry);
		if (error)
			mlog_errno(error);
	}

	if (new_orphan_inode) {
		/*
		 * We need to open_unlock the inode no matter whether we
		 * succeed or not, so that other nodes can delete it later.
		 */
		ocfs2_open_unlock(new_orphan_inode);
		if (error)
			iput(new_orphan_inode);
	}

	return error;
}
2009-09-21 07:25:14 +04:00
/*
* Below here are the bits used by OCFS2_IOC_REFLINK ( ) to fake
* sys_reflink ( ) . This will go away when vfs_reflink ( ) exists in
* fs / namei . c .
*/
/* copied from may_create in VFS. */
static inline int ocfs2_may_create ( struct inode * dir , struct dentry * child )
{
if ( child - > d_inode )
return - EEXIST ;
if ( IS_DEADDIR ( dir ) )
return - ENOENT ;
return inode_permission ( dir , MAY_WRITE | MAY_EXEC ) ;
}
/*
 * copied from user_path_parent.
 *
 * Fetch the user-supplied @path with getname() and look up its parent
 * (LOOKUP_PARENT) into @nd.  On success the name obtained from getname()
 * is stored in *@name and the caller has to putname() it; on failure it
 * is released here and *@name is left untouched.
 *
 * Returns 0 on success or the error from getname() / path_lookup().
 */
static int ocfs2_user_path_parent(const char __user *path,
				  struct nameidata *nd, char **name)
{
	char *kname = getname(path);
	int error;

	if (IS_ERR(kname))
		return PTR_ERR(kname);

	error = path_lookup(kname, LOOKUP_PARENT, nd);
	if (!error) {
		*name = kname;
		return 0;
	}

	putname(kname);
	return error;
}
/**
* ocfs2_vfs_reflink - Create a reference - counted link
*
* @ old_dentry : source dentry + inode
* @ dir : directory to create the target
* @ new_dentry : target dentry
* @ preserve : if true , preserve all file attributes
*/
2009-11-30 10:08:40 +03:00
static int ocfs2_vfs_reflink ( struct dentry * old_dentry , struct inode * dir ,
struct dentry * new_dentry , bool preserve )
2009-09-21 07:25:14 +04:00
{
struct inode * inode = old_dentry - > d_inode ;
int error ;
if ( ! inode )
return - ENOENT ;
error = ocfs2_may_create ( dir , new_dentry ) ;
if ( error )
return error ;
if ( dir - > i_sb ! = inode - > i_sb )
return - EXDEV ;
/*
* A reflink to an append - only or immutable file cannot be created .
*/
if ( IS_APPEND ( inode ) | | IS_IMMUTABLE ( inode ) )
return - EPERM ;
/* Only regular files can be reflinked. */
if ( ! S_ISREG ( inode - > i_mode ) )
return - EPERM ;
/*
* If the caller wants to preserve ownership , they require the
* rights to do so .
*/
if ( preserve ) {
if ( ( current_fsuid ( ) ! = inode - > i_uid ) & & ! capable ( CAP_CHOWN ) )
return - EPERM ;
if ( ! in_group_p ( inode - > i_gid ) & & ! capable ( CAP_CHOWN ) )
return - EPERM ;
}
/*
* If the caller is modifying any aspect of the attributes , they
* are not creating a snapshot . They need read permission on the
* file .
*/
if ( ! preserve ) {
error = inode_permission ( inode , MAY_READ ) ;
if ( error )
return error ;
}
mutex_lock ( & inode - > i_mutex ) ;
vfs_dq_init ( dir ) ;
error = ocfs2_reflink ( old_dentry , dir , new_dentry , preserve ) ;
mutex_unlock ( & inode - > i_mutex ) ;
if ( ! error )
fsnotify_create ( dir , new_dentry ) ;
return error ;
}
/*
 * Entry point for the reflink ioctl: reflink the file named by @oldname
 * to @newname.  Most of this code is copied from sys_linkat.
 *
 * @inode is only used to check the refcount feature on its super block.
 * Both names are user-space paths; @oldname is resolved relative to the
 * current working directory (AT_FDCWD).
 *
 * Returns 0 on success, -EOPNOTSUPP when the refcount feature check
 * fails, -EXDEV when the two paths are on different mounts, or the error
 * of the first failing lookup / permission / reflink step.
 */
int ocfs2_reflink_ioctl(struct inode *inode,
			const char __user *oldname,
			const char __user *newname,
			bool preserve)
{
	struct dentry *new_dentry;
	struct nameidata nd;
	struct path old_path;
	char *to = NULL;
	int error;

	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	error = user_path_at(AT_FDCWD, oldname, 0, &old_path);
	if (error) {
		mlog_errno(error);
		return error;
	}

	error = ocfs2_user_path_parent(newname, &nd, &to);
	if (error) {
		mlog_errno(error);
		goto out;
	}

	if (old_path.mnt != nd.path.mnt) {
		error = -EXDEV;
		goto out_release;
	}

	new_dentry = lookup_create(&nd, 0);
	if (IS_ERR(new_dentry)) {
		error = PTR_ERR(new_dentry);
		mlog_errno(error);
		/* The parent's i_mutex is dropped on this path as well. */
		goto out_unlock;
	}

	error = mnt_want_write(nd.path.mnt);
	if (error) {
		mlog_errno(error);
		goto out_dput;
	}

	error = ocfs2_vfs_reflink(old_path.dentry,
				  nd.path.dentry->d_inode,
				  new_dentry, preserve);
	mnt_drop_write(nd.path.mnt);
out_dput:
	dput(new_dentry);
out_unlock:
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
out_release:
	path_put(&nd.path);
	putname(to);
out:
	path_put(&old_path);

	return error;
}