/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
*/

#include <linux/sched.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 *
 * Right now this code is only used for reference counted trees, but
 * the long term goal is to get rid of the similar code for delayed
 * extent tree modifications.
 */

/*
 * entries in the rb tree are ordered by the byte number of the extent
 * and by the byte number of the parent block
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref,
		      u64 bytenr, u64 parent)
{
	if (bytenr < ref->bytenr)
		return -1;
	if (bytenr > ref->bytenr)
		return 1;
	if (parent < ref->parent)
		return -1;
	if (parent > ref->parent)
		return 1;
	return 0;
}
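
/*
 * Note: head ref nodes are inserted with parent set to (u64)-1 (see
 * btrfs_add_delayed_ref below), so for a given bytenr the head entry sorts
 * after all of the plain delayed refs on that extent.  A search for
 * (bytenr, (u64)-1) therefore lands on the head node when one is present.
 */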

/*
 * insert a new ref into the rbtree.  This returns any existing refs
 * for the same (bytenr, parent) tuple, or NULL if the new node was properly
 * inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
						  u64 bytenr, u64 parent,
						  struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 rb_node);

		cmp = comp_entry(entry, bytenr, parent);
		if (cmp < 0)
			p = &(*p)->rb_left;
		else if (cmp > 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	return NULL;
}

/*
 * find an entry based on (bytenr, parent).  This returns the delayed
 * ref if it was able to find one, or NULL if nothing was in that spot
 */
static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
				  u64 bytenr, u64 parent,
				  struct btrfs_delayed_ref_node **last)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int cmp;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);
		if (last)
			*last = entry;

		cmp = comp_entry(entry, bytenr, parent);
		if (cmp < 0)
			n = n->rb_left;
		else if (cmp > 0)
			n = n->rb_right;
		else
			return entry;
	}
	return NULL;
}
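
/*
 * Lock the mutex on a ref head so its delayed updates can be processed.
 * Must be called with delayed_refs->lock held.  If the mutex is contended
 * we drop the spinlock, sleep on the mutex and retake the spinlock; in that
 * window the head may have been run and removed from the rbtree, in which
 * case -EAGAIN is returned and the caller has to search again.
 */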
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
			   struct btrfs_delayed_ref_head *head)
{
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	assert_spin_locked(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	atomic_inc(&head->node.refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (!head->node.in_tree) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(&head->node);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref(&head->node);
	return 0;
}
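
/*
 * Collect a cluster of at most 32 ref heads, starting with the head whose
 * bytenr is closest to 'start', and link them onto 'cluster'.  Returns 0
 * when at least one head was added and 1 when there is nothing left to
 * process.  Must be called with delayed_refs->lock held.
 */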
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
			   struct list_head *cluster, u64 start)
{
	int count = 0;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct rb_node *node;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *head;

	delayed_refs = &trans->transaction->delayed_refs;
	if (start == 0) {
		node = rb_first(&delayed_refs->root);
	} else {
		ref = NULL;
		tree_search(&delayed_refs->root, start, (u64)-1, &ref);
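		/*
		 * tree_search left 'ref' pointing at the last node it
		 * visited; back up until the previous entry falls below
		 * 'start' so the forward scan begins at the first ref at
		 * or near the requested byte number.
		 */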
		if (ref) {
			struct btrfs_delayed_ref_node *tmp;

			node = rb_prev(&ref->rb_node);
			while (node) {
				tmp = rb_entry(node,
					       struct btrfs_delayed_ref_node,
					       rb_node);
				if (tmp->bytenr < start)
					break;
				ref = tmp;
				node = rb_prev(&ref->rb_node);
			}
			node = &ref->rb_node;
		} else
			node = rb_first(&delayed_refs->root);
	}
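
	/*
	 * walk forward from the chosen starting point, pulling unused ref
	 * heads onto the cluster until we have 32 of them or we run into a
	 * head that has already been claimed by another cluster.
	 */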
again:
	while (node && count < 32) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		if (btrfs_delayed_ref_is_head(ref)) {
			head = btrfs_delayed_node_to_head(ref);
			if (list_empty(&head->cluster)) {
				list_add_tail(&head->cluster, cluster);
				delayed_refs->run_delayed_start =
					head->node.bytenr;
				count++;

				WARN_ON(delayed_refs->num_heads_ready == 0);
				delayed_refs->num_heads_ready--;
			} else if (count) {
				/* the goal of the clustering is to find extents
				 * that are likely to end up in the same extent
				 * leaf on disk.  So, we don't want them spread
				 * all over the tree.  Stop now if we've hit
				 * a head that was already in use
				 */
				break;
			}
		}
		node = rb_next(node);
	}
	if (count) {
		return 0;
	} else if (start) {
		/*
		 * we've gone to the end of the rbtree without finding any
		 * clusters.  start from the beginning and try again
		 */
		start = 0;
		node = rb_first(&delayed_refs->root);
		goto again;
	}
	return 1;
}

/*
 * This checks to see if there are any delayed refs in the
 * btree for a given bytenr.  It returns one if it finds any
 * and zero otherwise.
 *
 * If it only finds a head node, it returns 0.
 *
 * The idea is to use this when deciding if you can safely delete an
 * extent from the extent allocation tree.  There may be a pending
 * ref in the rbtree that adds or removes references, so as long as this
 * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent
 * allocation tree.
 */
int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct rb_node *prev_node;
	int ret = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
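	/*
	 * a search for (bytenr, (u64)-1) lands on the head node for this
	 * extent when one exists; any plain delayed refs for the same bytenr
	 * sort immediately before the head, so peek at the previous entry to
	 * see if a real reference modification is queued.
	 */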
	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
	if (ref) {
		prev_node = rb_prev(&ref->rb_node);
		if (!prev_node)
			goto out;
		ref = rb_entry(prev_node, struct btrfs_delayed_ref_node,
			       rb_node);
		if (ref->bytenr == bytenr)
			ret = 1;
	}
out:
	spin_unlock(&delayed_refs->lock);
	return ret;
}

/*
 * helper function to lookup reference count
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree.  This way you
 * can check to see what the reference count would be if all of the
 * delayed refs are processed.
 */
int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 bytenr,
			    u64 num_bytes, u32 *refs)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u32 num_refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;
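
	/*
	 * extent items live in the extent allocation tree keyed on
	 * (bytenr, BTRFS_EXTENT_ITEM_KEY, num_bytes); read the on-disk
	 * reference count first and then fold in any pending delayed mods.
	 */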
	delayed_refs = &trans->transaction->delayed_refs;
again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (ret == 0) {
		leaf = path->nodes[0];
		ei = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_extent_item);
		num_refs = btrfs_extent_refs(leaf, ei);
	} else {
		num_refs = 0;
		ret = 0;
	}

	spin_lock(&delayed_refs->lock);
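	/*
	 * if we can take the head's mutex the queued modifications are
	 * stable, so add ref_mod to the on-disk count.  Otherwise drop our
	 * locks, wait for whoever holds the mutex to finish running the
	 * delayed refs, and redo the whole lookup from the btree search.
	 */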
	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
	if (ref) {
		head = btrfs_delayed_node_to_head(ref);
		if (mutex_trylock(&head->mutex)) {
			num_refs += ref->ref_mod;
			mutex_unlock(&head->mutex);
			*refs = num_refs;
			goto out;
		}

		atomic_inc(&ref->refs);
		spin_unlock(&delayed_refs->lock);

		btrfs_release_path(root->fs_info->extent_root, path);

		mutex_lock(&head->mutex);
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref(ref);
		goto again;
	} else {
		*refs = num_refs;
	}
out:
	spin_unlock(&delayed_refs->lock);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
		    struct btrfs_delayed_ref_root *delayed_refs,
		    struct btrfs_delayed_ref_node *existing,
		    struct btrfs_delayed_ref_node *update)
{
	struct btrfs_delayed_ref *existing_ref;
	struct btrfs_delayed_ref *ref;

	existing_ref = btrfs_delayed_node_to_ref(existing);
	ref = btrfs_delayed_node_to_ref(update);

	if (ref->pin)
		existing_ref->pin = 1;

	if (ref->action != existing_ref->action) {
		/*
		 * this is effectively undoing either an add or a
		 * drop.  We decrement the ref_mod, and if it goes
		 * down to zero we just delete the entry without
		 * ever changing the extent allocation tree.
		 */
		existing->ref_mod--;
		if (existing->ref_mod == 0) {
			rb_erase(&existing->rb_node,
				 &delayed_refs->root);
			existing->in_tree = 0;
			btrfs_put_delayed_ref(existing);
			delayed_refs->num_entries--;
			if (trans->delayed_ref_updates)
				trans->delayed_ref_updates--;
		}
	} else {
		if (existing_ref->action == BTRFS_ADD_DELAYED_REF) {
			/* if we're adding refs, make sure all the
			 * details match up.  The extent could
			 * have been totally freed and reallocated
			 * by a different owner before the delayed
			 * ref entries were removed.
			 */
			existing_ref->owner_objectid = ref->owner_objectid;
			existing_ref->generation = ref->generation;
			existing_ref->root = ref->root;
			existing->num_bytes = update->num_bytes;
		}
		/*
		 * the action on the existing ref matches
		 * the action on the ref we're trying to add.
		 * Bump the ref_mod by one so the backref that
		 * is eventually added/removed has the correct
		 * reference count
		 */
		existing->ref_mod += update->ref_mod;
	}
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
			 struct btrfs_delayed_ref_node *update)
{
	struct btrfs_delayed_ref_head *existing_ref;
	struct btrfs_delayed_ref_head *ref;

	existing_ref = btrfs_delayed_node_to_head(existing);
	ref = btrfs_delayed_node_to_head(update);

	if (ref->must_insert_reserved) {
		/* if the extent was freed and then
		 * reallocated before the delayed ref
		 * entries were processed, we can end up
		 * with an existing head ref without
		 * the must_insert_reserved flag set.
		 * Set it again here
		 */
		existing_ref->must_insert_reserved = ref->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;
	}

	/*
	 * update the reference mod on the head to reflect this new operation
	 */
	existing->ref_mod += update->ref_mod;
}

/*
 * helper function to actually insert a delayed ref into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count in the head node and properly dealing
 * with updating existing nodes as new modifications are queued.
 */
static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
			  struct btrfs_delayed_ref_node *ref,
			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
			  u64 ref_generation, u64 owner_objectid, int action,
			  int pin)
{
	struct btrfs_delayed_ref_node *existing;
	struct btrfs_delayed_ref *full_ref;
	struct btrfs_delayed_ref_head *head_ref = NULL;
	struct btrfs_delayed_ref_root *delayed_refs;
	int count_mod = 1;
	int must_insert_reserved = 0;

	/*
	 * the head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (parent == (u64)-1) {
		if (action == BTRFS_DROP_DELAYED_REF)
			count_mod = -1;
		else if (action == BTRFS_UPDATE_DELAYED_HEAD)
			count_mod = 0;
	}
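
	/*
	 * a BTRFS_UPDATE_DELAYED_HEAD operation only makes sure a head node
	 * exists for this extent; with count_mod of zero it does not change
	 * the net reference count at all.
	 */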

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
	 * the reserved accounting when the extent is finally added, or
	 * if a later modification deletes the delayed ref without ever
	 * inserting the extent into the extent allocation tree.
	 * ref->must_insert_reserved is the flag used to record
	 * that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		must_insert_reserved = 1;
		action = BTRFS_ADD_DELAYED_REF;
	} else {
		must_insert_reserved = 0;
	}

	delayed_refs = &trans->transaction->delayed_refs;

	/* first set the basic ref node struct up */
	atomic_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->parent = parent;
	ref->ref_mod = count_mod;
	ref->in_tree = 1;
	ref->num_bytes = num_bytes;

	if (btrfs_delayed_ref_is_head(ref)) {
		head_ref = btrfs_delayed_node_to_head(ref);
		head_ref->must_insert_reserved = must_insert_reserved;
		INIT_LIST_HEAD(&head_ref->cluster);
		mutex_init(&head_ref->mutex);
	} else {
		full_ref = btrfs_delayed_node_to_ref(ref);
		full_ref->root = ref_root;
		full_ref->generation = ref_generation;
		full_ref->owner_objectid = owner_objectid;
		full_ref->pin = pin;
		full_ref->action = action;
	}
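
	/*
	 * insert the new node, or find an existing node for this
	 * (bytenr, parent) pair and fold the update into it.
	 */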
	existing = tree_insert(&delayed_refs->root, bytenr,
			       parent, &ref->rb_node);

	if (existing) {
		if (btrfs_delayed_ref_is_head(ref))
			update_existing_head_ref(existing, ref);
		else
			update_existing_ref(trans, delayed_refs, existing, ref);

		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kfree(ref);
	} else {
		if (btrfs_delayed_ref_is_head(ref)) {
			delayed_refs->num_heads++;
			delayed_refs->num_heads_ready++;
		}
		delayed_refs->num_entries++;
		trans->delayed_ref_updates++;
	}
	return 0;
}

/*
 * add a delayed ref to the tree.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
			  u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root,
			  u64 ref_generation, u64 owner_objectid, int action,
			  int pin)
{
	struct btrfs_delayed_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	ref = kmalloc(sizeof(*ref), GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	/*
	 * the parent = 0 case comes from cases where we don't actually
	 * know the parent yet.  It will get updated later via an add/drop
	 * pair.
	 */
	if (parent == 0)
		parent = bytenr;

	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
	if (!head_ref) {
		kfree(ref);
		return -ENOMEM;
	}

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
				      (u64)-1, 0, 0, 0, action, pin);
	BUG_ON(ret);

	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
				      parent, ref_root, ref_generation,
				      owner_objectid, action, pin);
	BUG_ON(ret);
	spin_unlock(&delayed_refs->lock);
	return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if any were found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	delayed_refs = &trans->transaction->delayed_refs;
	ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
	if (ref)
		return btrfs_delayed_node_to_head(ref);
	return NULL;
}

/*
 * add a delayed ref to the tree.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 *
 * The main point of this call is to add and remove a backreference in a
 * single shot, taking the lock only once, and only searching for the head
 * node once.
 *
 * It is the same as doing a ref add and delete in two separate calls.
 */
int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
			  u64 bytenr, u64 num_bytes, u64 orig_parent,
			  u64 parent, u64 orig_ref_root, u64 ref_root,
			  u64 orig_ref_generation, u64 ref_generation,
			  u64 owner_objectid, int pin)
{
	struct btrfs_delayed_ref *ref;
	struct btrfs_delayed_ref *old_ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	ref = kmalloc(sizeof(*ref), GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS);
	if (!old_ref) {
		kfree(ref);
		return -ENOMEM;
	}

	/*
	 * the parent = 0 case comes from cases where we don't actually
	 * know the parent yet.  It will get updated later via an add/drop
	 * pair.
	 */
	if (parent == 0)
		parent = bytenr;
	if (orig_parent == 0)
		orig_parent = bytenr;

	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
	if (!head_ref) {
		kfree(ref);
		kfree(old_ref);
		return -ENOMEM;
	}

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert the head node, the new ref and the old ref to drop, all
	 * without releasing the spin lock
	 */
	ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes,
				      (u64)-1, 0, 0, 0,
				      BTRFS_UPDATE_DELAYED_HEAD, 0);
	BUG_ON(ret);

	ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes,
				      parent, ref_root, ref_generation,
				      owner_objectid, BTRFS_ADD_DELAYED_REF, 0);
	BUG_ON(ret);

	ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes,
				      orig_parent, orig_ref_root,
				      orig_ref_generation, owner_objectid,
				      BTRFS_DROP_DELAYED_REF, pin);
	BUG_ON(ret);
	spin_unlock(&delayed_refs->lock);
	return 0;
}