2012-04-29 10:39:43 +00:00
/*
* Copyright ( c ) 2000 - 2002 , 2005 Silicon Graphics , Inc .
* Copyright ( c ) 2010 David Chinner .
* Copyright ( c ) 2011 Christoph Hellwig .
* All Rights Reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it would be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write the Free Software Foundation ,
* Inc . , 51 Franklin St , Fifth Floor , Boston , MA 02110 - 1301 USA
*/
# include "xfs.h"
# include "xfs_fs.h"
# include "xfs_types.h"
# include "xfs_log.h"
# include "xfs_trans.h"
# include "xfs_sb.h"
# include "xfs_ag.h"
# include "xfs_mount.h"
# include "xfs_bmap_btree.h"
# include "xfs_alloc.h"
# include "xfs_inode.h"
# include "xfs_extent_busy.h"
# include "xfs_trace.h"
void
2012-04-29 10:41:10 +00:00
xfs_extent_busy_insert (
2012-04-29 10:39:43 +00:00
struct xfs_trans * tp ,
xfs_agnumber_t agno ,
xfs_agblock_t bno ,
xfs_extlen_t len ,
unsigned int flags )
{
2012-04-29 10:41:10 +00:00
struct xfs_extent_busy * new ;
struct xfs_extent_busy * busyp ;
2012-04-29 10:39:43 +00:00
struct xfs_perag * pag ;
struct rb_node * * rbp ;
struct rb_node * parent = NULL ;
2012-04-29 10:41:10 +00:00
new = kmem_zalloc ( sizeof ( struct xfs_extent_busy ) , KM_MAYFAIL ) ;
2012-04-29 10:39:43 +00:00
if ( ! new ) {
/*
* No Memory ! Since it is now not possible to track the free
* block , make this a synchronous transaction to insure that
* the block is not reused before this transaction commits .
*/
2012-04-29 10:41:10 +00:00
trace_xfs_extent_busy_enomem ( tp - > t_mountp , agno , bno , len ) ;
2012-04-29 10:39:43 +00:00
xfs_trans_set_sync ( tp ) ;
return ;
}
new - > agno = agno ;
new - > bno = bno ;
new - > length = len ;
INIT_LIST_HEAD ( & new - > list ) ;
new - > flags = flags ;
/* trace before insert to be able to see failed inserts */
2012-04-29 10:41:10 +00:00
trace_xfs_extent_busy ( tp - > t_mountp , agno , bno , len ) ;
2012-04-29 10:39:43 +00:00
pag = xfs_perag_get ( tp - > t_mountp , new - > agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
rbp = & pag - > pagb_tree . rb_node ;
while ( * rbp ) {
parent = * rbp ;
2012-04-29 10:41:10 +00:00
busyp = rb_entry ( parent , struct xfs_extent_busy , rb_node ) ;
2012-04-29 10:39:43 +00:00
if ( new - > bno < busyp - > bno ) {
rbp = & ( * rbp ) - > rb_left ;
ASSERT ( new - > bno + new - > length < = busyp - > bno ) ;
} else if ( new - > bno > busyp - > bno ) {
rbp = & ( * rbp ) - > rb_right ;
ASSERT ( bno > = busyp - > bno + busyp - > length ) ;
} else {
ASSERT ( 0 ) ;
}
}
rb_link_node ( & new - > rb_node , parent , rbp ) ;
rb_insert_color ( & new - > rb_node , & pag - > pagb_tree ) ;
list_add ( & new - > list , & tp - > t_busy ) ;
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
}
/*
* Search for a busy extent within the range of the extent we are about to
* allocate . You need to be holding the busy extent tree lock when calling
2012-04-29 10:41:10 +00:00
* xfs_extent_busy_search ( ) . This function returns 0 for no overlapping busy
2012-04-29 10:39:43 +00:00
* extent , - 1 for an overlapping but not exact busy extent , and 1 for an exact
* match . This is done so that a non - zero return indicates an overlap that
* will require a synchronous transaction , but it can still be
* used to distinguish between a partial or exact match .
*/
int
2012-04-29 10:41:10 +00:00
xfs_extent_busy_search (
2012-04-29 10:39:43 +00:00
struct xfs_mount * mp ,
xfs_agnumber_t agno ,
xfs_agblock_t bno ,
xfs_extlen_t len )
{
struct xfs_perag * pag ;
struct rb_node * rbp ;
2012-04-29 10:41:10 +00:00
struct xfs_extent_busy * busyp ;
2012-04-29 10:39:43 +00:00
int match = 0 ;
pag = xfs_perag_get ( mp , agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
rbp = pag - > pagb_tree . rb_node ;
/* find closest start bno overlap */
while ( rbp ) {
2012-04-29 10:41:10 +00:00
busyp = rb_entry ( rbp , struct xfs_extent_busy , rb_node ) ;
2012-04-29 10:39:43 +00:00
if ( bno < busyp - > bno ) {
/* may overlap, but exact start block is lower */
if ( bno + len > busyp - > bno )
match = - 1 ;
rbp = rbp - > rb_left ;
} else if ( bno > busyp - > bno ) {
/* may overlap, but exact start block is higher */
if ( bno < busyp - > bno + busyp - > length )
match = - 1 ;
rbp = rbp - > rb_right ;
} else {
/* bno matches busyp, length determines exact match */
match = ( busyp - > length = = len ) ? 1 : - 1 ;
break ;
}
}
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
return match ;
}
/*
* The found free extent [ fbno , fend ] overlaps part or all of the given busy
* extent . If the overlap covers the beginning , the end , or all of the busy
* extent , the overlapping portion can be made unbusy and used for the
* allocation . We can ' t split a busy extent because we can ' t modify a
* transaction / CIL context busy list , but we can update an entries block
* number or length .
*
* Returns true if the extent can safely be reused , or false if the search
* needs to be restarted .
*/
STATIC bool
2012-04-29 10:41:10 +00:00
xfs_extent_busy_update_extent (
2012-04-29 10:39:43 +00:00
struct xfs_mount * mp ,
struct xfs_perag * pag ,
2012-04-29 10:41:10 +00:00
struct xfs_extent_busy * busyp ,
2012-04-29 10:39:43 +00:00
xfs_agblock_t fbno ,
xfs_extlen_t flen ,
bool userdata )
{
xfs_agblock_t fend = fbno + flen ;
xfs_agblock_t bbno = busyp - > bno ;
xfs_agblock_t bend = bbno + busyp - > length ;
/*
* This extent is currently being discarded . Give the thread
* performing the discard a chance to mark the extent unbusy
* and retry .
*/
2012-04-29 10:41:10 +00:00
if ( busyp - > flags & XFS_EXTENT_BUSY_DISCARDED ) {
2012-04-29 10:39:43 +00:00
spin_unlock ( & pag - > pagb_lock ) ;
delay ( 1 ) ;
spin_lock ( & pag - > pagb_lock ) ;
return false ;
}
/*
* If there is a busy extent overlapping a user allocation , we have
* no choice but to force the log and retry the search .
*
* Fortunately this does not happen during normal operation , but
* only if the filesystem is very low on space and has to dip into
* the AGFL for normal allocations .
*/
if ( userdata )
goto out_force_log ;
if ( bbno < fbno & & bend > fend ) {
/*
* Case 1 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - +
* fbno fend
*/
/*
* We would have to split the busy extent to be able to track
* it correct , which we cannot do because we would have to
* modify the list of busy extents attached to the transaction
* or CIL context , which is immutable .
*
* Force out the log to clear the busy extent and retry the
* search .
*/
goto out_force_log ;
} else if ( bbno > = fbno & & bend < = fend ) {
/*
* Case 2 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 3 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 4 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 5 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
*/
/*
* The busy extent is fully covered by the extent we are
* allocating , and can simply be removed from the rbtree .
* However we cannot remove it from the immutable list
* tracking busy extents in the transaction or CIL context ,
* so set the length to zero to mark it invalid .
*
* We also need to restart the busy extent search from the
* tree root , because erasing the node can rearrange the
* tree topology .
*/
rb_erase ( & busyp - > rb_node , & pag - > pagb_tree ) ;
busyp - > length = 0 ;
return false ;
} else if ( fend < bend ) {
/*
* Case 6 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - +
* fbno fend
*
* Case 7 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - +
* fbno fend
*
*/
busyp - > bno = fend ;
} else if ( bbno < fbno ) {
/*
* Case 8 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - +
* fbno fend
*
* Case 9 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*/
busyp - > length = fbno - busyp - > bno ;
} else {
ASSERT ( 0 ) ;
}
2012-04-29 10:41:10 +00:00
trace_xfs_extent_busy_reuse ( mp , pag - > pag_agno , fbno , flen ) ;
2012-04-29 10:39:43 +00:00
return true ;
out_force_log :
spin_unlock ( & pag - > pagb_lock ) ;
xfs_log_force ( mp , XFS_LOG_SYNC ) ;
2012-04-29 10:41:10 +00:00
trace_xfs_extent_busy_force ( mp , pag - > pag_agno , fbno , flen ) ;
2012-04-29 10:39:43 +00:00
spin_lock ( & pag - > pagb_lock ) ;
return false ;
}
/*
* For a given extent [ fbno , flen ] , make sure we can reuse it safely .
*/
void
2012-04-29 10:41:10 +00:00
xfs_extent_busy_reuse (
2012-04-29 10:39:43 +00:00
struct xfs_mount * mp ,
xfs_agnumber_t agno ,
xfs_agblock_t fbno ,
xfs_extlen_t flen ,
bool userdata )
{
struct xfs_perag * pag ;
struct rb_node * rbp ;
ASSERT ( flen > 0 ) ;
pag = xfs_perag_get ( mp , agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
restart :
rbp = pag - > pagb_tree . rb_node ;
while ( rbp ) {
2012-04-29 10:41:10 +00:00
struct xfs_extent_busy * busyp =
rb_entry ( rbp , struct xfs_extent_busy , rb_node ) ;
2012-04-29 10:39:43 +00:00
xfs_agblock_t bbno = busyp - > bno ;
xfs_agblock_t bend = bbno + busyp - > length ;
if ( fbno + flen < = bbno ) {
rbp = rbp - > rb_left ;
continue ;
} else if ( fbno > = bend ) {
rbp = rbp - > rb_right ;
continue ;
}
2012-04-29 10:41:10 +00:00
if ( ! xfs_extent_busy_update_extent ( mp , pag , busyp , fbno , flen ,
2012-04-29 10:39:43 +00:00
userdata ) )
goto restart ;
}
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
}
/*
* For a given extent [ fbno , flen ] , search the busy extent list to find a
* subset of the extent that is not busy . If * rlen is smaller than
* args - > minlen no suitable extent could be found , and the higher level
* code needs to force out the log and retry the allocation .
*/
2012-05-10 13:55:33 -05:00
void
2012-04-29 10:41:10 +00:00
xfs_extent_busy_trim (
2012-04-29 10:39:43 +00:00
struct xfs_alloc_arg * args ,
xfs_agblock_t bno ,
xfs_extlen_t len ,
xfs_agblock_t * rbno ,
xfs_extlen_t * rlen )
{
xfs_agblock_t fbno ;
xfs_extlen_t flen ;
struct rb_node * rbp ;
ASSERT ( len > 0 ) ;
spin_lock ( & args - > pag - > pagb_lock ) ;
restart :
fbno = bno ;
flen = len ;
rbp = args - > pag - > pagb_tree . rb_node ;
while ( rbp & & flen > = args - > minlen ) {
2012-04-29 10:41:10 +00:00
struct xfs_extent_busy * busyp =
rb_entry ( rbp , struct xfs_extent_busy , rb_node ) ;
2012-04-29 10:39:43 +00:00
xfs_agblock_t fend = fbno + flen ;
xfs_agblock_t bbno = busyp - > bno ;
xfs_agblock_t bend = bbno + busyp - > length ;
if ( fend < = bbno ) {
rbp = rbp - > rb_left ;
continue ;
} else if ( fbno > = bend ) {
rbp = rbp - > rb_right ;
continue ;
}
/*
* If this is a metadata allocation , try to reuse the busy
* extent instead of trimming the allocation .
*/
if ( ! args - > userdata & &
2012-04-29 10:41:10 +00:00
! ( busyp - > flags & XFS_EXTENT_BUSY_DISCARDED ) ) {
if ( ! xfs_extent_busy_update_extent ( args - > mp , args - > pag ,
2012-04-29 10:39:43 +00:00
busyp , fbno , flen ,
false ) )
goto restart ;
continue ;
}
if ( bbno < = fbno ) {
/* start overlap */
/*
* Case 1 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - +
* fbno fend
*
* Case 2 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - +
* fbno fend
*
* Case 3 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - +
* fbno fend
*
* Case 4 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - +
* fbno fend
*
* No unbusy region in extent , return failure .
*/
if ( fend < = bend )
goto fail ;
/*
* Case 5 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 6 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Needs to be trimmed to :
* + - - - - - - - +
* fbno fend
*/
fbno = bend ;
} else if ( bend > = fend ) {
/* end overlap */
/*
* Case 7 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 8 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Needs to be trimmed to :
* + - - - - - - - +
* fbno fend
*/
fend = bbno ;
} else {
/* middle overlap */
/*
* Case 9 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Can be trimmed to :
* + - - - - - - - + OR + - - - - - - - +
* fbno fend fbno fend
*
* Backward allocation leads to significant
* fragmentation of directories , which degrades
* directory performance , therefore we always want to
* choose the option that produces forward allocation
* patterns .
* Preferring the lower bno extent will make the next
* request use " fend " as the start of the next
* allocation ; if the segment is no longer busy at
* that point , we ' ll get a contiguous allocation , but
* even if it is still busy , we will get a forward
* allocation .
* We try to avoid choosing the segment at " bend " ,
* because that can lead to the next allocation
* taking the segment at " fbno " , which would be a
* backward allocation . We only use the segment at
* " fbno " if it is much larger than the current
* requested size , because in that case there ' s a
* good chance subsequent allocations will be
* contiguous .
*/
if ( bbno - fbno > = args - > maxlen ) {
/* left candidate fits perfect */
fend = bbno ;
} else if ( fend - bend > = args - > maxlen * 4 ) {
/* right candidate has enough free space */
fbno = bend ;
} else if ( bbno - fbno > = args - > minlen ) {
/* left candidate fits minimum requirement */
fend = bbno ;
} else {
goto fail ;
}
}
flen = fend - fbno ;
}
spin_unlock ( & args - > pag - > pagb_lock ) ;
if ( fbno ! = bno | | flen ! = len ) {
2012-04-29 10:41:10 +00:00
trace_xfs_extent_busy_trim ( args - > mp , args - > agno , bno , len ,
2012-04-29 10:39:43 +00:00
fbno , flen ) ;
}
* rbno = fbno ;
* rlen = flen ;
return ;
fail :
/*
* Return a zero extent length as failure indications . All callers
* re - check if the trimmed extent satisfies the minlen requirement .
*/
spin_unlock ( & args - > pag - > pagb_lock ) ;
2012-04-29 10:41:10 +00:00
trace_xfs_extent_busy_trim ( args - > mp , args - > agno , bno , len , fbno , 0 ) ;
2012-04-29 10:39:43 +00:00
* rbno = fbno ;
* rlen = 0 ;
}
2012-04-29 10:41:10 +00:00
STATIC void
xfs_extent_busy_clear_one (
2012-04-29 10:39:43 +00:00
struct xfs_mount * mp ,
struct xfs_perag * pag ,
2012-04-29 10:41:10 +00:00
struct xfs_extent_busy * busyp )
2012-04-29 10:39:43 +00:00
{
if ( busyp - > length ) {
2012-04-29 10:41:10 +00:00
trace_xfs_extent_busy_clear ( mp , busyp - > agno , busyp - > bno ,
2012-04-29 10:39:43 +00:00
busyp - > length ) ;
rb_erase ( & busyp - > rb_node , & pag - > pagb_tree ) ;
}
list_del_init ( & busyp - > list ) ;
kmem_free ( busyp ) ;
}
/*
* Remove all extents on the passed in list from the busy extents tree .
* If do_discard is set skip extents that need to be discarded , and mark
* these as undergoing a discard operation instead .
*/
void
2012-04-29 10:41:10 +00:00
xfs_extent_busy_clear (
2012-04-29 10:39:43 +00:00
struct xfs_mount * mp ,
struct list_head * list ,
bool do_discard )
{
2012-04-29 10:41:10 +00:00
struct xfs_extent_busy * busyp , * n ;
2012-04-29 10:39:43 +00:00
struct xfs_perag * pag = NULL ;
xfs_agnumber_t agno = NULLAGNUMBER ;
list_for_each_entry_safe ( busyp , n , list , list ) {
if ( busyp - > agno ! = agno ) {
if ( pag ) {
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
}
pag = xfs_perag_get ( mp , busyp - > agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
agno = busyp - > agno ;
}
if ( do_discard & & busyp - > length & &
2012-04-29 10:41:10 +00:00
! ( busyp - > flags & XFS_EXTENT_BUSY_SKIP_DISCARD ) )
busyp - > flags = XFS_EXTENT_BUSY_DISCARDED ;
2012-04-29 10:39:43 +00:00
else
2012-04-29 10:41:10 +00:00
xfs_extent_busy_clear_one ( mp , pag , busyp ) ;
2012-04-29 10:39:43 +00:00
}
if ( pag ) {
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
}
}
/*
* Callback for list_sort to sort busy extents by the AG they reside in .
*/
int
2012-04-29 10:41:10 +00:00
xfs_extent_busy_ag_cmp (
2012-04-29 10:39:43 +00:00
void * priv ,
struct list_head * a ,
struct list_head * b )
{
2012-04-29 10:41:10 +00:00
return container_of ( a , struct xfs_extent_busy , list ) - > agno -
container_of ( b , struct xfs_extent_busy , list ) - > agno ;
2012-04-29 10:39:43 +00:00
}