2018-06-06 05:42:14 +03:00
// SPDX-License-Identifier: GPL-2.0
2012-04-29 14:39:43 +04:00
/*
* Copyright ( c ) 2000 - 2002 , 2005 Silicon Graphics , Inc .
* Copyright ( c ) 2010 David Chinner .
* Copyright ( c ) 2011 Christoph Hellwig .
* All Rights Reserved .
*/
# include "xfs.h"
# include "xfs_fs.h"
2013-10-23 03:36:05 +04:00
# include "xfs_format.h"
2013-10-23 03:50:10 +04:00
# include "xfs_log_format.h"
2013-10-23 03:36:05 +04:00
# include "xfs_shared.h"
2013-10-23 03:50:10 +04:00
# include "xfs_trans_resv.h"
2012-04-29 14:39:43 +04:00
# include "xfs_sb.h"
# include "xfs_mount.h"
# include "xfs_alloc.h"
# include "xfs_extent_busy.h"
# include "xfs_trace.h"
2013-10-23 03:50:10 +04:00
# include "xfs_trans.h"
# include "xfs_log.h"
2012-04-29 14:39:43 +04:00
void
2012-04-29 14:41:10 +04:00
xfs_extent_busy_insert (
2012-04-29 14:39:43 +04:00
struct xfs_trans * tp ,
xfs_agnumber_t agno ,
xfs_agblock_t bno ,
xfs_extlen_t len ,
unsigned int flags )
{
2012-04-29 14:41:10 +04:00
struct xfs_extent_busy * new ;
struct xfs_extent_busy * busyp ;
2012-04-29 14:39:43 +04:00
struct xfs_perag * pag ;
struct rb_node * * rbp ;
struct rb_node * parent = NULL ;
2019-08-26 22:06:22 +03:00
new = kmem_zalloc ( sizeof ( struct xfs_extent_busy ) , 0 ) ;
2012-04-29 14:39:43 +04:00
new - > agno = agno ;
new - > bno = bno ;
new - > length = len ;
INIT_LIST_HEAD ( & new - > list ) ;
new - > flags = flags ;
/* trace before insert to be able to see failed inserts */
2012-04-29 14:41:10 +04:00
trace_xfs_extent_busy ( tp - > t_mountp , agno , bno , len ) ;
2012-04-29 14:39:43 +04:00
pag = xfs_perag_get ( tp - > t_mountp , new - > agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
rbp = & pag - > pagb_tree . rb_node ;
while ( * rbp ) {
parent = * rbp ;
2012-04-29 14:41:10 +04:00
busyp = rb_entry ( parent , struct xfs_extent_busy , rb_node ) ;
2012-04-29 14:39:43 +04:00
if ( new - > bno < busyp - > bno ) {
rbp = & ( * rbp ) - > rb_left ;
ASSERT ( new - > bno + new - > length < = busyp - > bno ) ;
} else if ( new - > bno > busyp - > bno ) {
rbp = & ( * rbp ) - > rb_right ;
ASSERT ( bno > = busyp - > bno + busyp - > length ) ;
} else {
ASSERT ( 0 ) ;
}
}
rb_link_node ( & new - > rb_node , parent , rbp ) ;
rb_insert_color ( & new - > rb_node , & pag - > pagb_tree ) ;
list_add ( & new - > list , & tp - > t_busy ) ;
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
}
/*
* Search for a busy extent within the range of the extent we are about to
* allocate . You need to be holding the busy extent tree lock when calling
2012-04-29 14:41:10 +04:00
* xfs_extent_busy_search ( ) . This function returns 0 for no overlapping busy
2012-04-29 14:39:43 +04:00
* extent , - 1 for an overlapping but not exact busy extent , and 1 for an exact
* match . This is done so that a non - zero return indicates an overlap that
* will require a synchronous transaction , but it can still be
* used to distinguish between a partial or exact match .
*/
int
2012-04-29 14:41:10 +04:00
xfs_extent_busy_search (
2012-04-29 14:39:43 +04:00
struct xfs_mount * mp ,
xfs_agnumber_t agno ,
xfs_agblock_t bno ,
xfs_extlen_t len )
{
struct xfs_perag * pag ;
struct rb_node * rbp ;
2012-04-29 14:41:10 +04:00
struct xfs_extent_busy * busyp ;
2012-04-29 14:39:43 +04:00
int match = 0 ;
pag = xfs_perag_get ( mp , agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
rbp = pag - > pagb_tree . rb_node ;
/* find closest start bno overlap */
while ( rbp ) {
2012-04-29 14:41:10 +04:00
busyp = rb_entry ( rbp , struct xfs_extent_busy , rb_node ) ;
2012-04-29 14:39:43 +04:00
if ( bno < busyp - > bno ) {
/* may overlap, but exact start block is lower */
if ( bno + len > busyp - > bno )
match = - 1 ;
rbp = rbp - > rb_left ;
} else if ( bno > busyp - > bno ) {
/* may overlap, but exact start block is higher */
if ( bno < busyp - > bno + busyp - > length )
match = - 1 ;
rbp = rbp - > rb_right ;
} else {
/* bno matches busyp, length determines exact match */
match = ( busyp - > length = = len ) ? 1 : - 1 ;
break ;
}
}
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
return match ;
}
/*
* The found free extent [ fbno , fend ] overlaps part or all of the given busy
* extent . If the overlap covers the beginning , the end , or all of the busy
* extent , the overlapping portion can be made unbusy and used for the
* allocation . We can ' t split a busy extent because we can ' t modify a
2013-08-12 07:14:57 +04:00
* transaction / CIL context busy list , but we can update an entry ' s block
2012-04-29 14:39:43 +04:00
* number or length .
*
* Returns true if the extent can safely be reused , or false if the search
* needs to be restarted .
*/
STATIC bool
2012-04-29 14:41:10 +04:00
xfs_extent_busy_update_extent (
2012-04-29 14:39:43 +04:00
struct xfs_mount * mp ,
struct xfs_perag * pag ,
2012-04-29 14:41:10 +04:00
struct xfs_extent_busy * busyp ,
2012-04-29 14:39:43 +04:00
xfs_agblock_t fbno ,
xfs_extlen_t flen ,
2013-09-02 14:49:36 +04:00
bool userdata ) __releases ( & pag - > pagb_lock )
__acquires ( & pag - > pagb_lock )
2012-04-29 14:39:43 +04:00
{
xfs_agblock_t fend = fbno + flen ;
xfs_agblock_t bbno = busyp - > bno ;
xfs_agblock_t bend = bbno + busyp - > length ;
/*
* This extent is currently being discarded . Give the thread
* performing the discard a chance to mark the extent unbusy
* and retry .
*/
2012-04-29 14:41:10 +04:00
if ( busyp - > flags & XFS_EXTENT_BUSY_DISCARDED ) {
2012-04-29 14:39:43 +04:00
spin_unlock ( & pag - > pagb_lock ) ;
delay ( 1 ) ;
spin_lock ( & pag - > pagb_lock ) ;
return false ;
}
/*
* If there is a busy extent overlapping a user allocation , we have
* no choice but to force the log and retry the search .
*
* Fortunately this does not happen during normal operation , but
* only if the filesystem is very low on space and has to dip into
* the AGFL for normal allocations .
*/
if ( userdata )
goto out_force_log ;
if ( bbno < fbno & & bend > fend ) {
/*
* Case 1 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - +
* fbno fend
*/
/*
* We would have to split the busy extent to be able to track
* it correct , which we cannot do because we would have to
* modify the list of busy extents attached to the transaction
* or CIL context , which is immutable .
*
* Force out the log to clear the busy extent and retry the
* search .
*/
goto out_force_log ;
} else if ( bbno > = fbno & & bend < = fend ) {
/*
* Case 2 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 3 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 4 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 5 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
*/
/*
* The busy extent is fully covered by the extent we are
* allocating , and can simply be removed from the rbtree .
* However we cannot remove it from the immutable list
* tracking busy extents in the transaction or CIL context ,
* so set the length to zero to mark it invalid .
*
* We also need to restart the busy extent search from the
* tree root , because erasing the node can rearrange the
* tree topology .
*/
rb_erase ( & busyp - > rb_node , & pag - > pagb_tree ) ;
busyp - > length = 0 ;
return false ;
} else if ( fend < bend ) {
/*
* Case 6 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - +
* fbno fend
*
* Case 7 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - +
* fbno fend
*
*/
busyp - > bno = fend ;
} else if ( bbno < fbno ) {
/*
* Case 8 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - +
* fbno fend
*
* Case 9 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*/
busyp - > length = fbno - busyp - > bno ;
} else {
ASSERT ( 0 ) ;
}
2012-04-29 14:41:10 +04:00
trace_xfs_extent_busy_reuse ( mp , pag - > pag_agno , fbno , flen ) ;
2012-04-29 14:39:43 +04:00
return true ;
out_force_log :
spin_unlock ( & pag - > pagb_lock ) ;
xfs_log_force ( mp , XFS_LOG_SYNC ) ;
2012-04-29 14:41:10 +04:00
trace_xfs_extent_busy_force ( mp , pag - > pag_agno , fbno , flen ) ;
2012-04-29 14:39:43 +04:00
spin_lock ( & pag - > pagb_lock ) ;
return false ;
}
/*
* For a given extent [ fbno , flen ] , make sure we can reuse it safely .
*/
void
2012-04-29 14:41:10 +04:00
xfs_extent_busy_reuse (
2012-04-29 14:39:43 +04:00
struct xfs_mount * mp ,
xfs_agnumber_t agno ,
xfs_agblock_t fbno ,
xfs_extlen_t flen ,
bool userdata )
{
struct xfs_perag * pag ;
struct rb_node * rbp ;
ASSERT ( flen > 0 ) ;
pag = xfs_perag_get ( mp , agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
restart :
rbp = pag - > pagb_tree . rb_node ;
while ( rbp ) {
2012-04-29 14:41:10 +04:00
struct xfs_extent_busy * busyp =
rb_entry ( rbp , struct xfs_extent_busy , rb_node ) ;
2012-04-29 14:39:43 +04:00
xfs_agblock_t bbno = busyp - > bno ;
xfs_agblock_t bend = bbno + busyp - > length ;
if ( fbno + flen < = bbno ) {
rbp = rbp - > rb_left ;
continue ;
} else if ( fbno > = bend ) {
rbp = rbp - > rb_right ;
continue ;
}
2012-04-29 14:41:10 +04:00
if ( ! xfs_extent_busy_update_extent ( mp , pag , busyp , fbno , flen ,
2012-04-29 14:39:43 +04:00
userdata ) )
goto restart ;
}
spin_unlock ( & pag - > pagb_lock ) ;
xfs_perag_put ( pag ) ;
}
/*
* For a given extent [ fbno , flen ] , search the busy extent list to find a
* subset of the extent that is not busy . If * rlen is smaller than
* args - > minlen no suitable extent could be found , and the higher level
* code needs to force out the log and retry the allocation .
2017-02-08 01:06:57 +03:00
*
* Return the current busy generation for the AG if the extent is busy . This
* value can be used to wait for at least one of the currently busy extents
* to be cleared . Note that the busy list is not guaranteed to be empty after
* the gen is woken . The state of a specific extent must always be confirmed
* with another call to xfs_extent_busy_trim ( ) before it can be used .
2012-04-29 14:39:43 +04:00
*/
2017-02-08 01:06:57 +03:00
bool
2012-04-29 14:41:10 +04:00
xfs_extent_busy_trim (
2012-04-29 14:39:43 +04:00
struct xfs_alloc_arg * args ,
2017-02-08 01:06:57 +03:00
xfs_agblock_t * bno ,
xfs_extlen_t * len ,
unsigned * busy_gen )
2012-04-29 14:39:43 +04:00
{
xfs_agblock_t fbno ;
xfs_extlen_t flen ;
struct rb_node * rbp ;
2017-02-08 01:06:57 +03:00
bool ret = false ;
2012-04-29 14:39:43 +04:00
2017-02-15 05:16:50 +03:00
ASSERT ( * len > 0 ) ;
2012-04-29 14:39:43 +04:00
spin_lock ( & args - > pag - > pagb_lock ) ;
restart :
2017-02-08 01:06:57 +03:00
fbno = * bno ;
flen = * len ;
2012-04-29 14:39:43 +04:00
rbp = args - > pag - > pagb_tree . rb_node ;
while ( rbp & & flen > = args - > minlen ) {
2012-04-29 14:41:10 +04:00
struct xfs_extent_busy * busyp =
rb_entry ( rbp , struct xfs_extent_busy , rb_node ) ;
2012-04-29 14:39:43 +04:00
xfs_agblock_t fend = fbno + flen ;
xfs_agblock_t bbno = busyp - > bno ;
xfs_agblock_t bend = bbno + busyp - > length ;
if ( fend < = bbno ) {
rbp = rbp - > rb_left ;
continue ;
} else if ( fbno > = bend ) {
rbp = rbp - > rb_right ;
continue ;
}
/*
* If this is a metadata allocation , try to reuse the busy
* extent instead of trimming the allocation .
*/
2019-10-30 22:25:00 +03:00
if ( ! ( args - > datatype & XFS_ALLOC_USERDATA ) & &
2012-04-29 14:41:10 +04:00
! ( busyp - > flags & XFS_EXTENT_BUSY_DISCARDED ) ) {
if ( ! xfs_extent_busy_update_extent ( args - > mp , args - > pag ,
2012-04-29 14:39:43 +04:00
busyp , fbno , flen ,
false ) )
goto restart ;
continue ;
}
if ( bbno < = fbno ) {
/* start overlap */
/*
* Case 1 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - +
* fbno fend
*
* Case 2 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - +
* fbno fend
*
* Case 3 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - +
* fbno fend
*
* Case 4 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - +
* fbno fend
*
* No unbusy region in extent , return failure .
*/
if ( fend < = bend )
goto fail ;
/*
* Case 5 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 6 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Needs to be trimmed to :
* + - - - - - - - +
* fbno fend
*/
fbno = bend ;
} else if ( bend > = fend ) {
/* end overlap */
/*
* Case 7 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Case 8 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Needs to be trimmed to :
* + - - - - - - - +
* fbno fend
*/
fend = bbno ;
} else {
/* middle overlap */
/*
* Case 9 :
* bbno bend
* + BBBBBBBBBBBBBBBBB +
* + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
* fbno fend
*
* Can be trimmed to :
* + - - - - - - - + OR + - - - - - - - +
* fbno fend fbno fend
*
* Backward allocation leads to significant
* fragmentation of directories , which degrades
* directory performance , therefore we always want to
* choose the option that produces forward allocation
* patterns .
* Preferring the lower bno extent will make the next
* request use " fend " as the start of the next
* allocation ; if the segment is no longer busy at
* that point , we ' ll get a contiguous allocation , but
* even if it is still busy , we will get a forward
* allocation .
* We try to avoid choosing the segment at " bend " ,
* because that can lead to the next allocation
* taking the segment at " fbno " , which would be a
* backward allocation . We only use the segment at
* " fbno " if it is much larger than the current
* requested size , because in that case there ' s a
* good chance subsequent allocations will be
* contiguous .
*/
if ( bbno - fbno > = args - > maxlen ) {
/* left candidate fits perfect */
fend = bbno ;
} else if ( fend - bend > = args - > maxlen * 4 ) {
/* right candidate has enough free space */
fbno = bend ;
} else if ( bbno - fbno > = args - > minlen ) {
/* left candidate fits minimum requirement */
fend = bbno ;
} else {
goto fail ;
}
}
flen = fend - fbno ;
}
2017-02-08 01:06:57 +03:00
out :
2012-04-29 14:39:43 +04:00
2017-02-08 01:06:57 +03:00
if ( fbno ! = * bno | | flen ! = * len ) {
trace_xfs_extent_busy_trim ( args - > mp , args - > agno , * bno , * len ,
2012-04-29 14:39:43 +04:00
fbno , flen ) ;
2017-02-08 01:06:57 +03:00
* bno = fbno ;
* len = flen ;
* busy_gen = args - > pag - > pagb_gen ;
ret = true ;
2012-04-29 14:39:43 +04:00
}
2017-02-08 01:06:57 +03:00
spin_unlock ( & args - > pag - > pagb_lock ) ;
return ret ;
2012-04-29 14:39:43 +04:00
fail :
/*
* Return a zero extent length as failure indications . All callers
* re - check if the trimmed extent satisfies the minlen requirement .
*/
2017-02-08 01:06:57 +03:00
flen = 0 ;
goto out ;
2012-04-29 14:39:43 +04:00
}
2012-04-29 14:41:10 +04:00
STATIC void
xfs_extent_busy_clear_one (
2012-04-29 14:39:43 +04:00
struct xfs_mount * mp ,
struct xfs_perag * pag ,
2012-04-29 14:41:10 +04:00
struct xfs_extent_busy * busyp )
2012-04-29 14:39:43 +04:00
{
if ( busyp - > length ) {
2012-04-29 14:41:10 +04:00
trace_xfs_extent_busy_clear ( mp , busyp - > agno , busyp - > bno ,
2012-04-29 14:39:43 +04:00
busyp - > length ) ;
rb_erase ( & busyp - > rb_node , & pag - > pagb_tree ) ;
}
list_del_init ( & busyp - > list ) ;
kmem_free ( busyp ) ;
}
2017-02-08 01:06:57 +03:00
/*
 * Finish working on the busy extents of one AG: optionally bump the busy
 * generation and wake all waiters, then drop the busy extent lock and the
 * perag reference.  Called with pag->pagb_lock held.
 */
static void
xfs_extent_busy_put_pag(
	struct xfs_perag	*pag,
	bool			wakeup)
		__releases(pag->pagb_lock)
{
	if (wakeup) {
		/* publish the new generation before waking the waiters */
		pag->pagb_gen++;
		wake_up_all(&pag->pagb_wait);
	}

	spin_unlock(&pag->pagb_lock);
	xfs_perag_put(pag);
}
2012-04-29 14:39:43 +04:00
/*
* Remove all extents on the passed in list from the busy extents tree .
* If do_discard is set skip extents that need to be discarded , and mark
* these as undergoing a discard operation instead .
*/
void
2012-04-29 14:41:10 +04:00
xfs_extent_busy_clear (
2012-04-29 14:39:43 +04:00
struct xfs_mount * mp ,
struct list_head * list ,
bool do_discard )
{
2012-04-29 14:41:10 +04:00
struct xfs_extent_busy * busyp , * n ;
2012-04-29 14:39:43 +04:00
struct xfs_perag * pag = NULL ;
xfs_agnumber_t agno = NULLAGNUMBER ;
2017-02-08 01:06:57 +03:00
bool wakeup = false ;
2012-04-29 14:39:43 +04:00
list_for_each_entry_safe ( busyp , n , list , list ) {
if ( busyp - > agno ! = agno ) {
2017-02-08 01:06:57 +03:00
if ( pag )
xfs_extent_busy_put_pag ( pag , wakeup ) ;
2012-04-29 14:39:43 +04:00
agno = busyp - > agno ;
2017-02-08 01:06:57 +03:00
pag = xfs_perag_get ( mp , agno ) ;
spin_lock ( & pag - > pagb_lock ) ;
wakeup = false ;
2012-04-29 14:39:43 +04:00
}
if ( do_discard & & busyp - > length & &
2017-02-08 01:06:57 +03:00
! ( busyp - > flags & XFS_EXTENT_BUSY_SKIP_DISCARD ) ) {
2012-04-29 14:41:10 +04:00
busyp - > flags = XFS_EXTENT_BUSY_DISCARDED ;
2017-02-08 01:06:57 +03:00
} else {
2012-04-29 14:41:10 +04:00
xfs_extent_busy_clear_one ( mp , pag , busyp ) ;
2017-02-08 01:06:57 +03:00
wakeup = true ;
}
2012-04-29 14:39:43 +04:00
}
2017-02-08 01:06:57 +03:00
if ( pag )
xfs_extent_busy_put_pag ( pag , wakeup ) ;
}
/*
* Flush out all busy extents for this AG .
*/
void
xfs_extent_busy_flush (
struct xfs_mount * mp ,
struct xfs_perag * pag ,
unsigned busy_gen )
{
DEFINE_WAIT ( wait ) ;
2018-03-14 09:15:27 +03:00
int error ;
2017-02-08 01:06:57 +03:00
2018-03-14 09:15:28 +03:00
error = xfs_log_force ( mp , XFS_LOG_SYNC ) ;
2017-02-08 01:06:57 +03:00
if ( error )
return ;
do {
prepare_to_wait ( & pag - > pagb_wait , & wait , TASK_KILLABLE ) ;
if ( busy_gen ! = READ_ONCE ( pag - > pagb_gen ) )
break ;
schedule ( ) ;
} while ( 1 ) ;
finish_wait ( & pag - > pagb_wait , & wait ) ;
}
void
xfs_extent_busy_wait_all (
struct xfs_mount * mp )
{
DEFINE_WAIT ( wait ) ;
xfs_agnumber_t agno ;
for ( agno = 0 ; agno < mp - > m_sb . sb_agcount ; agno + + ) {
struct xfs_perag * pag = xfs_perag_get ( mp , agno ) ;
do {
prepare_to_wait ( & pag - > pagb_wait , & wait , TASK_KILLABLE ) ;
if ( RB_EMPTY_ROOT ( & pag - > pagb_tree ) )
break ;
schedule ( ) ;
} while ( 1 ) ;
finish_wait ( & pag - > pagb_wait , & wait ) ;
2012-04-29 14:39:43 +04:00
xfs_perag_put ( pag ) ;
}
}
/*
* Callback for list_sort to sort busy extents by the AG they reside in .
*/
int
2012-04-29 14:41:10 +04:00
xfs_extent_busy_ag_cmp (
2012-04-29 14:39:43 +04:00
void * priv ,
2017-02-08 01:07:57 +03:00
struct list_head * l1 ,
struct list_head * l2 )
2012-04-29 14:39:43 +04:00
{
2017-02-08 01:07:57 +03:00
struct xfs_extent_busy * b1 =
container_of ( l1 , struct xfs_extent_busy , list ) ;
struct xfs_extent_busy * b2 =
container_of ( l2 , struct xfs_extent_busy , list ) ;
s32 diff ;
diff = b1 - > agno - b2 - > agno ;
if ( ! diff )
diff = b1 - > bno - b2 - > bno ;
return diff ;
2012-04-29 14:39:43 +04:00
}