// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2019 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_iwalk.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_health.h"
#include "xfs_trans.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"

/*
 * Walking Inodes in the Filesystem
 * ================================
 *
 * This iterator function walks a subset of filesystem inodes in increasing
 * order from @startino until there are no more inodes.  For each allocated
 * inode it finds, it calls a walk function with the relevant inode number and
 * a pointer to caller-provided data.  The walk function can return the usual
 * negative error code to stop the iteration; 0 to continue the iteration; or
 * -ECANCELED to stop the iteration.  This return value is returned to the
 * caller.
 *
 * Internally, we allow the walk function to do anything, which means that we
 * cannot maintain the inobt cursor or our lock on the AGI buffer.  We
 * therefore cache the inobt records in kernel memory and only call the walk
 * function when our memory buffer is full.  @nr_recs is the number of records
 * that we've cached, and @sz_recs is the size of our cache.
 *
 * It is the responsibility of the walk function to ensure it accesses
 * allocated inodes, as the inobt records may be stale by the time they are
 * acted upon.
 */
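
/*
 * Illustrative sketch only, not part of the walk machinery: what a
 * caller-supplied walk function and an xfs_iwalk() invocation might look
 * like, following the xfs_iwalk_fn calling convention used in this file.
 * The names xfs_example_count_inode and @count are hypothetical.
 *
 *	STATIC int
 *	xfs_example_count_inode(
 *		struct xfs_mount	*mp,
 *		struct xfs_trans	*tp,
 *		xfs_ino_t		ino,
 *		void			*data)
 *	{
 *		uint64_t		*count = data;
 *
 *		(*count)++;
 *		return 0;	// 0 continues; -ECANCELED or -errno stops
 *	}
 *
 *	uint64_t	count = 0;
 *	int		error;
 *
 *	error = xfs_iwalk(mp, tp, 0, 0, xfs_example_count_inode, 0, &count);
 */
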
struct xfs_iwalk_ag {
	/* parallel work control data; will be null if single threaded */
	struct xfs_pwork		pwork;

	struct xfs_mount		*mp;
	struct xfs_trans		*tp;
	struct xfs_perag		*pag;

	/* Where do we start the traversal? */
	xfs_ino_t			startino;

	/* What was the last inode number we saw when iterating the inobt? */
	xfs_ino_t			lastino;

	/* Array of inobt records we cache. */
	struct xfs_inobt_rec_incore	*recs;

	/* Number of entries allocated for the @recs array. */
	unsigned int			sz_recs;

	/* Number of entries in the @recs array that are in use. */
	unsigned int			nr_recs;

	/* Inode walk function and data pointer. */
	xfs_iwalk_fn			iwalk_fn;
	xfs_inobt_walk_fn		inobt_walk_fn;
	void				*data;

	/*
	 * Make it look like the inodes up to startino are free so that
	 * bulkstat can start its inode iteration at the correct place without
	 * needing to special case everywhere.
	 */
	unsigned int			trim_start:1;

	/* Skip empty inobt records? */
	unsigned int			skip_empty:1;

	/* Drop the (hopefully empty) transaction when calling iwalk_fn. */
	unsigned int			drop_trans:1;
};

/*
 * Loop over all clusters in a chunk for a given incore inode allocation btree
 * record.  Do a readahead if there are any allocated inodes in that cluster.
 */
STATIC void
xfs_iwalk_ichunk_ra(
	struct xfs_mount		*mp,
	struct xfs_perag		*pag,
	struct xfs_inobt_rec_incore	*irec)
{
	struct xfs_ino_geometry		*igeo = M_IGEO(mp);
	xfs_agblock_t			agbno;
	struct blk_plug			plug;
	int				i;	/* inode chunk index */

	agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);

	blk_start_plug(&plug);
	for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) {
		xfs_inofree_t	imask;

		imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster);
		if (imask & ~irec->ir_free) {
			xfs_btree_reada_bufs(mp, pag->pag_agno, agbno,
					igeo->blocks_per_cluster,
					&xfs_inode_buf_ops);
		}
		agbno += igeo->blocks_per_cluster;
	}
	blk_finish_plug(&plug);
}

/*
 * Set the bits in @irec's free mask that correspond to the inodes before
 * @agino so that we skip them.  This is how we restart an inode walk that was
 * interrupted in the middle of an inode record.
 */
STATIC void
xfs_iwalk_adjust_start(
	xfs_agino_t			agino,	/* starting inode of chunk */
	struct xfs_inobt_rec_incore	*irec)	/* btree record */
{
	int				idx;	/* index into inode chunk */
	int				i;

	idx = agino - irec->ir_startino;

	/*
	 * We got a right chunk with some left inodes allocated at it.  Grab
	 * the chunk record.  Mark all the uninteresting inodes free because
	 * they're before our start point.
	 */
	for (i = 0; i < idx; i++) {
		if (XFS_INOBT_MASK(i) & ~irec->ir_free)
			irec->ir_freecount++;
	}

	irec->ir_free |= xfs_inobt_maskn(0, idx);
}

/* Allocate memory for a walk. */
STATIC int
xfs_iwalk_alloc(
	struct xfs_iwalk_ag	*iwag)
{
	size_t			size;

	ASSERT(iwag->recs == NULL);
	iwag->nr_recs = 0;

	/* Allocate a prefetch buffer for inobt records. */
	size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
	iwag->recs = kmem_alloc(size, KM_MAYFAIL);
	if (iwag->recs == NULL)
		return -ENOMEM;

	return 0;
}

/* Free memory we allocated for a walk. */
STATIC void
xfs_iwalk_free(
	struct xfs_iwalk_ag	*iwag)
{
	kmem_free(iwag->recs);
	iwag->recs = NULL;
}

/* For each inuse inode in each cached inobt record, call our function. */
STATIC int
xfs_iwalk_ag_recs(
	struct xfs_iwalk_ag	*iwag)
{
	struct xfs_mount	*mp = iwag->mp;
	struct xfs_trans	*tp = iwag->tp;
	struct xfs_perag	*pag = iwag->pag;
	xfs_ino_t		ino;
	unsigned int		i, j;
	int			error;

	for (i = 0; i < iwag->nr_recs; i++) {
		struct xfs_inobt_rec_incore	*irec = &iwag->recs[i];

		trace_xfs_iwalk_ag_rec(mp, pag->pag_agno, irec);

		if (xfs_pwork_want_abort(&iwag->pwork))
			return 0;

		if (iwag->inobt_walk_fn) {
			error = iwag->inobt_walk_fn(mp, tp, pag->pag_agno, irec,
					iwag->data);
			if (error)
				return error;
		}

		if (!iwag->iwalk_fn)
			continue;

		for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
			if (xfs_pwork_want_abort(&iwag->pwork))
				return 0;

			/* Skip if this inode is free */
			if (XFS_INOBT_MASK(j) & irec->ir_free)
				continue;

			/* Otherwise call our function. */
			ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
						irec->ir_startino + j);
			error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
			if (error)
				return error;
		}
	}

	return 0;
}

/* Delete cursor and let go of AGI. */
static inline void
xfs_iwalk_del_inobt(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	**curpp,
	struct xfs_buf		**agi_bpp,
	int			error)
{
	if (*curpp) {
		xfs_btree_del_cursor(*curpp, error);
		*curpp = NULL;
	}
	if (*agi_bpp) {
		xfs_trans_brelse(tp, *agi_bpp);
		*agi_bpp = NULL;
	}
}

/*
 * Set ourselves up for walking inobt records starting from a given point in
 * the filesystem.
 *
 * If caller passed in a nonzero start inode number, load the record from the
 * inobt and make the record look like all the inodes before agino are free so
 * that we skip them, and then move the cursor to the next inobt record.  This
 * is how we support starting an iwalk in the middle of an inode chunk.
 *
 * If the caller passed in a start number of zero, move the cursor to the
 * first inobt record.
 *
 * The caller is responsible for cleaning up the cursor and buffer pointer
 * regardless of the error status.
 */
STATIC int
xfs_iwalk_ag_start(
	struct xfs_iwalk_ag	*iwag,
	xfs_agino_t		agino,
	struct xfs_btree_cur	**curpp,
	struct xfs_buf		**agi_bpp,
	int			*has_more)
{
	struct xfs_mount	*mp = iwag->mp;
	struct xfs_trans	*tp = iwag->tp;
	struct xfs_perag	*pag = iwag->pag;
	struct xfs_inobt_rec_incore *irec;
	int			error;

	/* Set up a fresh cursor and empty the inobt cache. */
	iwag->nr_recs = 0;
	error = xfs_inobt_cur(mp, tp, pag, XFS_BTNUM_INO, curpp, agi_bpp);
	if (error)
		return error;

	/* Starting at the beginning of the AG?  That's easy! */
	if (agino == 0)
		return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more);

	/*
	 * Otherwise, we have to grab the inobt record where we left off, stuff
	 * the record into our cache, and then see if there are more records.
	 * We require a lookup cache of at least two elements so that the
	 * caller doesn't have to deal with tearing down the cursor to walk the
	 * records.
	 */
	error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more);
	if (error)
		return error;

	/*
	 * If the LE lookup at @agino yields no records, jump ahead to the
	 * inobt cursor increment to see if there are more records to process.
	 */
	if (!*has_more)
		goto out_advance;

	/* Get the record, should always work */
	irec = &iwag->recs[iwag->nr_recs];
	error = xfs_inobt_get_rec(*curpp, irec, has_more);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(mp, *has_more != 1))
		return -EFSCORRUPTED;

	iwag->lastino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
				irec->ir_startino + XFS_INODES_PER_CHUNK - 1);

	/*
	 * If the LE lookup yielded an inobt record before the cursor position,
	 * skip it and see if there's another one after it.
	 */
	if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
		goto out_advance;

	/*
	 * If agino fell in the middle of the inode record, make it look like
	 * the inodes up to agino are free so that we don't return them again.
	 */
	if (iwag->trim_start)
		xfs_iwalk_adjust_start(agino, irec);

	/*
	 * The prefetch calculation is supposed to give us a large enough inobt
	 * record cache that grab_ichunk can stage a partial first record and
	 * the loop body can cache a record without having to check for cache
	 * space until after it reads an inobt record.
	 */
	iwag->nr_recs++;
	ASSERT(iwag->nr_recs < iwag->sz_recs);

out_advance:
	return xfs_btree_increment(*curpp, 0, has_more);
}

/*
 * The inobt record cache is full, so preserve the inobt cursor state and
 * run callbacks on the cached inobt records.  When we're done, restore the
 * cursor state to wherever the cursor would have been had the cache not been
 * full (and therefore we could've just incremented the cursor) if *@has_more
 * is true.  On exit, *@has_more will indicate whether or not the caller should
 * try for more inode records.
 */
STATIC int
xfs_iwalk_run_callbacks(
	struct xfs_iwalk_ag		*iwag,
	struct xfs_btree_cur		**curpp,
	struct xfs_buf			**agi_bpp,
	int				*has_more)
{
	struct xfs_mount		*mp = iwag->mp;
	struct xfs_inobt_rec_incore	*irec;
	xfs_agino_t			next_agino;
	int				error;

	next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;

	ASSERT(iwag->nr_recs > 0);

	/* Delete cursor but remember the last record we cached... */
	xfs_iwalk_del_inobt(iwag->tp, curpp, agi_bpp, 0);
	irec = &iwag->recs[iwag->nr_recs - 1];
	ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK);

	if (iwag->drop_trans) {
		xfs_trans_cancel(iwag->tp);
		iwag->tp = NULL;
	}

	error = xfs_iwalk_ag_recs(iwag);
	if (error)
		return error;

	/* ...empty the cache... */
	iwag->nr_recs = 0;

	if (!has_more)
		return 0;

	if (iwag->drop_trans) {
		error = xfs_trans_alloc_empty(mp, &iwag->tp);
		if (error)
			return error;
	}

	/* ...and recreate the cursor just past where we left off. */
	error = xfs_inobt_cur(mp, iwag->tp, iwag->pag, XFS_BTNUM_INO, curpp,
			agi_bpp);
	if (error)
		return error;

	return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
}

/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
STATIC int
xfs_iwalk_ag(
	struct xfs_iwalk_ag		*iwag)
{
	struct xfs_mount		*mp = iwag->mp;
	struct xfs_perag		*pag = iwag->pag;
	struct xfs_buf			*agi_bp = NULL;
	struct xfs_btree_cur		*cur = NULL;
	xfs_agino_t			agino;
	int				has_more;
	int				error = 0;

	/* Set up our cursor at the right place in the inode btree. */
	ASSERT(pag->pag_agno == XFS_INO_TO_AGNO(mp, iwag->startino));
	agino = XFS_INO_TO_AGINO(mp, iwag->startino);
	error = xfs_iwalk_ag_start(iwag, agino, &cur, &agi_bp, &has_more);

	while (!error && has_more) {
		struct xfs_inobt_rec_incore	*irec;
		xfs_ino_t			rec_fsino;

		cond_resched();
		if (xfs_pwork_want_abort(&iwag->pwork))
			goto out;

		/* Fetch the inobt record. */
		irec = &iwag->recs[iwag->nr_recs];
		error = xfs_inobt_get_rec(cur, irec, &has_more);
		if (error || !has_more)
			break;

		/* Make sure that we always move forward. */
		rec_fsino = XFS_AGINO_TO_INO(mp, pag->pag_agno, irec->ir_startino);
		if (iwag->lastino != NULLFSINO &&
		    XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
			error = -EFSCORRUPTED;
			goto out;
		}
		iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;

		/* No allocated inodes in this chunk; skip it. */
		if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
			error = xfs_btree_increment(cur, 0, &has_more);
			if (error)
				break;
			continue;
		}

		/*
		 * Start readahead for this inode chunk in anticipation of
		 * walking the inodes.
		 */
		if (iwag->iwalk_fn)
			xfs_iwalk_ichunk_ra(mp, pag, irec);

		/*
		 * If there's space in the buffer for more records, increment
		 * the btree cursor and grab more.
		 */
		if (++iwag->nr_recs < iwag->sz_recs) {
			error = xfs_btree_increment(cur, 0, &has_more);
			if (error || !has_more)
				break;
			continue;
		}

		/*
		 * Otherwise, we need to save cursor state and run the callback
		 * function on the cached records.  The run_callbacks function
		 * is supposed to return a cursor pointing to the record where
		 * we would be if we had been able to increment like above.
		 */
		ASSERT(has_more);
		error = xfs_iwalk_run_callbacks(iwag, &cur, &agi_bp, &has_more);
	}

	if (iwag->nr_recs == 0 || error)
		goto out;

	/* Walk the unprocessed records in the cache. */
	error = xfs_iwalk_run_callbacks(iwag, &cur, &agi_bp, &has_more);

out:
	xfs_iwalk_del_inobt(iwag->tp, &cur, &agi_bp, error);
	return error;
}

/*
 * We experimentally determined that the reduction in ioctl call overhead
 * diminishes when userspace asks for more than 2048 inodes, so we'll cap
 * prefetch at this point.
 */
#define IWALK_MAX_INODE_PREFETCH	(2048U)

/*
 * Given the number of inodes to prefetch, set the number of inobt records that
 * we cache in memory, which controls the number of inodes we try to read
 * ahead.  Set the maximum if @inodes == 0.
 */
static inline unsigned int
xfs_iwalk_prefetch(
	unsigned int		inodes)
{
	unsigned int		inobt_records;

	/*
	 * If the caller didn't tell us the number of inodes they wanted,
	 * assume the maximum prefetch possible for best performance.
	 * Otherwise, cap prefetch at that maximum so that we don't start an
	 * absurd amount of prefetch.
	 */
	if (inodes == 0)
		inodes = IWALK_MAX_INODE_PREFETCH;
	inodes = min(inodes, IWALK_MAX_INODE_PREFETCH);

	/* Round the inode count up to a full chunk. */
	inodes = round_up(inodes, XFS_INODES_PER_CHUNK);

	/*
	 * In order to convert the number of inodes to prefetch into an
	 * estimate of the number of inobt records to cache, we require a
	 * conversion factor that reflects our expectations of the average
	 * loading factor of an inode chunk.  Based on data gathered, most
	 * (but not all) filesystems manage to keep the inode chunks totally
	 * full, so we'll underestimate slightly so that our readahead will
	 * still deliver the performance we want on aging filesystems:
	 *
	 * inobt = inodes / (INODES_PER_CHUNK * (4 / 5));
	 *
	 * The funny math is to avoid integer division.
	 */
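	/*
	 * Worked example (assuming XFS_INODES_PER_CHUNK is 64): a request for
	 * the 2048-inode maximum becomes (2048 * 5) / (4 * 64) == 40 cached
	 * inobt records.
	 */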
	inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK);

	/*
	 * Allocate enough space to prefetch at least two inobt records so that
	 * we can cache both the record where the iwalk started and the next
	 * record.  This simplifies the AG inode walk loop setup code.
	 */
	return max(inobt_records, 2U);
}

/*
 * Walk all inodes in the filesystem starting from @startino.  The @iwalk_fn
 * will be called for each allocated inode, being passed the inode's number and
 * @data.  @max_prefetch controls how many inobt records' worth of inodes we
 * try to readahead.
 */
int
xfs_iwalk(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		startino,
	unsigned int		flags,
	xfs_iwalk_fn		iwalk_fn,
	unsigned int		inode_records,
	void			*data)
{
	struct xfs_iwalk_ag	iwag = {
		.mp		= mp,
		.tp		= tp,
		.iwalk_fn	= iwalk_fn,
		.data		= data,
		.startino	= startino,
		.sz_recs	= xfs_iwalk_prefetch(inode_records),
		.trim_start	= 1,
		.skip_empty	= 1,
		.pwork		= XFS_PWORK_SINGLE_THREADED,
		.lastino	= NULLFSINO,
	};
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, startino);
	int			error;

	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));

	error = xfs_iwalk_alloc(&iwag);
	if (error)
		return error;

	for_each_perag_from(mp, agno, pag) {
		iwag.pag = pag;
		error = xfs_iwalk_ag(&iwag);
		if (error)
			break;
		iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
		if (flags & XFS_INOBT_WALK_SAME_AG)
			break;
		iwag.pag = NULL;
	}

	if (iwag.pag)
		xfs_perag_put(pag);
	xfs_iwalk_free(&iwag);
	return error;
}

/* Run per-thread iwalk work. */
static int
xfs_iwalk_ag_work(
	struct xfs_mount	*mp,
	struct xfs_pwork	*pwork)
{
	struct xfs_iwalk_ag	*iwag;
	int			error = 0;

	iwag = container_of(pwork, struct xfs_iwalk_ag, pwork);
	if (xfs_pwork_want_abort(pwork))
		goto out;

	error = xfs_iwalk_alloc(iwag);
	if (error)
		goto out;

	/*
	 * Grab an empty transaction so that we can use its recursive buffer
	 * locking abilities to detect cycles in the inobt without deadlocking.
	 */
	error = xfs_trans_alloc_empty(mp, &iwag->tp);
	if (error)
		goto out;
	iwag->drop_trans = 1;

	error = xfs_iwalk_ag(iwag);
	if (iwag->tp)
		xfs_trans_cancel(iwag->tp);
	xfs_iwalk_free(iwag);
out:
	xfs_perag_put(iwag->pag);
	kmem_free(iwag);
	return error;
}

/*
 * Walk all the inodes in the filesystem using multiple threads to process each
 * AG.
 */
int
xfs_iwalk_threaded(
	struct xfs_mount	*mp,
	xfs_ino_t		startino,
	unsigned int		flags,
	xfs_iwalk_fn		iwalk_fn,
	unsigned int		inode_records,
	bool			polled,
	void			*data)
{
	struct xfs_pwork_ctl	pctl;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, startino);
	int			error;

	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));

	error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk");
	if (error)
		return error;

	for_each_perag_from(mp, agno, pag) {
		struct xfs_iwalk_ag	*iwag;

		if (xfs_pwork_ctl_want_abort(&pctl))
			break;

		iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), 0);
		iwag->mp = mp;

		/*
		 * perag is being handed off to async work, so take another
		 * reference for the async work to release.
		 */
		atomic_inc(&pag->pag_ref);
		iwag->pag = pag;
		iwag->iwalk_fn = iwalk_fn;
		iwag->data = data;
		iwag->startino = startino;
		iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
		iwag->lastino = NULLFSINO;
		xfs_pwork_queue(&pctl, &iwag->pwork);
		startino = XFS_AGINO_TO_INO(mp, pag->pag_agno + 1, 0);
		if (flags & XFS_INOBT_WALK_SAME_AG)
			break;
	}
	if (pag)
		xfs_perag_put(pag);
	if (polled)
		xfs_pwork_poll(&pctl);
	return xfs_pwork_destroy(&pctl);
}

/*
 * Allow callers to cache up to a page's worth of inobt records.  This reflects
 * the existing inumbers prefetching behavior.  Since the inobt walk does not
 * itself do anything with the inobt records, we can set a fairly high limit
 * here.
 */
#define MAX_INOBT_WALK_PREFETCH	\
	(PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore))
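
/*
 * Rough scale of the cap above, assuming 4096-byte pages and a 16-byte
 * struct xfs_inobt_rec_incore: MAX_INOBT_WALK_PREFETCH works out to 256
 * records, i.e. roughly 16384 inodes' worth of inobt data per walk buffer.
 */
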
/*
 * Given the number of records that the user wanted, set the number of inobt
 * records that we buffer in memory.  Set the maximum if @inobt_records == 0.
 */
static inline unsigned int
xfs_inobt_walk_prefetch(
	unsigned int		inobt_records)
{
	/*
	 * If the caller didn't tell us the number of inobt records they
	 * wanted, assume the maximum prefetch possible for best performance.
	 */
	if (inobt_records == 0)
		inobt_records = MAX_INOBT_WALK_PREFETCH;

	/*
	 * Allocate enough space to prefetch at least two inobt records so that
	 * we can cache both the record where the iwalk started and the next
	 * record.  This simplifies the AG inode walk loop setup code.
	 */
	inobt_records = max(inobt_records, 2U);

	/*
	 * Cap prefetch at that maximum so that we don't use an absurd amount
	 * of memory.
	 */
	return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH);
}

/*
 * Walk all inode btree records in the filesystem starting from @startino.  The
 * @inobt_walk_fn will be called for each btree record, being passed the incore
 * record and @data.  @max_prefetch controls how many inobt records we try to
 * cache ahead of time.
 */
int
xfs_inobt_walk(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		startino,
	unsigned int		flags,
	xfs_inobt_walk_fn	inobt_walk_fn,
	unsigned int		inobt_records,
	void			*data)
{
	struct xfs_iwalk_ag	iwag = {
		.mp		= mp,
		.tp		= tp,
		.inobt_walk_fn	= inobt_walk_fn,
		.data		= data,
		.startino	= startino,
		.sz_recs	= xfs_inobt_walk_prefetch(inobt_records),
		.pwork		= XFS_PWORK_SINGLE_THREADED,
		.lastino	= NULLFSINO,
	};
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, startino);
	int			error;

	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));

	error = xfs_iwalk_alloc(&iwag);
	if (error)
		return error;

	for_each_perag_from(mp, agno, pag) {
		iwag.pag = pag;
		error = xfs_iwalk_ag(&iwag);
		if (error)
			break;
		iwag.startino = XFS_AGINO_TO_INO(mp, pag->pag_agno + 1, 0);
		if (flags & XFS_INOBT_WALK_SAME_AG)
			break;
		iwag.pag = NULL;
	}

	if (iwag.pag)
		xfs_perag_put(pag);
	xfs_iwalk_free(&iwag);
	return error;
}