xfs: iscan batching should handle unallocated inodes too

The inode scanner tries to reduce contention on the AGI header buffer
lock by grabbing references to consecutive allocated inodes.  Batching
stops as soon as we encounter an unallocated inode.  This is unfortunate
because in the worst case performance collapses to the old "one at a
time" behavior if every other inode is free.

This is correct behavior, but we could do better.  Unallocated inodes by
definition have nothing to scan, which means the iscan can ignore them
as long as someone ensures that the scan data will reflect another
thread allocating the inode and adding interesting metadata to that
inode.  That mechanism is, of course, the live update hooks.

Therefore, extend the batching mechanism to track unallocated inodes
adjacent to the scan cursor.  If (after the scanner drops the AGI) some
other thread allocates one of those inodes and begins using it, the
_want_live_update predicate tells the caller's live update hook to
incorporate all live updates to that inode, because the scanner still
thinks it is unallocated and will never visit it.

Note that we cannot just copy the ir_free bitmap into the scan cursor
because the batching stops if iget says the inode is in an intermediate
state (e.g. on the inactivation list) and cannot be igrabbed.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Darrick J. Wong 2024-02-22 12:30:48 -08:00
parent a7a686cb07
commit 82334a79c6
3 changed files with 119 additions and 15 deletions

View File

@ -61,7 +61,8 @@ xchk_iscan_find_next(
struct xfs_buf *agi_bp,
struct xfs_perag *pag,
xfs_inofree_t *allocmaskp,
xfs_agino_t *cursor)
xfs_agino_t *cursor,
uint8_t *nr_inodesp)
{
struct xfs_scrub *sc = iscan->sc;
struct xfs_inobt_rec_incore rec;
@ -147,6 +148,7 @@ xchk_iscan_find_next(
ASSERT(next >= 0);
*cursor = rec.ir_startino + next;
*allocmaskp = allocmask >> next;
*nr_inodesp = XFS_INODES_PER_CHUNK - next;
break;
}
}
@ -228,7 +230,8 @@ xchk_iscan_advance(
struct xchk_iscan *iscan,
struct xfs_perag **pagp,
struct xfs_buf **agi_bpp,
xfs_inofree_t *allocmaskp)
xfs_inofree_t *allocmaskp,
uint8_t *nr_inodesp)
{
struct xfs_scrub *sc = iscan->sc;
struct xfs_mount *mp = sc->mp;
@ -255,7 +258,7 @@ xchk_iscan_advance(
agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
&agino);
&agino, nr_inodesp);
if (ret)
goto out_buf;
@ -344,12 +347,14 @@ xchk_iscan_iget(
struct xchk_iscan *iscan,
struct xfs_perag *pag,
struct xfs_buf *agi_bp,
xfs_inofree_t allocmask)
xfs_inofree_t allocmask,
uint8_t nr_inodes)
{
struct xfs_scrub *sc = iscan->sc;
struct xfs_mount *mp = sc->mp;
xfs_ino_t ino = iscan->cursor_ino;
unsigned int idx = 0;
unsigned int i;
int error;
ASSERT(iscan->__inodes[0] == NULL);
@ -399,10 +404,28 @@ xchk_iscan_iget(
/*
* Now that we've filled the first slot in __inodes, try to fill the
* rest of the batch with consecutively ordered inodes to reduce the
* number of _iter calls. If we can't get an inode, we stop and return
* what we have.
* number of _iter calls. Make a bitmap of unallocated inodes from the
* zeroes in the inuse bitmap; these inodes will not be scanned, but
* the _want_live_update predicate will pass through all live updates.
*
* If we can't iget an allocated inode, stop and return what we have.
*/
for (; allocmask & 1; allocmask >>= 1, ino++, idx++) {
mutex_lock(&iscan->lock);
iscan->__batch_ino = ino - 1;
iscan->__skipped_inomask = 0;
mutex_unlock(&iscan->lock);
for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
if (!(allocmask & 1)) {
ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));
mutex_lock(&iscan->lock);
iscan->cursor_ino = ino;
iscan->__skipped_inomask |= (1ULL << i);
mutex_unlock(&iscan->lock);
continue;
}
ASSERT(iscan->__inodes[idx] == NULL);
error = xfs_iget(sc->mp, sc->tp, ino, XFS_IGET_NORETRY, 0,
@ -413,14 +436,42 @@ xchk_iscan_iget(
mutex_lock(&iscan->lock);
iscan->cursor_ino = ino;
mutex_unlock(&iscan->lock);
idx++;
}
trace_xchk_iscan_iget_batch(sc->mp, iscan, idx);
trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
xfs_trans_brelse(sc->tp, agi_bp);
xfs_perag_put(pag);
return idx;
}
/*
 * Retire the current batch.  If a batch is active, move the visited cursor
 * up to the highest-numbered inode recorded in the skip mask (the cursor
 * never moves backwards), then reset the batch state so that no inode is
 * reported as skipped.  All of this happens under iscan->lock.
 */
STATIC void
xchk_iscan_finish_batch(
	struct xchk_iscan	*iscan)
{
	mutex_lock(&iscan->lock);

	if (iscan->__batch_ino != NULLFSINO) {
		/* Skip mask bit positions are offsets from the batch base. */
		xfs_ino_t	last_skipped = iscan->__batch_ino +
				xfs_highbit64(iscan->__skipped_inomask);

		iscan->__visited_ino = max(iscan->__visited_ino,
					   last_skipped);

		trace_xchk_iscan_skip(iscan);
	}

	/* No batch is active until the next one is loaded. */
	iscan->__skipped_inomask = 0;
	iscan->__batch_ino = NULLFSINO;

	mutex_unlock(&iscan->lock);
}
/*
* Advance the inode scan cursor to the next allocated inode and return up to
* 64 consecutive allocated inodes starting with the cursor position.
@ -432,6 +483,8 @@ xchk_iscan_iter_batch(
struct xfs_scrub *sc = iscan->sc;
int ret;
xchk_iscan_finish_batch(iscan);
if (iscan->iget_timeout)
iscan->__iget_deadline = jiffies +
msecs_to_jiffies(iscan->iget_timeout);
@ -440,8 +493,10 @@ xchk_iscan_iter_batch(
struct xfs_buf *agi_bp = NULL;
struct xfs_perag *pag = NULL;
xfs_inofree_t allocmask = 0;
uint8_t nr_inodes = 0;
ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask);
ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask,
&nr_inodes);
if (ret != 1)
return ret;
@ -452,7 +507,7 @@ xchk_iscan_iter_batch(
break;
}
ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask);
ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes);
} while (ret == -EAGAIN);
return ret;
@ -559,6 +614,9 @@ xchk_iscan_start(
start_ino = xchk_iscan_rotor(sc->mp);
iscan->__batch_ino = NULLFSINO;
iscan->__skipped_inomask = 0;
iscan->sc = sc;
clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
iscan->iget_timeout = iget_timeout;
@ -587,6 +645,26 @@ xchk_iscan_mark_visited(
mutex_unlock(&iscan->lock);
}
/*
 * Was @ino left out of the current batch because it was not allocated when
 * the batch was loaded?  That is the case when a batch is active, @ino lies
 * within XFS_INODES_PER_CHUNK inodes of the batch base, and the matching bit
 * is set in the skip mask.  Such an inode will not be scanned, so every live
 * update to it must be passed to the caller to keep the scan correct.
 *
 * NOTE(review): presumably the caller holds iscan->lock -- confirm.
 */
static inline bool
xchk_iscan_skipped(
	const struct xchk_iscan	*iscan,
	xfs_ino_t		ino)
{
	xfs_ino_t		batch_base = iscan->__batch_ino;

	/* No active batch, or @ino is outside the batch window. */
	if (batch_base == NULLFSINO ||
	    ino < batch_base ||
	    ino >= batch_base + XFS_INODES_PER_CHUNK)
		return false;

	return (iscan->__skipped_inomask >> (ino - batch_base)) & 1;
}
/*
* Do we need a live update for this inode? This is true if the scanner thread
* has visited this inode and the scan hasn't been aborted due to errors.
@ -622,6 +700,15 @@ xchk_iscan_want_live_update(
goto unlock;
}
/*
* This inode was not allocated at the time of the iscan batch.
* The caller should receive all updates.
*/
if (xchk_iscan_skipped(iscan, ino)) {
ret = true;
goto unlock;
}
/*
* The visited cursor hasn't yet wrapped around the end of the FS. If
* @ino is inside the starred range, the caller should receive updates:

View File

@ -44,8 +44,12 @@ struct xchk_iscan {
/*
* The scan grabs batches of inodes and stashes them here before
* handing them out with _iter.
* handing them out with _iter. Unallocated inodes are set in the
* mask so that all updates to that inode are selected for live
* update propagation.
*/
xfs_ino_t __batch_ino;
xfs_inofree_t __skipped_inomask;
struct xfs_inode *__inodes[XFS_INODES_PER_CHUNK];
};

View File

@ -1172,6 +1172,7 @@ DEFINE_EVENT(xchk_iscan_class, name, \
TP_ARGS(iscan))
DEFINE_ISCAN_EVENT(xchk_iscan_move_cursor);
DEFINE_ISCAN_EVENT(xchk_iscan_visit);
DEFINE_ISCAN_EVENT(xchk_iscan_skip);
DEFINE_ISCAN_EVENT(xchk_iscan_advance_ag);
DECLARE_EVENT_CLASS(xchk_iscan_ino_class,
@ -1229,25 +1230,37 @@ TRACE_EVENT(xchk_iscan_iget,
TRACE_EVENT(xchk_iscan_iget_batch,
TP_PROTO(struct xfs_mount *mp, struct xchk_iscan *iscan,
unsigned int nr),
TP_ARGS(mp, iscan, nr),
unsigned int nr, unsigned int avail),
TP_ARGS(mp, iscan, nr, avail),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, cursor)
__field(xfs_ino_t, visited)
__field(unsigned int, nr)
__field(unsigned int, avail)
__field(unsigned int, unavail)
__field(xfs_ino_t, batch_ino)
__field(unsigned long long, skipmask)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->cursor = iscan->cursor_ino;
__entry->visited = iscan->__visited_ino;
__entry->nr = nr;
__entry->avail = avail;
__entry->unavail = hweight64(iscan->__skipped_inomask);
__entry->batch_ino = iscan->__batch_ino;
__entry->skipmask = iscan->__skipped_inomask;
),
TP_printk("dev %d:%d iscan cursor 0x%llx visited 0x%llx nr %d",
TP_printk("dev %d:%d iscan cursor 0x%llx visited 0x%llx batchino 0x%llx skipmask 0x%llx nr %u avail %u unavail %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->cursor,
__entry->visited,
__entry->nr)
__entry->batch_ino,
__entry->skipmask,
__entry->nr,
__entry->avail,
__entry->unavail)
);
TRACE_EVENT(xchk_iscan_iget_retry_wait,