xfs: force small EFIs for reaping btree extents

Introduce the concept of a defer ops barrier to separate consecutively
queued pending work items of the same type.  With a barrier in place,
the two work items will be tracked separately, and receive separate log
intent items.  The goal here is to prevent reaping of old metadata
blocks from creating unnecessarily huge EFIs that could then run the
risk of overflowing the scrub transaction.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Darrick J. Wong 2023-12-06 18:41:00 -08:00
parent 6bb9ea8ecd
commit 3f3cec0310
3 changed files with 104 additions and 11 deletions

View File

@ -182,6 +182,58 @@ static struct kmem_cache *xfs_defer_pending_cache;
* Note that the continuation requested between t2 and t3 is likely to
* reoccur.
*/
STATIC struct xfs_log_item *
xfs_defer_barrier_create_intent(
struct xfs_trans *tp,
struct list_head *items,
unsigned int count,
bool sort)
{
return NULL;
}
STATIC void
xfs_defer_barrier_abort_intent(
struct xfs_log_item *intent)
{
/* empty */
}
STATIC struct xfs_log_item *
xfs_defer_barrier_create_done(
struct xfs_trans *tp,
struct xfs_log_item *intent,
unsigned int count)
{
return NULL;
}
STATIC int
xfs_defer_barrier_finish_item(
struct xfs_trans *tp,
struct xfs_log_item *done,
struct list_head *item,
struct xfs_btree_cur **state)
{
ASSERT(0);
return -EFSCORRUPTED;
}
STATIC void
xfs_defer_barrier_cancel_item(
struct list_head *item)
{
ASSERT(0);
}
static const struct xfs_defer_op_type xfs_barrier_defer_type = {
.max_items = 1,
.create_intent = xfs_defer_barrier_create_intent,
.abort_intent = xfs_defer_barrier_abort_intent,
.create_done = xfs_defer_barrier_create_done,
.finish_item = xfs_defer_barrier_finish_item,
.cancel_item = xfs_defer_barrier_cancel_item,
};
static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_BMAP] = &xfs_bmap_update_defer_type,
@ -190,6 +242,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type,
[XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
[XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type,
[XFS_DEFER_OPS_TYPE_BARRIER] = &xfs_barrier_defer_type,
};
/* Create a log intent done item for a log intent item. */
@ -779,6 +832,23 @@ xfs_defer_can_append(
return true;
}
/* Create a new pending item at the end of the transaction list. */
static inline struct xfs_defer_pending *
xfs_defer_alloc(
struct xfs_trans *tp,
enum xfs_defer_ops_type type)
{
struct xfs_defer_pending *dfp;
dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
GFP_NOFS | __GFP_NOFAIL);
dfp->dfp_type = type;
INIT_LIST_HEAD(&dfp->dfp_work);
list_add_tail(&dfp->dfp_list, &tp->t_dfops);
return dfp;
}
/* Add an item for later deferred processing. */
struct xfs_defer_pending *
xfs_defer_add(
@ -793,23 +863,38 @@ xfs_defer_add(
BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
dfp = xfs_defer_find_last(tp, type, ops);
if (!dfp || !xfs_defer_can_append(dfp, ops)) {
/* Create a new pending item at the end of the intake list. */
dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
GFP_NOFS | __GFP_NOFAIL);
dfp->dfp_type = type;
dfp->dfp_intent = NULL;
dfp->dfp_done = NULL;
dfp->dfp_count = 0;
INIT_LIST_HEAD(&dfp->dfp_work);
list_add_tail(&dfp->dfp_list, &tp->t_dfops);
}
if (!dfp || !xfs_defer_can_append(dfp, ops))
dfp = xfs_defer_alloc(tp, type);
xfs_defer_add_item(dfp, li);
trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
return dfp;
}
/*
* Add a defer ops barrier to force two otherwise adjacent deferred work items
* to be tracked separately and have separate log items.
*/
void
xfs_defer_add_barrier(
struct xfs_trans *tp)
{
struct xfs_defer_pending *dfp;
const enum xfs_defer_ops_type type = XFS_DEFER_OPS_TYPE_BARRIER;
const struct xfs_defer_op_type *ops = defer_op_types[type];
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
/* If the last defer op added was a barrier, we're done. */
dfp = xfs_defer_find_last(tp, type, ops);
if (dfp)
return;
xfs_defer_alloc(tp, type);
trace_xfs_defer_add_item(tp->t_mountp, dfp, NULL);
}
/*
* Create a pending deferred work item to replay the recovered intent item
* and add it to the list.

View File

@ -20,6 +20,7 @@ enum xfs_defer_ops_type {
XFS_DEFER_OPS_TYPE_FREE,
XFS_DEFER_OPS_TYPE_AGFL_FREE,
XFS_DEFER_OPS_TYPE_ATTR,
XFS_DEFER_OPS_TYPE_BARRIER,
XFS_DEFER_OPS_TYPE_MAX,
};
@ -163,4 +164,6 @@ xfs_defer_add_item(
int __init xfs_defer_init_item_caches(void);
void xfs_defer_destroy_item_caches(void);
void xfs_defer_add_barrier(struct xfs_trans *tp);
#endif /* __XFS_DEFER_H__ */

View File

@ -31,6 +31,7 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_remote.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@ -409,6 +410,8 @@ xreap_agextent_iter(
/*
* Use deferred frees to get rid of the old btree blocks to try to
* minimize the window in which we could crash and lose the old blocks.
* Add a defer ops barrier every other extent to avoid stressing the
* system with large EFIs.
*/
error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
rs->resv, true);
@ -416,6 +419,8 @@ xreap_agextent_iter(
return error;
rs->deferred++;
if (rs->deferred % 2 == 0)
xfs_defer_add_barrier(sc->tp);
return 0;
}