xfs: detect mergeable and overlapping btree records [v24.5]
While I was doing differential fuzz analysis between xfs_scrub and xfs_repair, I noticed that xfs_repair was only partially effective at detecting btree records that can be merged, and xfs_scrub totally didn't notice at all. For every interval btree type except for the bmbt, there should never exist two adjacent records with adjacent keyspaces because the blockcount field is always large enough to span the entire keyspace of the domain. This is because the free space, rmap, and refcount btrees have a blockcount field large enough to store the maximum AG length, and there can never be an allocation larger than an AG. The bmbt is a different story due to its ondisk encoding where the blockcount is only 21 bits wide. Because AGs can span up to 2^31 blocks and the RT volume can span up to 2^52 blocks, a preallocation of 2^22 blocks will be expressed as two records of 2^21 length. We don't opportunistically combine records when doing bmbt operations, which is why the fsck tools have never complained about this scenario. Offline repair is partially effective at detecting mergeable records because I taught it to do that for the rmap and refcount btrees. This series enhances the free space, rmap, and refcount scrubbers to detect mergeable records. For the bmbt, it will flag the file as being eligible for an optimization to shrink the size of the data structure. The last patch in this set also enhances the rmap scrubber to detect records that overlap incorrectly. This check is done automatically for non-overlapping btree types, but we have to do it separately for the rmapbt because there are constraints on which allocation types are allowed to overlap. Signed-off-by: Darrick J. Wong <djwong@kernel.org> -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQQ2qTKExjcn+O1o2YRKO3ySh0YRpgUCZDYdqAAKCRBKO3ySh0YR pj1kAP97i2gcFebCwLC+YTDFQoiGVYTf2Mo/bHcPopP/ptXFSAD9HYeG8xM0T3aB EGrmyaXSPcO+l/txMGTx8ACCJjxOFwg= =G9gi -----END PGP SIGNATURE----- Merge tag 'scrub-detect-mergeable-records-6.4_2023-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next xfs: detect mergeable and overlapping btree records [v24.5] While I was doing differential fuzz analysis between xfs_scrub and xfs_repair, I noticed that xfs_repair was only partially effective at detecting btree records that can be merged, and xfs_scrub totally didn't notice at all. For every interval btree type except for the bmbt, there should never exist two adjacent records with adjacent keyspaces because the blockcount field is always large enough to span the entire keyspace of the domain. This is because the free space, rmap, and refcount btrees have a blockcount field large enough to store the maximum AG length, and there can never be an allocation larger than an AG. The bmbt is a different story due to its ondisk encoding where the blockcount is only 21 bits wide. Because AGs can span up to 2^31 blocks and the RT volume can span up to 2^52 blocks, a preallocation of 2^22 blocks will be expressed as two records of 2^21 length. We don't opportunistically combine records when doing bmbt operations, which is why the fsck tools have never complained about this scenario. Offline repair is partially effective at detecting mergeable records because I taught it to do that for the rmap and refcount btrees. This series enhances the free space, rmap, and refcount scrubbers to detect mergeable records. For the bmbt, it will flag the file as being eligible for an optimization to shrink the size of the data structure. The last patch in this set also enhances the rmap scrubber to detect records that overlap incorrectly. This check is done automatically for non-overlapping btree types, but we have to do it separately for the rmapbt because there are constraints on which allocation types are allowed to overlap. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
commit
b9fcf89f6b
@ -31,6 +31,12 @@ xchk_setup_ag_allocbt(
|
||||
}
|
||||
|
||||
/* Free space btree scrubber. */
|
||||
|
||||
struct xchk_alloc {
|
||||
/* Previous free space extent. */
|
||||
struct xfs_alloc_rec_incore prev;
|
||||
};
|
||||
|
||||
/*
|
||||
* Ensure there's a corresponding cntbt/bnobt record matching this
|
||||
* bnobt/cntbt record, respectively.
|
||||
@ -93,6 +99,24 @@ xchk_allocbt_xref(
|
||||
xchk_xref_is_not_cow_staging(sc, agbno, len);
|
||||
}
|
||||
|
||||
/* Flag failures for records that could be merged. */
|
||||
STATIC void
|
||||
xchk_allocbt_mergeable(
|
||||
struct xchk_btree *bs,
|
||||
struct xchk_alloc *ca,
|
||||
const struct xfs_alloc_rec_incore *irec)
|
||||
{
|
||||
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
||||
return;
|
||||
|
||||
if (ca->prev.ar_blockcount > 0 &&
|
||||
ca->prev.ar_startblock + ca->prev.ar_blockcount == irec->ar_startblock &&
|
||||
ca->prev.ar_blockcount + irec->ar_blockcount < (uint32_t)~0U)
|
||||
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
|
||||
|
||||
memcpy(&ca->prev, irec, sizeof(*irec));
|
||||
}
|
||||
|
||||
/* Scrub a bnobt/cntbt record. */
|
||||
STATIC int
|
||||
xchk_allocbt_rec(
|
||||
@ -100,6 +124,7 @@ xchk_allocbt_rec(
|
||||
const union xfs_btree_rec *rec)
|
||||
{
|
||||
struct xfs_alloc_rec_incore irec;
|
||||
struct xchk_alloc *ca = bs->private;
|
||||
|
||||
xfs_alloc_btrec_to_irec(rec, &irec);
|
||||
if (xfs_alloc_check_irec(bs->cur, &irec) != NULL) {
|
||||
@ -107,6 +132,7 @@ xchk_allocbt_rec(
|
||||
return 0;
|
||||
}
|
||||
|
||||
xchk_allocbt_mergeable(bs, ca, &irec);
|
||||
xchk_allocbt_xref(bs->sc, &irec);
|
||||
|
||||
return 0;
|
||||
@ -118,10 +144,11 @@ xchk_allocbt(
|
||||
struct xfs_scrub *sc,
|
||||
xfs_btnum_t which)
|
||||
{
|
||||
struct xchk_alloc ca = { };
|
||||
struct xfs_btree_cur *cur;
|
||||
|
||||
cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
|
||||
return xchk_btree(sc, cur, xchk_allocbt_rec, &XFS_RMAP_OINFO_AG, NULL);
|
||||
return xchk_btree(sc, cur, xchk_allocbt_rec, &XFS_RMAP_OINFO_AG, &ca);
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -333,6 +333,9 @@ xchk_refcountbt_xref(
|
||||
}
|
||||
|
||||
struct xchk_refcbt_records {
|
||||
/* Previous refcount record. */
|
||||
struct xfs_refcount_irec prev_rec;
|
||||
|
||||
/* The next AG block where we aren't expecting shared extents. */
|
||||
xfs_agblock_t next_unshared_agbno;
|
||||
|
||||
@ -390,6 +393,46 @@ xchk_refcountbt_xref_gaps(
|
||||
xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
xchk_refcount_mergeable(
|
||||
struct xchk_refcbt_records *rrc,
|
||||
const struct xfs_refcount_irec *r2)
|
||||
{
|
||||
const struct xfs_refcount_irec *r1 = &rrc->prev_rec;
|
||||
|
||||
/* Ignore if prev_rec is not yet initialized. */
|
||||
if (r1->rc_blockcount > 0)
|
||||
return false;
|
||||
|
||||
if (r1->rc_domain != r2->rc_domain)
|
||||
return false;
|
||||
if (r1->rc_startblock + r1->rc_blockcount != r2->rc_startblock)
|
||||
return false;
|
||||
if (r1->rc_refcount != r2->rc_refcount)
|
||||
return false;
|
||||
if ((unsigned long long)r1->rc_blockcount + r2->rc_blockcount >
|
||||
MAXREFCEXTLEN)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Flag failures for records that could be merged. */
|
||||
STATIC void
|
||||
xchk_refcountbt_check_mergeable(
|
||||
struct xchk_btree *bs,
|
||||
struct xchk_refcbt_records *rrc,
|
||||
const struct xfs_refcount_irec *irec)
|
||||
{
|
||||
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
||||
return;
|
||||
|
||||
if (xchk_refcount_mergeable(rrc, irec))
|
||||
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
|
||||
|
||||
memcpy(&rrc->prev_rec, irec, sizeof(struct xfs_refcount_irec));
|
||||
}
|
||||
|
||||
/* Scrub a refcountbt record. */
|
||||
STATIC int
|
||||
xchk_refcountbt_rec(
|
||||
@ -414,6 +457,7 @@ xchk_refcountbt_rec(
|
||||
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
|
||||
rrc->prev_domain = irec.rc_domain;
|
||||
|
||||
xchk_refcountbt_check_mergeable(bs, rrc, &irec);
|
||||
xchk_refcountbt_xref(bs->sc, &irec);
|
||||
|
||||
/*
|
||||
|
@ -32,6 +32,21 @@ xchk_setup_ag_rmapbt(
|
||||
|
||||
/* Reverse-mapping scrubber. */
|
||||
|
||||
struct xchk_rmap {
|
||||
/*
|
||||
* The furthest-reaching of the rmapbt records that we've already
|
||||
* processed. This enables us to detect overlapping records for space
|
||||
* allocations that cannot be shared.
|
||||
*/
|
||||
struct xfs_rmap_irec overlap_rec;
|
||||
|
||||
/*
|
||||
* The previous rmapbt record, so that we can check for two records
|
||||
* that could be one.
|
||||
*/
|
||||
struct xfs_rmap_irec prev_rec;
|
||||
};
|
||||
|
||||
/* Cross-reference a rmap against the refcount btree. */
|
||||
STATIC void
|
||||
xchk_rmapbt_xref_refc(
|
||||
@ -139,12 +154,108 @@ xchk_rmapbt_check_unwritten_in_keyflags(
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
xchk_rmapbt_is_shareable(
|
||||
struct xfs_scrub *sc,
|
||||
const struct xfs_rmap_irec *irec)
|
||||
{
|
||||
if (!xfs_has_reflink(sc->mp))
|
||||
return false;
|
||||
if (XFS_RMAP_NON_INODE_OWNER(irec->rm_owner))
|
||||
return false;
|
||||
if (irec->rm_flags & (XFS_RMAP_BMBT_BLOCK | XFS_RMAP_ATTR_FORK |
|
||||
XFS_RMAP_UNWRITTEN))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Flag failures for records that overlap but cannot. */
|
||||
STATIC void
|
||||
xchk_rmapbt_check_overlapping(
|
||||
struct xchk_btree *bs,
|
||||
struct xchk_rmap *cr,
|
||||
const struct xfs_rmap_irec *irec)
|
||||
{
|
||||
xfs_agblock_t pnext, inext;
|
||||
|
||||
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
||||
return;
|
||||
|
||||
/* No previous record? */
|
||||
if (cr->overlap_rec.rm_blockcount == 0)
|
||||
goto set_prev;
|
||||
|
||||
/* Do overlap_rec and irec overlap? */
|
||||
pnext = cr->overlap_rec.rm_startblock + cr->overlap_rec.rm_blockcount;
|
||||
if (pnext <= irec->rm_startblock)
|
||||
goto set_prev;
|
||||
|
||||
/* Overlap is only allowed if both records are data fork mappings. */
|
||||
if (!xchk_rmapbt_is_shareable(bs->sc, &cr->overlap_rec) ||
|
||||
!xchk_rmapbt_is_shareable(bs->sc, irec))
|
||||
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
|
||||
|
||||
/* Save whichever rmap record extends furthest. */
|
||||
inext = irec->rm_startblock + irec->rm_blockcount;
|
||||
if (pnext > inext)
|
||||
return;
|
||||
|
||||
set_prev:
|
||||
memcpy(&cr->overlap_rec, irec, sizeof(struct xfs_rmap_irec));
|
||||
}
|
||||
|
||||
/* Decide if two reverse-mapping records can be merged. */
|
||||
static inline bool
|
||||
xchk_rmap_mergeable(
|
||||
struct xchk_rmap *cr,
|
||||
const struct xfs_rmap_irec *r2)
|
||||
{
|
||||
const struct xfs_rmap_irec *r1 = &cr->prev_rec;
|
||||
|
||||
/* Ignore if prev_rec is not yet initialized. */
|
||||
if (cr->prev_rec.rm_blockcount == 0)
|
||||
return false;
|
||||
|
||||
if (r1->rm_owner != r2->rm_owner)
|
||||
return false;
|
||||
if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
|
||||
return false;
|
||||
if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
|
||||
XFS_RMAP_LEN_MAX)
|
||||
return false;
|
||||
if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
|
||||
return true;
|
||||
/* must be an inode owner below here */
|
||||
if (r1->rm_flags != r2->rm_flags)
|
||||
return false;
|
||||
if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
|
||||
return true;
|
||||
return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
|
||||
}
|
||||
|
||||
/* Flag failures for records that could be merged. */
|
||||
STATIC void
|
||||
xchk_rmapbt_check_mergeable(
|
||||
struct xchk_btree *bs,
|
||||
struct xchk_rmap *cr,
|
||||
const struct xfs_rmap_irec *irec)
|
||||
{
|
||||
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
||||
return;
|
||||
|
||||
if (xchk_rmap_mergeable(cr, irec))
|
||||
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
|
||||
|
||||
memcpy(&cr->prev_rec, irec, sizeof(struct xfs_rmap_irec));
|
||||
}
|
||||
|
||||
/* Scrub an rmapbt record. */
|
||||
STATIC int
|
||||
xchk_rmapbt_rec(
|
||||
struct xchk_btree *bs,
|
||||
const union xfs_btree_rec *rec)
|
||||
{
|
||||
struct xchk_rmap *cr = bs->private;
|
||||
struct xfs_rmap_irec irec;
|
||||
|
||||
if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
|
||||
@ -154,6 +265,8 @@ xchk_rmapbt_rec(
|
||||
}
|
||||
|
||||
xchk_rmapbt_check_unwritten_in_keyflags(bs);
|
||||
xchk_rmapbt_check_mergeable(bs, cr, &irec);
|
||||
xchk_rmapbt_check_overlapping(bs, cr, &irec);
|
||||
xchk_rmapbt_xref(bs->sc, &irec);
|
||||
return 0;
|
||||
}
|
||||
@ -163,8 +276,17 @@ int
|
||||
xchk_rmapbt(
|
||||
struct xfs_scrub *sc)
|
||||
{
|
||||
return xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
|
||||
&XFS_RMAP_OINFO_AG, NULL);
|
||||
struct xchk_rmap *cr;
|
||||
int error;
|
||||
|
||||
cr = kzalloc(sizeof(struct xchk_rmap), XCHK_GFP_FLAGS);
|
||||
if (!cr)
|
||||
return -ENOMEM;
|
||||
|
||||
error = xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
|
||||
&XFS_RMAP_OINFO_AG, cr);
|
||||
kfree(cr);
|
||||
return error;
|
||||
}
|
||||
|
||||
/* xref check that the extent is owned only by a given owner */
|
||||
|
Loading…
Reference in New Issue
Block a user