xfs: inode-related repair fixes [v30.3 13/16]
While doing QA of the online fsck code, I made a few observations: First, nobody was checking that the di_onlink field is actually zero; Second, that allocating a temporary file for repairs can fail (and thus bring down the entire fs) if the inode cluster is corrupt; and Third, that file link counts do not pin at ~0U to prevent integer overflows. Fourth, the x{chk,rep}_metadata_inode_fork functions should be subclassing the main scrub context, not modifying the parent's setup willy-nilly. This scattered patchset fixes those three problems. This has been running on the djcloud for months with no problems. Enjoy! Signed-off-by: Darrick J. Wong <djwong@kernel.org> -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQQ2qTKExjcn+O1o2YRKO3ySh0YRpgUCZh23VAAKCRBKO3ySh0YR phVNAQCUkBa3kFggj8pFTqmJUmbKK+umIBIpmQpQVEFeVVzjtwD/azZpYcexuMKY 3V81P3KZCOvs/KY0wJupB+5uLdJc5w4= =brWH -----END PGP SIGNATURE----- Merge tag 'inode-repair-improvements-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeA xfs: inode-related repair fixes While doing QA of the online fsck code, I made a few observations: First, nobody was checking that the di_onlink field is actually zero; Second, that allocating a temporary file for repairs can fail (and thus bring down the entire fs) if the inode cluster is corrupt; and Third, that file link counts do not pin at ~0U to prevent integer overflows. Fourth, the x{chk,rep}_metadata_inode_fork functions should be subclassing the main scrub context, not modifying the parent's setup willy-nilly. This scattered patchset fixes those three problems. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org> * tag 'inode-repair-improvements-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: create subordinate scrub contexts for xchk_metadata_inode_subtype xfs: pin inodes that would otherwise overflow link count xfs: try to avoid allocating from sick inode clusters xfs: check unused nlink fields in the ondisk inode
This commit is contained in:
commit
9ba8e658d8
@ -899,6 +899,12 @@ static inline uint xfs_dinode_size(int version)
|
||||
*/
|
||||
#define XFS_MAXLINK ((1U << 31) - 1U)
|
||||
|
||||
/*
|
||||
* Any file that hits the maximum ondisk link count should be pinned to avoid
|
||||
* a use-after-free situation.
|
||||
*/
|
||||
#define XFS_NLINK_PINNED (~0U)
|
||||
|
||||
/*
|
||||
* Values for di_format
|
||||
*
|
||||
|
@ -1057,6 +1057,33 @@ xfs_inobt_first_free_inode(
|
||||
return xfs_lowbit64(realfree);
|
||||
}
|
||||
|
||||
/*
|
||||
* If this AG has corrupt inodes, check if allocating this inode would fail
|
||||
* with corruption errors. Returns 0 if we're clear, or EAGAIN to try again
|
||||
* somewhere else.
|
||||
*/
|
||||
static int
|
||||
xfs_dialloc_check_ino(
|
||||
struct xfs_perag *pag,
|
||||
struct xfs_trans *tp,
|
||||
xfs_ino_t ino)
|
||||
{
|
||||
struct xfs_imap imap;
|
||||
struct xfs_buf *bp;
|
||||
int error;
|
||||
|
||||
error = xfs_imap(pag, tp, ino, &imap, 0);
|
||||
if (error)
|
||||
return -EAGAIN;
|
||||
|
||||
error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp);
|
||||
if (error)
|
||||
return -EAGAIN;
|
||||
|
||||
xfs_trans_brelse(tp, bp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate an inode using the inobt-only algorithm.
|
||||
*/
|
||||
@ -1309,6 +1336,13 @@ alloc_inode:
|
||||
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
|
||||
XFS_INODES_PER_CHUNK) == 0);
|
||||
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
|
||||
|
||||
if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
|
||||
error = xfs_dialloc_check_ino(pag, tp, ino);
|
||||
if (error)
|
||||
goto error0;
|
||||
}
|
||||
|
||||
rec.ir_free &= ~XFS_INOBT_MASK(offset);
|
||||
rec.ir_freecount--;
|
||||
error = xfs_inobt_update(cur, &rec);
|
||||
@ -1584,6 +1618,12 @@ xfs_dialloc_ag(
|
||||
XFS_INODES_PER_CHUNK) == 0);
|
||||
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
|
||||
|
||||
if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
|
||||
error = xfs_dialloc_check_ino(pag, tp, ino);
|
||||
if (error)
|
||||
goto error_cur;
|
||||
}
|
||||
|
||||
/*
|
||||
* Modify or remove the finobt record.
|
||||
*/
|
||||
|
@ -491,6 +491,14 @@ xfs_dinode_verify(
|
||||
return __this_address;
|
||||
}
|
||||
|
||||
if (dip->di_version > 1) {
|
||||
if (dip->di_onlink)
|
||||
return __this_address;
|
||||
} else {
|
||||
if (dip->di_nlink)
|
||||
return __this_address;
|
||||
}
|
||||
|
||||
/* don't allow invalid i_size */
|
||||
di_size = be64_to_cpu(dip->di_size);
|
||||
if (di_size & (1ULL << 63))
|
||||
|
@ -1203,27 +1203,12 @@ xchk_metadata_inode_subtype(
|
||||
struct xfs_scrub *sc,
|
||||
unsigned int scrub_type)
|
||||
{
|
||||
__u32 smtype = sc->sm->sm_type;
|
||||
unsigned int sick_mask = sc->sick_mask;
|
||||
struct xfs_scrub_subord *sub;
|
||||
int error;
|
||||
|
||||
sc->sm->sm_type = scrub_type;
|
||||
|
||||
switch (scrub_type) {
|
||||
case XFS_SCRUB_TYPE_INODE:
|
||||
error = xchk_inode(sc);
|
||||
break;
|
||||
case XFS_SCRUB_TYPE_BMBTD:
|
||||
error = xchk_bmap_data(sc);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
error = -EFSCORRUPTED;
|
||||
break;
|
||||
}
|
||||
|
||||
sc->sick_mask = sick_mask;
|
||||
sc->sm->sm_type = smtype;
|
||||
sub = xchk_scrub_create_subord(sc, scrub_type);
|
||||
error = sub->sc.ops->scrub(&sub->sc);
|
||||
xchk_scrub_free_subord(sub);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -1145,7 +1145,9 @@ xrep_dir_set_nlink(
|
||||
struct xfs_scrub *sc = rd->sc;
|
||||
struct xfs_inode *dp = sc->ip;
|
||||
struct xfs_perag *pag;
|
||||
unsigned int new_nlink = rd->subdirs + 2;
|
||||
unsigned int new_nlink = min_t(unsigned long long,
|
||||
rd->subdirs + 2,
|
||||
XFS_NLINK_PINNED);
|
||||
int error;
|
||||
|
||||
/*
|
||||
@ -1201,13 +1203,6 @@ xrep_dir_swap(
|
||||
bool ip_local, temp_local;
|
||||
int error = 0;
|
||||
|
||||
/*
|
||||
* If we found enough subdirs to overflow this directory's link count,
|
||||
* bail out to userspace before we modify anything.
|
||||
*/
|
||||
if (rd->subdirs + 2 > XFS_MAXLINK)
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
/*
|
||||
* If we never found the parent for this directory, temporarily assign
|
||||
* the root dir as the parent; we'll move this to the orphanage after
|
||||
|
@ -516,6 +516,17 @@ xrep_dinode_mode(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Fix unused link count fields having nonzero values. */
|
||||
STATIC void
|
||||
xrep_dinode_nlinks(
|
||||
struct xfs_dinode *dip)
|
||||
{
|
||||
if (dip->di_version > 1)
|
||||
dip->di_onlink = 0;
|
||||
else
|
||||
dip->di_nlink = 0;
|
||||
}
|
||||
|
||||
/* Fix any conflicting flags that the verifiers complain about. */
|
||||
STATIC void
|
||||
xrep_dinode_flags(
|
||||
@ -1377,6 +1388,7 @@ xrep_dinode_core(
|
||||
iget_error = xrep_dinode_mode(ri, dip);
|
||||
if (iget_error)
|
||||
goto write;
|
||||
xrep_dinode_nlinks(dip);
|
||||
xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
|
||||
xrep_dinode_size(ri, dip);
|
||||
xrep_dinode_extsize_hints(sc, dip);
|
||||
|
@ -607,9 +607,11 @@ xchk_nlinks_compare_inode(
|
||||
* this as a corruption. The VFS won't let users increase the link
|
||||
* count, but it will let them decrease it.
|
||||
*/
|
||||
if (total_links > XFS_MAXLINK) {
|
||||
if (total_links > XFS_NLINK_PINNED) {
|
||||
xchk_ino_set_corrupt(sc, ip->i_ino);
|
||||
goto out_corrupt;
|
||||
} else if (total_links > XFS_MAXLINK) {
|
||||
xchk_ino_set_warning(sc, ip->i_ino);
|
||||
}
|
||||
|
||||
/* Link counts should match. */
|
||||
|
@ -238,14 +238,10 @@ xrep_nlinks_repair_inode(
|
||||
|
||||
/* Commit the new link count if it changed. */
|
||||
if (total_links != actual_nlink) {
|
||||
if (total_links > XFS_MAXLINK) {
|
||||
trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
|
||||
goto out_trans;
|
||||
}
|
||||
|
||||
trace_xrep_nlinks_update_inode(mp, ip, &obs);
|
||||
|
||||
set_nlink(VFS_I(ip), total_links);
|
||||
set_nlink(VFS_I(ip), min_t(unsigned long long, total_links,
|
||||
XFS_NLINK_PINNED));
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
|
@ -1009,55 +1009,27 @@ xrep_metadata_inode_subtype(
|
||||
struct xfs_scrub *sc,
|
||||
unsigned int scrub_type)
|
||||
{
|
||||
__u32 smtype = sc->sm->sm_type;
|
||||
__u32 smflags = sc->sm->sm_flags;
|
||||
unsigned int sick_mask = sc->sick_mask;
|
||||
struct xfs_scrub_subord *sub;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Let's see if the inode needs repair. We're going to open-code calls
|
||||
* to the scrub and repair functions so that we can hang on to the
|
||||
* Let's see if the inode needs repair. Use a subordinate scrub context
|
||||
* to call the scrub and repair functions so that we can hang on to the
|
||||
* resources that we already acquired instead of using the standard
|
||||
* setup/teardown routines.
|
||||
*/
|
||||
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
|
||||
sc->sm->sm_type = scrub_type;
|
||||
|
||||
switch (scrub_type) {
|
||||
case XFS_SCRUB_TYPE_INODE:
|
||||
error = xchk_inode(sc);
|
||||
break;
|
||||
case XFS_SCRUB_TYPE_BMBTD:
|
||||
error = xchk_bmap_data(sc);
|
||||
break;
|
||||
case XFS_SCRUB_TYPE_BMBTA:
|
||||
error = xchk_bmap_attr(sc);
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
error = -EFSCORRUPTED;
|
||||
}
|
||||
sub = xchk_scrub_create_subord(sc, scrub_type);
|
||||
error = sub->sc.ops->scrub(&sub->sc);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
if (!xrep_will_attempt(sc))
|
||||
if (!xrep_will_attempt(&sub->sc))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Repair some part of the inode. This will potentially join the inode
|
||||
* to the transaction.
|
||||
*/
|
||||
switch (scrub_type) {
|
||||
case XFS_SCRUB_TYPE_INODE:
|
||||
error = xrep_inode(sc);
|
||||
break;
|
||||
case XFS_SCRUB_TYPE_BMBTD:
|
||||
error = xrep_bmap(sc, XFS_DATA_FORK, false);
|
||||
break;
|
||||
case XFS_SCRUB_TYPE_BMBTA:
|
||||
error = xrep_bmap(sc, XFS_ATTR_FORK, false);
|
||||
break;
|
||||
}
|
||||
error = sub->sc.ops->repair(&sub->sc);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@ -1066,10 +1038,10 @@ xrep_metadata_inode_subtype(
|
||||
* that the inode will not be joined to the transaction when we exit
|
||||
* the function.
|
||||
*/
|
||||
error = xfs_defer_finish(&sc->tp);
|
||||
error = xfs_defer_finish(&sub->sc.tp);
|
||||
if (error)
|
||||
goto out;
|
||||
error = xfs_trans_roll(&sc->tp);
|
||||
error = xfs_trans_roll(&sub->sc.tp);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
@ -1077,31 +1049,18 @@ xrep_metadata_inode_subtype(
|
||||
* Clear the corruption flags and re-check the metadata that we just
|
||||
* repaired.
|
||||
*/
|
||||
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
|
||||
|
||||
switch (scrub_type) {
|
||||
case XFS_SCRUB_TYPE_INODE:
|
||||
error = xchk_inode(sc);
|
||||
break;
|
||||
case XFS_SCRUB_TYPE_BMBTD:
|
||||
error = xchk_bmap_data(sc);
|
||||
break;
|
||||
case XFS_SCRUB_TYPE_BMBTA:
|
||||
error = xchk_bmap_attr(sc);
|
||||
break;
|
||||
}
|
||||
sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
|
||||
error = sub->sc.ops->scrub(&sub->sc);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
/* If corruption persists, the repair has failed. */
|
||||
if (xchk_needs_repair(sc->sm)) {
|
||||
if (xchk_needs_repair(sub->sc.sm)) {
|
||||
error = -EFSCORRUPTED;
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
sc->sick_mask = sick_mask;
|
||||
sc->sm->sm_type = smtype;
|
||||
sc->sm->sm_flags = smflags;
|
||||
xchk_scrub_free_subord(sub);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -177,6 +177,39 @@ xchk_fsgates_disable(
|
||||
}
|
||||
#undef FSGATES_MASK
|
||||
|
||||
/* Free the resources associated with a scrub subtype. */
|
||||
void
|
||||
xchk_scrub_free_subord(
|
||||
struct xfs_scrub_subord *sub)
|
||||
{
|
||||
struct xfs_scrub *sc = sub->parent_sc;
|
||||
|
||||
ASSERT(sc->ip == sub->sc.ip);
|
||||
ASSERT(sc->orphanage == sub->sc.orphanage);
|
||||
ASSERT(sc->tempip == sub->sc.tempip);
|
||||
|
||||
sc->sm->sm_type = sub->old_smtype;
|
||||
sc->sm->sm_flags = sub->old_smflags |
|
||||
(sc->sm->sm_flags & XFS_SCRUB_FLAGS_OUT);
|
||||
sc->tp = sub->sc.tp;
|
||||
|
||||
if (sub->sc.buf) {
|
||||
if (sub->sc.buf_cleanup)
|
||||
sub->sc.buf_cleanup(sub->sc.buf);
|
||||
kvfree(sub->sc.buf);
|
||||
}
|
||||
if (sub->sc.xmbtp)
|
||||
xmbuf_free(sub->sc.xmbtp);
|
||||
if (sub->sc.xfile)
|
||||
xfile_destroy(sub->sc.xfile);
|
||||
|
||||
sc->ilock_flags = sub->sc.ilock_flags;
|
||||
sc->orphanage_ilock_flags = sub->sc.orphanage_ilock_flags;
|
||||
sc->temp_ilock_flags = sub->sc.temp_ilock_flags;
|
||||
|
||||
kfree(sub);
|
||||
}
|
||||
|
||||
/* Free all the resources and finish the transactions. */
|
||||
STATIC int
|
||||
xchk_teardown(
|
||||
@ -505,6 +538,36 @@ static inline void xchk_postmortem(struct xfs_scrub *sc)
|
||||
}
|
||||
#endif /* CONFIG_XFS_ONLINE_REPAIR */
|
||||
|
||||
/*
|
||||
* Create a new scrub context from an existing one, but with a different scrub
|
||||
* type.
|
||||
*/
|
||||
struct xfs_scrub_subord *
|
||||
xchk_scrub_create_subord(
|
||||
struct xfs_scrub *sc,
|
||||
unsigned int subtype)
|
||||
{
|
||||
struct xfs_scrub_subord *sub;
|
||||
|
||||
sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS);
|
||||
if (!sub)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
sub->old_smtype = sc->sm->sm_type;
|
||||
sub->old_smflags = sc->sm->sm_flags;
|
||||
sub->parent_sc = sc;
|
||||
memcpy(&sub->sc, sc, sizeof(struct xfs_scrub));
|
||||
sub->sc.ops = &meta_scrub_ops[subtype];
|
||||
sub->sc.sm->sm_type = subtype;
|
||||
sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
|
||||
sub->sc.buf = NULL;
|
||||
sub->sc.buf_cleanup = NULL;
|
||||
sub->sc.xfile = NULL;
|
||||
sub->sc.xmbtp = NULL;
|
||||
|
||||
return sub;
|
||||
}
|
||||
|
||||
/* Dispatch metadata scrubbing. */
|
||||
int
|
||||
xfs_scrub_metadata(
|
||||
|
@ -156,6 +156,17 @@ struct xfs_scrub {
|
||||
*/
|
||||
#define XREP_FSGATES_ALL (XREP_FSGATES_EXCHANGE_RANGE)
|
||||
|
||||
struct xfs_scrub_subord {
|
||||
struct xfs_scrub sc;
|
||||
struct xfs_scrub *parent_sc;
|
||||
unsigned int old_smtype;
|
||||
unsigned int old_smflags;
|
||||
};
|
||||
|
||||
struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
|
||||
unsigned int subtype);
|
||||
void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
|
||||
|
||||
/* Metadata scrubbers */
|
||||
int xchk_tester(struct xfs_scrub *sc);
|
||||
int xchk_superblock(struct xfs_scrub *sc);
|
||||
|
@ -890,22 +890,25 @@ xfs_init_new_inode(
|
||||
*/
|
||||
static int /* error */
|
||||
xfs_droplink(
|
||||
xfs_trans_t *tp,
|
||||
xfs_inode_t *ip)
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
if (VFS_I(ip)->i_nlink == 0) {
|
||||
xfs_alert(ip->i_mount,
|
||||
"%s: Attempt to drop inode (%llu) with nlink zero.",
|
||||
__func__, ip->i_ino);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
struct inode *inode = VFS_I(ip);
|
||||
|
||||
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
|
||||
|
||||
drop_nlink(VFS_I(ip));
|
||||
if (inode->i_nlink == 0) {
|
||||
xfs_info_ratelimited(tp->t_mountp,
|
||||
"Inode 0x%llx link count dropped below zero. Pinning link count.",
|
||||
ip->i_ino);
|
||||
set_nlink(inode, XFS_NLINK_PINNED);
|
||||
}
|
||||
if (inode->i_nlink != XFS_NLINK_PINNED)
|
||||
drop_nlink(inode);
|
||||
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
|
||||
if (VFS_I(ip)->i_nlink)
|
||||
if (inode->i_nlink)
|
||||
return 0;
|
||||
|
||||
return xfs_iunlink(tp, ip);
|
||||
@ -919,9 +922,17 @@ xfs_bumplink(
|
||||
struct xfs_trans *tp,
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
struct inode *inode = VFS_I(ip);
|
||||
|
||||
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
|
||||
|
||||
inc_nlink(VFS_I(ip));
|
||||
if (inode->i_nlink == XFS_NLINK_PINNED - 1)
|
||||
xfs_info_ratelimited(tp->t_mountp,
|
||||
"Inode 0x%llx link count exceeded maximum. Pinning link count.",
|
||||
ip->i_ino);
|
||||
if (inode->i_nlink != XFS_NLINK_PINNED)
|
||||
inc_nlink(inode);
|
||||
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user