diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 10153ce116d4..f1818c54af6f 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -899,6 +899,12 @@ static inline uint xfs_dinode_size(int version) */ #define XFS_MAXLINK ((1U << 31) - 1U) +/* + * Any file that hits the maximum ondisk link count should be pinned to avoid + * a use-after-free situation. + */ +#define XFS_NLINK_PINNED (~0U) + /* * Values for di_format * diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index cb37f0007731..14c81f227c5b 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1057,6 +1057,33 @@ xfs_inobt_first_free_inode( return xfs_lowbit64(realfree); } +/* + * If this AG has corrupt inodes, check if allocating this inode would fail + * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again + * somewhere else. + */ +static int +xfs_dialloc_check_ino( + struct xfs_perag *pag, + struct xfs_trans *tp, + xfs_ino_t ino) +{ + struct xfs_imap imap; + struct xfs_buf *bp; + int error; + + error = xfs_imap(pag, tp, ino, &imap, 0); + if (error) + return -EAGAIN; + + error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp); + if (error) + return -EAGAIN; + + xfs_trans_brelse(tp, bp); + return 0; +} + /* * Allocate an inode using the inobt-only algorithm. */ @@ -1309,6 +1336,13 @@ alloc_inode: ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); + + if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) { + error = xfs_dialloc_check_ino(pag, tp, ino); + if (error) + goto error0; + } + rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; error = xfs_inobt_update(cur, &rec); @@ -1584,6 +1618,12 @@ xfs_dialloc_ag( XFS_INODES_PER_CHUNK) == 0); ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset); + if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) { + error = xfs_dialloc_check_ino(pag, tp, ino); + if (error) + goto error_cur; + } + /* * Modify or remove the finobt record. */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index d0dcce462bf4..d79002343d0b 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -491,6 +491,14 @@ xfs_dinode_verify( return __this_address; } + if (dip->di_version > 1) { + if (dip->di_onlink) + return __this_address; + } else { + if (dip->di_nlink) + return __this_address; + } + /* don't allow invalid i_size */ di_size = be64_to_cpu(dip->di_size); if (di_size & (1ULL << 63)) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index a2da2bef509a..48302532d10d 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1203,27 +1203,12 @@ xchk_metadata_inode_subtype( struct xfs_scrub *sc, unsigned int scrub_type) { - __u32 smtype = sc->sm->sm_type; - unsigned int sick_mask = sc->sick_mask; + struct xfs_scrub_subord *sub; int error; - sc->sm->sm_type = scrub_type; - - switch (scrub_type) { - case XFS_SCRUB_TYPE_INODE: - error = xchk_inode(sc); - break; - case XFS_SCRUB_TYPE_BMBTD: - error = xchk_bmap_data(sc); - break; - default: - ASSERT(0); - error = -EFSCORRUPTED; - break; - } - - sc->sick_mask = sick_mask; - sc->sm->sm_type = smtype; + sub = xchk_scrub_create_subord(sc, scrub_type); + error = sub->sc.ops->scrub(&sub->sc); + xchk_scrub_free_subord(sub); return error; } diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index c150b2efa2c2..38957da26b94 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -1145,7 +1145,9 @@ xrep_dir_set_nlink( struct xfs_scrub *sc = rd->sc; struct xfs_inode *dp = sc->ip; struct xfs_perag *pag; - unsigned int new_nlink = rd->subdirs + 2; + unsigned int new_nlink = min_t(unsigned long long, + rd->subdirs + 2, + XFS_NLINK_PINNED); int error; /* @@ -1201,13 +1203,6 @@ xrep_dir_swap( bool ip_local, temp_local; int error = 0; - /* - * If we found enough subdirs to overflow this directory's link count, - * bail out to userspace before we modify anything. - */ - if (rd->subdirs + 2 > XFS_MAXLINK) - return -EFSCORRUPTED; - /* * If we never found the parent for this directory, temporarily assign * the root dir as the parent; we'll move this to the orphanage after diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 0dde5df2f8d3..e3b74ea50fde 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -516,6 +516,17 @@ xrep_dinode_mode( return 0; } +/* Fix unused link count fields having nonzero values. */ +STATIC void +xrep_dinode_nlinks( + struct xfs_dinode *dip) +{ + if (dip->di_version > 1) + dip->di_onlink = 0; + else + dip->di_nlink = 0; +} + /* Fix any conflicting flags that the verifiers complain about. */ STATIC void xrep_dinode_flags( @@ -1377,6 +1388,7 @@ xrep_dinode_core( iget_error = xrep_dinode_mode(ri, dip); if (iget_error) goto write; + xrep_dinode_nlinks(dip); xrep_dinode_flags(sc, dip, ri->rt_extents > 0); xrep_dinode_size(ri, dip); xrep_dinode_extsize_hints(sc, dip); diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index c456523fac9c..fcb9c473f372 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -607,9 +607,11 @@ xchk_nlinks_compare_inode( * this as a corruption. The VFS won't let users increase the link * count, but it will let them decrease it. */ - if (total_links > XFS_MAXLINK) { + if (total_links > XFS_NLINK_PINNED) { xchk_ino_set_corrupt(sc, ip->i_ino); goto out_corrupt; + } else if (total_links > XFS_MAXLINK) { + xchk_ino_set_warning(sc, ip->i_ino); } /* Link counts should match. */ diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c index 0cb67339eac8..83f8637bb08f 100644 --- a/fs/xfs/scrub/nlinks_repair.c +++ b/fs/xfs/scrub/nlinks_repair.c @@ -238,14 +238,10 @@ xrep_nlinks_repair_inode( /* Commit the new link count if it changed. */ if (total_links != actual_nlink) { - if (total_links > XFS_MAXLINK) { - trace_xrep_nlinks_unfixable_inode(mp, ip, &obs); - goto out_trans; - } - trace_xrep_nlinks_update_inode(mp, ip, &obs); - set_nlink(VFS_I(ip), total_links); + set_nlink(VFS_I(ip), min_t(unsigned long long, total_links, + XFS_NLINK_PINNED)); dirty = true; } diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 369f0430e4ba..b6aff89679d5 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1009,55 +1009,27 @@ xrep_metadata_inode_subtype( struct xfs_scrub *sc, unsigned int scrub_type) { - __u32 smtype = sc->sm->sm_type; - __u32 smflags = sc->sm->sm_flags; - unsigned int sick_mask = sc->sick_mask; + struct xfs_scrub_subord *sub; int error; /* - * Let's see if the inode needs repair. We're going to open-code calls - * to the scrub and repair functions so that we can hang on to the + * Let's see if the inode needs repair. Use a subordinate scrub context + * to call the scrub and repair functions so that we can hang on to the * resources that we already acquired instead of using the standard * setup/teardown routines. */ - sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; - sc->sm->sm_type = scrub_type; - - switch (scrub_type) { - case XFS_SCRUB_TYPE_INODE: - error = xchk_inode(sc); - break; - case XFS_SCRUB_TYPE_BMBTD: - error = xchk_bmap_data(sc); - break; - case XFS_SCRUB_TYPE_BMBTA: - error = xchk_bmap_attr(sc); - break; - default: - ASSERT(0); - error = -EFSCORRUPTED; - } + sub = xchk_scrub_create_subord(sc, scrub_type); + error = sub->sc.ops->scrub(&sub->sc); if (error) goto out; - - if (!xrep_will_attempt(sc)) + if (!xrep_will_attempt(&sub->sc)) goto out; /* * Repair some part of the inode. This will potentially join the inode * to the transaction. */ - switch (scrub_type) { - case XFS_SCRUB_TYPE_INODE: - error = xrep_inode(sc); - break; - case XFS_SCRUB_TYPE_BMBTD: - error = xrep_bmap(sc, XFS_DATA_FORK, false); - break; - case XFS_SCRUB_TYPE_BMBTA: - error = xrep_bmap(sc, XFS_ATTR_FORK, false); - break; - } + error = sub->sc.ops->repair(&sub->sc); if (error) goto out; @@ -1066,10 +1038,10 @@ xrep_metadata_inode_subtype( * that the inode will not be joined to the transaction when we exit * the function. */ - error = xfs_defer_finish(&sc->tp); + error = xfs_defer_finish(&sub->sc.tp); if (error) goto out; - error = xfs_trans_roll(&sc->tp); + error = xfs_trans_roll(&sub->sc.tp); if (error) goto out; @@ -1077,31 +1049,18 @@ xrep_metadata_inode_subtype( * Clear the corruption flags and re-check the metadata that we just * repaired. */ - sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; - - switch (scrub_type) { - case XFS_SCRUB_TYPE_INODE: - error = xchk_inode(sc); - break; - case XFS_SCRUB_TYPE_BMBTD: - error = xchk_bmap_data(sc); - break; - case XFS_SCRUB_TYPE_BMBTA: - error = xchk_bmap_attr(sc); - break; - } + sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; + error = sub->sc.ops->scrub(&sub->sc); if (error) goto out; /* If corruption persists, the repair has failed. */ - if (xchk_needs_repair(sc->sm)) { + if (xchk_needs_repair(sub->sc.sm)) { error = -EFSCORRUPTED; goto out; } out: - sc->sick_mask = sick_mask; - sc->sm->sm_type = smtype; - sc->sm->sm_flags = smflags; + xchk_scrub_free_subord(sub); return error; } diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 301d5b753fdd..ebb06838c31b 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -177,6 +177,39 @@ xchk_fsgates_disable( } #undef FSGATES_MASK +/* Free the resources associated with a scrub subtype. */ +void +xchk_scrub_free_subord( + struct xfs_scrub_subord *sub) +{ + struct xfs_scrub *sc = sub->parent_sc; + + ASSERT(sc->ip == sub->sc.ip); + ASSERT(sc->orphanage == sub->sc.orphanage); + ASSERT(sc->tempip == sub->sc.tempip); + + sc->sm->sm_type = sub->old_smtype; + sc->sm->sm_flags = sub->old_smflags | + (sc->sm->sm_flags & XFS_SCRUB_FLAGS_OUT); + sc->tp = sub->sc.tp; + + if (sub->sc.buf) { + if (sub->sc.buf_cleanup) + sub->sc.buf_cleanup(sub->sc.buf); + kvfree(sub->sc.buf); + } + if (sub->sc.xmbtp) + xmbuf_free(sub->sc.xmbtp); + if (sub->sc.xfile) + xfile_destroy(sub->sc.xfile); + + sc->ilock_flags = sub->sc.ilock_flags; + sc->orphanage_ilock_flags = sub->sc.orphanage_ilock_flags; + sc->temp_ilock_flags = sub->sc.temp_ilock_flags; + + kfree(sub); +} + /* Free all the resources and finish the transactions. */ STATIC int xchk_teardown( @@ -505,6 +538,36 @@ static inline void xchk_postmortem(struct xfs_scrub *sc) } #endif /* CONFIG_XFS_ONLINE_REPAIR */ +/* + * Create a new scrub context from an existing one, but with a different scrub + * type. + */ +struct xfs_scrub_subord * +xchk_scrub_create_subord( + struct xfs_scrub *sc, + unsigned int subtype) +{ + struct xfs_scrub_subord *sub; + + sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS); + if (!sub) + return ERR_PTR(-ENOMEM); + + sub->old_smtype = sc->sm->sm_type; + sub->old_smflags = sc->sm->sm_flags; + sub->parent_sc = sc; + memcpy(&sub->sc, sc, sizeof(struct xfs_scrub)); + sub->sc.ops = &meta_scrub_ops[subtype]; + sub->sc.sm->sm_type = subtype; + sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT; + sub->sc.buf = NULL; + sub->sc.buf_cleanup = NULL; + sub->sc.xfile = NULL; + sub->sc.xmbtp = NULL; + + return sub; +} + /* Dispatch metadata scrubbing. */ int xfs_scrub_metadata( diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 7abe498f7a46..54a4242bc79c 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -156,6 +156,17 @@ struct xfs_scrub { */ #define XREP_FSGATES_ALL (XREP_FSGATES_EXCHANGE_RANGE) +struct xfs_scrub_subord { + struct xfs_scrub sc; + struct xfs_scrub *parent_sc; + unsigned int old_smtype; + unsigned int old_smflags; +}; + +struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc, + unsigned int subtype); +void xchk_scrub_free_subord(struct xfs_scrub_subord *sub); + /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); int xchk_superblock(struct xfs_scrub *sc); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index fed0cd6bffdf..03dcb4ac0431 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -890,22 +890,25 @@ xfs_init_new_inode( */ static int /* error */ xfs_droplink( - xfs_trans_t *tp, - xfs_inode_t *ip) + struct xfs_trans *tp, + struct xfs_inode *ip) { - if (VFS_I(ip)->i_nlink == 0) { - xfs_alert(ip->i_mount, - "%s: Attempt to drop inode (%llu) with nlink zero.", - __func__, ip->i_ino); - return -EFSCORRUPTED; - } + struct inode *inode = VFS_I(ip); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - drop_nlink(VFS_I(ip)); + if (inode->i_nlink == 0) { + xfs_info_ratelimited(tp->t_mountp, + "Inode 0x%llx link count dropped below zero. Pinning link count.", + ip->i_ino); + set_nlink(inode, XFS_NLINK_PINNED); + } + if (inode->i_nlink != XFS_NLINK_PINNED) + drop_nlink(inode); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (VFS_I(ip)->i_nlink) + if (inode->i_nlink) return 0; return xfs_iunlink(tp, ip); @@ -919,9 +922,17 @@ xfs_bumplink( struct xfs_trans *tp, struct xfs_inode *ip) { + struct inode *inode = VFS_I(ip); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - inc_nlink(VFS_I(ip)); + if (inode->i_nlink == XFS_NLINK_PINNED - 1) + xfs_info_ratelimited(tp->t_mountp, + "Inode 0x%llx link count exceeded maximum. Pinning link count.", + ip->i_ino); + if (inode->i_nlink != XFS_NLINK_PINNED) + inc_nlink(inode); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); }