6b631c60c9
Fix the file link counts since we just computed the correct ones. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
931 lines
24 KiB
C
931 lines
24 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_icache.h"
|
|
#include "xfs_iwalk.h"
|
|
#include "xfs_ialloc.h"
|
|
#include "xfs_dir2.h"
|
|
#include "xfs_dir2_priv.h"
|
|
#include "xfs_ag.h"
|
|
#include "scrub/scrub.h"
|
|
#include "scrub/common.h"
|
|
#include "scrub/repair.h"
|
|
#include "scrub/xfile.h"
|
|
#include "scrub/xfarray.h"
|
|
#include "scrub/iscan.h"
|
|
#include "scrub/nlinks.h"
|
|
#include "scrub/trace.h"
|
|
#include "scrub/readdir.h"
|
|
|
|
/*
 * Live Inode Link Count Checking
 * ==============================
 *
 * Inode link counts are "summary" metadata, in the sense that they are
 * computed as the number of directory entries referencing each file on the
 * filesystem.  Therefore, we compute the correct link counts by creating a
 * shadow link count structure and walking every inode.
 */
|
|
|
|
/* Set us up to scrub inode link counts. */
|
|
int
|
|
xchk_setup_nlinks(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
|
|
|
|
sc->buf = kzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
|
|
if (!sc->buf)
|
|
return -ENOMEM;
|
|
|
|
return xchk_setup_fs(sc);
|
|
}
|
|
|
|
/*
 * Part 1: Collecting file link counts.  For each file, we create a shadow link
 * counting structure, then walk the entire directory tree, incrementing parent
 * and child link counts for each directory entry seen.
 *
 * To avoid false corruption reports in part 2, any failure in this part must
 * set the INCOMPLETE flag even when a negative errno is returned.  Particular
 * care must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
 * ECANCELED) that are absorbed into a scrub state flag update by
 * xchk_*_process_error.  Scrub and repair share the same incore data
 * structures, so the INCOMPLETE flag is critical to prevent a repair based on
 * insufficient information.
 *
 * Because we are scanning a live filesystem, it's possible that another thread
 * will try to update the link counts for an inode that we've already scanned.
 * This will cause our counts to be incorrect.  Therefore, we hook all
 * directory entry updates because that is when link count updates occur.  By
 * shadowing transaction updates in this manner, the live nlink check can
 * ensure, by locking the inode and the shadow structure, that its own copies
 * are not out of date.  Because the hook code runs in a different process
 * context from the scrub code and the scrub state flags are not accessed
 * atomically, failures in the hook code must abort the iscan and the scrubber
 * must notice the aborted scan and set the incomplete flag.
 *
 * Note that we use jump labels and srcu notifier hooks to minimize the
 * overhead when live nlinks is /not/ running.  Locking order for nlink
 * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
 */
|
|
|
|
/*
|
|
* Add a delta to an nlink counter, clamping the value to U32_MAX. Because
|
|
* XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
|
|
* even if we lose some precision.
|
|
*/
|
|
static inline void
|
|
careful_add(
|
|
xfs_nlink_t *nlinkp,
|
|
int delta)
|
|
{
|
|
uint64_t new_value = (uint64_t)(*nlinkp) + delta;
|
|
|
|
BUILD_BUG_ON(XFS_MAXLINK > U32_MAX);
|
|
*nlinkp = min_t(uint64_t, new_value, U32_MAX);
|
|
}
|
|
|
|
/* Update incore link count information. Caller must hold the nlinks lock. */
|
|
STATIC int
|
|
xchk_nlinks_update_incore(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
xfs_ino_t ino,
|
|
int parents_delta,
|
|
int backrefs_delta,
|
|
int children_delta)
|
|
{
|
|
struct xchk_nlink nl;
|
|
int error;
|
|
|
|
if (!xnc->nlinks)
|
|
return 0;
|
|
|
|
error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
|
|
if (error)
|
|
return error;
|
|
|
|
trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
|
|
backrefs_delta, children_delta);
|
|
|
|
careful_add(&nl.parents, parents_delta);
|
|
careful_add(&nl.backrefs, backrefs_delta);
|
|
careful_add(&nl.children, children_delta);
|
|
|
|
nl.flags |= XCHK_NLINK_WRITTEN;
|
|
error = xfarray_store(xnc->nlinks, ino, &nl);
|
|
if (error == -EFBIG) {
|
|
/*
|
|
* EFBIG means we tried to store data at too high a byte offset
|
|
* in the sparse array. IOWs, we cannot complete the check and
|
|
* must notify userspace that the check was incomplete.
|
|
*/
|
|
error = -ECANCELED;
|
|
}
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Apply a link count change from the regular filesystem into our shadow link
|
|
* count structure based on a directory update in progress.
|
|
*/
|
|
STATIC int
|
|
xchk_nlinks_live_update(
|
|
struct notifier_block *nb,
|
|
unsigned long action,
|
|
void *data)
|
|
{
|
|
struct xfs_dir_update_params *p = data;
|
|
struct xchk_nlink_ctrs *xnc;
|
|
int error;
|
|
|
|
xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb);
|
|
|
|
trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
|
|
p->delta, p->name->name, p->name->len);
|
|
|
|
/*
|
|
* If we've already scanned @dp, update the number of parents that link
|
|
* to @ip. If @ip is a subdirectory, update the number of child links
|
|
* going out of @dp.
|
|
*/
|
|
if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
|
|
mutex_lock(&xnc->lock);
|
|
error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
|
|
0, 0);
|
|
if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
|
|
error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
|
|
0, p->delta);
|
|
mutex_unlock(&xnc->lock);
|
|
if (error)
|
|
goto out_abort;
|
|
}
|
|
|
|
/*
|
|
* If @ip is a subdirectory and we've already scanned it, update the
|
|
* number of backrefs pointing to @dp.
|
|
*/
|
|
if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
|
|
xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
|
|
mutex_lock(&xnc->lock);
|
|
error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
|
|
p->delta, 0);
|
|
mutex_unlock(&xnc->lock);
|
|
if (error)
|
|
goto out_abort;
|
|
}
|
|
|
|
return NOTIFY_DONE;
|
|
|
|
out_abort:
|
|
xchk_iscan_abort(&xnc->collect_iscan);
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
/* Bump the observed link count for the inode referenced by this entry. */
|
|
STATIC int
|
|
xchk_nlinks_collect_dirent(
|
|
struct xfs_scrub *sc,
|
|
struct xfs_inode *dp,
|
|
xfs_dir2_dataptr_t dapos,
|
|
const struct xfs_name *name,
|
|
xfs_ino_t ino,
|
|
void *priv)
|
|
{
|
|
struct xchk_nlink_ctrs *xnc = priv;
|
|
bool dot = false, dotdot = false;
|
|
int error;
|
|
|
|
/* Does this name make sense? */
|
|
if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
|
|
error = -ECANCELED;
|
|
goto out_abort;
|
|
}
|
|
|
|
if (name->len == 1 && name->name[0] == '.')
|
|
dot = true;
|
|
else if (name->len == 2 && name->name[0] == '.' &&
|
|
name->name[1] == '.')
|
|
dotdot = true;
|
|
|
|
/* Don't accept a '.' entry that points somewhere else. */
|
|
if (dot && ino != dp->i_ino) {
|
|
error = -ECANCELED;
|
|
goto out_abort;
|
|
}
|
|
|
|
/* Don't accept an invalid inode number. */
|
|
if (!xfs_verify_dir_ino(sc->mp, ino)) {
|
|
error = -ECANCELED;
|
|
goto out_abort;
|
|
}
|
|
|
|
/* Update the shadow link counts if we haven't already failed. */
|
|
|
|
if (xchk_iscan_aborted(&xnc->collect_iscan)) {
|
|
error = -ECANCELED;
|
|
goto out_incomplete;
|
|
}
|
|
|
|
trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
|
|
|
|
mutex_lock(&xnc->lock);
|
|
|
|
/*
|
|
* If this is a dotdot entry, it is a back link from dp to ino. How
|
|
* we handle this depends on whether or not dp is the root directory.
|
|
*
|
|
* The root directory is its own parent, so we pretend the dotdot entry
|
|
* establishes the "parent" of the root directory. Increment the
|
|
* number of parents of the root directory.
|
|
*
|
|
* Otherwise, increment the number of backrefs pointing back to ino.
|
|
*/
|
|
if (dotdot) {
|
|
if (dp == sc->mp->m_rootip)
|
|
error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
|
|
else
|
|
error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
|
|
if (error)
|
|
goto out_unlock;
|
|
}
|
|
|
|
/*
|
|
* If this dirent is a forward link from dp to ino, increment the
|
|
* number of parents linking into ino.
|
|
*/
|
|
if (!dot && !dotdot) {
|
|
error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
|
|
if (error)
|
|
goto out_unlock;
|
|
}
|
|
|
|
/*
|
|
* If this dirent is a forward link to a subdirectory, increment the
|
|
* number of child links of dp.
|
|
*/
|
|
if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
|
|
error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
|
|
if (error)
|
|
goto out_unlock;
|
|
}
|
|
|
|
mutex_unlock(&xnc->lock);
|
|
return 0;
|
|
|
|
out_unlock:
|
|
mutex_unlock(&xnc->lock);
|
|
out_abort:
|
|
xchk_iscan_abort(&xnc->collect_iscan);
|
|
out_incomplete:
|
|
xchk_set_incomplete(sc);
|
|
return error;
|
|
}
|
|
|
|
/* Walk a directory to bump the observed link counts of the children. */
|
|
STATIC int
|
|
xchk_nlinks_collect_dir(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
struct xfs_inode *dp)
|
|
{
|
|
struct xfs_scrub *sc = xnc->sc;
|
|
unsigned int lock_mode;
|
|
int error = 0;
|
|
|
|
/* Prevent anyone from changing this directory while we walk it. */
|
|
xfs_ilock(dp, XFS_IOLOCK_SHARED);
|
|
lock_mode = xfs_ilock_data_map_shared(dp);
|
|
|
|
/*
|
|
* The dotdot entry of an unlinked directory still points to the last
|
|
* parent, but the parent no longer links to this directory. Skip the
|
|
* directory to avoid overcounting.
|
|
*/
|
|
if (VFS_I(dp)->i_nlink == 0)
|
|
goto out_unlock;
|
|
|
|
/*
|
|
* We cannot count file links if the directory looks as though it has
|
|
* been zapped by the inode record repair code.
|
|
*/
|
|
if (xchk_dir_looks_zapped(dp)) {
|
|
error = -EBUSY;
|
|
goto out_abort;
|
|
}
|
|
|
|
error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
|
|
if (error == -ECANCELED) {
|
|
error = 0;
|
|
goto out_unlock;
|
|
}
|
|
if (error)
|
|
goto out_abort;
|
|
|
|
xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
|
|
goto out_unlock;
|
|
|
|
out_abort:
|
|
xchk_set_incomplete(sc);
|
|
xchk_iscan_abort(&xnc->collect_iscan);
|
|
out_unlock:
|
|
xfs_iunlock(dp, lock_mode);
|
|
xfs_iunlock(dp, XFS_IOLOCK_SHARED);
|
|
return error;
|
|
}
|
|
|
|
/* If this looks like a valid pointer, count it. */
|
|
static inline int
|
|
xchk_nlinks_collect_metafile(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
xfs_ino_t ino)
|
|
{
|
|
if (!xfs_verify_ino(xnc->sc->mp, ino))
|
|
return 0;
|
|
|
|
trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
|
|
return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
|
|
}
|
|
|
|
/* Bump the link counts of metadata files rooted in the superblock. */
|
|
STATIC int
|
|
xchk_nlinks_collect_metafiles(
|
|
struct xchk_nlink_ctrs *xnc)
|
|
{
|
|
struct xfs_mount *mp = xnc->sc->mp;
|
|
int error = -ECANCELED;
|
|
|
|
|
|
if (xchk_iscan_aborted(&xnc->collect_iscan))
|
|
goto out_incomplete;
|
|
|
|
mutex_lock(&xnc->lock);
|
|
error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
|
|
if (error)
|
|
goto out_abort;
|
|
|
|
error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
|
|
if (error)
|
|
goto out_abort;
|
|
|
|
error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
|
|
if (error)
|
|
goto out_abort;
|
|
|
|
error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
|
|
if (error)
|
|
goto out_abort;
|
|
|
|
error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
|
|
if (error)
|
|
goto out_abort;
|
|
mutex_unlock(&xnc->lock);
|
|
|
|
return 0;
|
|
|
|
out_abort:
|
|
mutex_unlock(&xnc->lock);
|
|
xchk_iscan_abort(&xnc->collect_iscan);
|
|
out_incomplete:
|
|
xchk_set_incomplete(xnc->sc);
|
|
return error;
|
|
}
|
|
|
|
/* Advance the collection scan cursor for this non-directory file. */
|
|
static inline int
|
|
xchk_nlinks_collect_file(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
struct xfs_inode *ip)
|
|
{
|
|
xfs_ilock(ip, XFS_IOLOCK_SHARED);
|
|
xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
|
|
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
|
|
return 0;
|
|
}
|
|
|
|
/* Walk all directories and count inode links. */
|
|
STATIC int
|
|
xchk_nlinks_collect(
|
|
struct xchk_nlink_ctrs *xnc)
|
|
{
|
|
struct xfs_scrub *sc = xnc->sc;
|
|
struct xfs_inode *ip;
|
|
int error;
|
|
|
|
/* Count the rt and quota files that are rooted in the superblock. */
|
|
error = xchk_nlinks_collect_metafiles(xnc);
|
|
if (error)
|
|
return error;
|
|
|
|
/*
|
|
* Set up for a potentially lengthy filesystem scan by reducing our
|
|
* transaction resource usage for the duration. Specifically:
|
|
*
|
|
* Cancel the transaction to release the log grant space while we scan
|
|
* the filesystem.
|
|
*
|
|
* Create a new empty transaction to eliminate the possibility of the
|
|
* inode scan deadlocking on cyclical metadata.
|
|
*
|
|
* We pass the empty transaction to the file scanning function to avoid
|
|
* repeatedly cycling empty transactions. This can be done even though
|
|
* we take the IOLOCK to quiesce the file because empty transactions
|
|
* do not take sb_internal.
|
|
*/
|
|
xchk_trans_cancel(sc);
|
|
error = xchk_trans_alloc_empty(sc);
|
|
if (error)
|
|
return error;
|
|
|
|
while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
|
|
if (S_ISDIR(VFS_I(ip)->i_mode))
|
|
error = xchk_nlinks_collect_dir(xnc, ip);
|
|
else
|
|
error = xchk_nlinks_collect_file(xnc, ip);
|
|
xchk_irele(sc, ip);
|
|
if (error)
|
|
break;
|
|
|
|
if (xchk_should_terminate(sc, &error))
|
|
break;
|
|
}
|
|
xchk_iscan_iter_finish(&xnc->collect_iscan);
|
|
if (error) {
|
|
xchk_set_incomplete(sc);
|
|
/*
|
|
* If we couldn't grab an inode that was busy with a state
|
|
* change, change the error code so that we exit to userspace
|
|
* as quickly as possible.
|
|
*/
|
|
if (error == -EBUSY)
|
|
return -ECANCELED;
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Switch out for a real transaction in preparation for building a new
|
|
* tree.
|
|
*/
|
|
xchk_trans_cancel(sc);
|
|
return xchk_setup_fs(sc);
|
|
}
|
|
|
|
/*
 * Part 2: Comparing file link counters.  Walk each inode and compare the link
 * counts against our shadow information; and then walk each shadow link count
 * structure (that wasn't covered in the first part), comparing it against the
 * file.
 */
|
|
|
|
/* Read the observed link count for comparison with the actual inode. */
|
|
STATIC int
|
|
xchk_nlinks_comparison_read(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
xfs_ino_t ino,
|
|
struct xchk_nlink *obs)
|
|
{
|
|
struct xchk_nlink nl;
|
|
int error;
|
|
|
|
error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
|
|
if (error)
|
|
return error;
|
|
|
|
nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
|
|
|
|
error = xfarray_store(xnc->nlinks, ino, &nl);
|
|
if (error == -EFBIG) {
|
|
/*
|
|
* EFBIG means we tried to store data at too high a byte offset
|
|
* in the sparse array. IOWs, we cannot complete the check and
|
|
* must notify userspace that the check was incomplete. This
|
|
* shouldn't really happen outside of the collection phase.
|
|
*/
|
|
xchk_set_incomplete(xnc->sc);
|
|
return -ECANCELED;
|
|
}
|
|
if (error)
|
|
return error;
|
|
|
|
/* Copy the counters, but do not expose the internal state. */
|
|
obs->parents = nl.parents;
|
|
obs->backrefs = nl.backrefs;
|
|
obs->children = nl.children;
|
|
obs->flags = 0;
|
|
return 0;
|
|
}
|
|
|
|
/* Check our link count against an inode. */
|
|
STATIC int
|
|
xchk_nlinks_compare_inode(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
struct xfs_inode *ip)
|
|
{
|
|
struct xchk_nlink obs;
|
|
struct xfs_scrub *sc = xnc->sc;
|
|
uint64_t total_links;
|
|
unsigned int actual_nlink;
|
|
int error;
|
|
|
|
xfs_ilock(ip, XFS_ILOCK_SHARED);
|
|
mutex_lock(&xnc->lock);
|
|
|
|
if (xchk_iscan_aborted(&xnc->collect_iscan)) {
|
|
xchk_set_incomplete(xnc->sc);
|
|
error = -ECANCELED;
|
|
goto out_scanlock;
|
|
}
|
|
|
|
error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
|
|
if (error)
|
|
goto out_scanlock;
|
|
|
|
/*
|
|
* If we don't have ftype to get an accurate count of the subdirectory
|
|
* entries in this directory, take advantage of the fact that on a
|
|
* consistent ftype=0 filesystem, the number of subdirectory
|
|
* backreferences (dotdot entries) pointing towards this directory
|
|
* should be equal to the number of subdirectory entries in the
|
|
* directory.
|
|
*/
|
|
if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
|
|
obs.children = obs.backrefs;
|
|
|
|
total_links = xchk_nlink_total(ip, &obs);
|
|
actual_nlink = VFS_I(ip)->i_nlink;
|
|
|
|
trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
|
|
|
|
/*
|
|
* If we found so many parents that we'd overflow i_nlink, we must flag
|
|
* this as a corruption. The VFS won't let users increase the link
|
|
* count, but it will let them decrease it.
|
|
*/
|
|
if (total_links > XFS_MAXLINK) {
|
|
xchk_ino_set_corrupt(sc, ip->i_ino);
|
|
goto out_corrupt;
|
|
}
|
|
|
|
/* Link counts should match. */
|
|
if (total_links != actual_nlink) {
|
|
xchk_ino_set_corrupt(sc, ip->i_ino);
|
|
goto out_corrupt;
|
|
}
|
|
|
|
if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
|
|
/*
|
|
* The collection phase ignores directories with zero link
|
|
* count, so we ignore them here too.
|
|
*
|
|
* The number of subdirectory backreferences (dotdot entries)
|
|
* pointing towards this directory should be equal to the
|
|
* number of subdirectory entries in the directory.
|
|
*/
|
|
if (obs.children != obs.backrefs)
|
|
xchk_ino_xref_set_corrupt(sc, ip->i_ino);
|
|
} else {
|
|
/*
|
|
* Non-directories and unlinked directories should not have
|
|
* back references.
|
|
*/
|
|
if (obs.backrefs != 0) {
|
|
xchk_ino_set_corrupt(sc, ip->i_ino);
|
|
goto out_corrupt;
|
|
}
|
|
|
|
/*
|
|
* Non-directories and unlinked directories should not have
|
|
* children.
|
|
*/
|
|
if (obs.children != 0) {
|
|
xchk_ino_set_corrupt(sc, ip->i_ino);
|
|
goto out_corrupt;
|
|
}
|
|
}
|
|
|
|
if (ip == sc->mp->m_rootip) {
|
|
/*
|
|
* For the root of a directory tree, both the '.' and '..'
|
|
* entries should point to the root directory. The dotdot
|
|
* entry is counted as a parent of the root /and/ a backref of
|
|
* the root directory.
|
|
*/
|
|
if (obs.parents != 1) {
|
|
xchk_ino_set_corrupt(sc, ip->i_ino);
|
|
goto out_corrupt;
|
|
}
|
|
} else if (actual_nlink > 0) {
|
|
/*
|
|
* Linked files that are not the root directory should have at
|
|
* least one parent.
|
|
*/
|
|
if (obs.parents == 0) {
|
|
xchk_ino_set_corrupt(sc, ip->i_ino);
|
|
goto out_corrupt;
|
|
}
|
|
}
|
|
|
|
out_corrupt:
|
|
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
|
error = -ECANCELED;
|
|
out_scanlock:
|
|
mutex_unlock(&xnc->lock);
|
|
xfs_iunlock(ip, XFS_ILOCK_SHARED);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Check our link count against an inode that wasn't checked previously. This
|
|
* is intended to catch directories with dangling links, though we could be
|
|
* racing with inode allocation in other threads.
|
|
*/
|
|
STATIC int
|
|
xchk_nlinks_compare_inum(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
xfs_ino_t ino)
|
|
{
|
|
struct xchk_nlink obs;
|
|
struct xfs_mount *mp = xnc->sc->mp;
|
|
struct xfs_trans *tp = xnc->sc->tp;
|
|
struct xfs_buf *agi_bp;
|
|
struct xfs_inode *ip;
|
|
int error;
|
|
|
|
/*
|
|
* The first iget failed, so try again with the variant that returns
|
|
* either an incore inode or the AGI buffer. If the function returns
|
|
* EINVAL/ENOENT, it should have passed us the AGI buffer so that we
|
|
* can guarantee that the inode won't be allocated while we check for
|
|
* a zero link count in the observed link count data.
|
|
*/
|
|
error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
|
|
if (!error) {
|
|
/* Actually got an inode, so use the inode compare. */
|
|
error = xchk_nlinks_compare_inode(xnc, ip);
|
|
xchk_irele(xnc->sc, ip);
|
|
return error;
|
|
}
|
|
if (error == -ENOENT || error == -EINVAL) {
|
|
/* No inode was found. Check for zero link count below. */
|
|
error = 0;
|
|
}
|
|
if (error)
|
|
goto out_agi;
|
|
|
|
/* Ensure that we have protected against inode allocation/freeing. */
|
|
if (agi_bp == NULL) {
|
|
ASSERT(agi_bp != NULL);
|
|
xchk_set_incomplete(xnc->sc);
|
|
return -ECANCELED;
|
|
}
|
|
|
|
if (xchk_iscan_aborted(&xnc->collect_iscan)) {
|
|
xchk_set_incomplete(xnc->sc);
|
|
error = -ECANCELED;
|
|
goto out_agi;
|
|
}
|
|
|
|
mutex_lock(&xnc->lock);
|
|
error = xchk_nlinks_comparison_read(xnc, ino, &obs);
|
|
if (error)
|
|
goto out_scanlock;
|
|
|
|
trace_xchk_nlinks_check_zero(mp, ino, &obs);
|
|
|
|
/*
|
|
* If we can't grab the inode, the link count had better be zero. We
|
|
* still hold the AGI to prevent inode allocation/freeing.
|
|
*/
|
|
if (xchk_nlink_total(NULL, &obs) != 0) {
|
|
xchk_ino_set_corrupt(xnc->sc, ino);
|
|
error = -ECANCELED;
|
|
}
|
|
|
|
out_scanlock:
|
|
mutex_unlock(&xnc->lock);
|
|
out_agi:
|
|
if (agi_bp)
|
|
xfs_trans_brelse(tp, agi_bp);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Try to visit every inode in the filesystem to compare the link count. Move
|
|
* on if we can't grab an inode, since we'll revisit unchecked nlink records in
|
|
* the second part.
|
|
*/
|
|
static int
|
|
xchk_nlinks_compare_iter(
|
|
struct xchk_nlink_ctrs *xnc,
|
|
struct xfs_inode **ipp)
|
|
{
|
|
int error;
|
|
|
|
do {
|
|
error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
|
|
} while (error == -EBUSY);
|
|
|
|
return error;
|
|
}
|
|
|
|
/* Compare the link counts we observed against the live information. */
|
|
STATIC int
|
|
xchk_nlinks_compare(
|
|
struct xchk_nlink_ctrs *xnc)
|
|
{
|
|
struct xchk_nlink nl;
|
|
struct xfs_scrub *sc = xnc->sc;
|
|
struct xfs_inode *ip;
|
|
xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
|
|
int error;
|
|
|
|
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
|
return 0;
|
|
|
|
/*
|
|
* Create a new empty transaction so that we can advance the iscan
|
|
* cursor without deadlocking if the inobt has a cycle and push on the
|
|
* inactivation workqueue.
|
|
*/
|
|
xchk_trans_cancel(sc);
|
|
error = xchk_trans_alloc_empty(sc);
|
|
if (error)
|
|
return error;
|
|
|
|
/*
|
|
* Use the inobt to walk all allocated inodes to compare the link
|
|
* counts. Inodes skipped by _compare_iter will be tried again in the
|
|
* next phase of the scan.
|
|
*/
|
|
xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
|
|
while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
|
|
error = xchk_nlinks_compare_inode(xnc, ip);
|
|
xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
|
|
xchk_irele(sc, ip);
|
|
if (error)
|
|
break;
|
|
|
|
if (xchk_should_terminate(sc, &error))
|
|
break;
|
|
}
|
|
xchk_iscan_iter_finish(&xnc->compare_iscan);
|
|
xchk_iscan_teardown(&xnc->compare_iscan);
|
|
if (error)
|
|
return error;
|
|
|
|
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
|
|
return 0;
|
|
|
|
/*
|
|
* Walk all the non-null nlink observations that weren't checked in the
|
|
* previous step.
|
|
*/
|
|
mutex_lock(&xnc->lock);
|
|
while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
|
|
xfs_ino_t ino = cur - 1;
|
|
|
|
if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
|
|
continue;
|
|
|
|
mutex_unlock(&xnc->lock);
|
|
|
|
error = xchk_nlinks_compare_inum(xnc, ino);
|
|
if (error)
|
|
return error;
|
|
|
|
if (xchk_should_terminate(xnc->sc, &error))
|
|
return error;
|
|
|
|
mutex_lock(&xnc->lock);
|
|
}
|
|
mutex_unlock(&xnc->lock);
|
|
|
|
return error;
|
|
}
|
|
|
|
/* Tear down everything associated with a nlinks check. */
|
|
static void
|
|
xchk_nlinks_teardown_scan(
|
|
void *priv)
|
|
{
|
|
struct xchk_nlink_ctrs *xnc = priv;
|
|
|
|
/* Discourage any hook functions that might be running. */
|
|
xchk_iscan_abort(&xnc->collect_iscan);
|
|
|
|
xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);
|
|
|
|
xfarray_destroy(xnc->nlinks);
|
|
xnc->nlinks = NULL;
|
|
|
|
xchk_iscan_teardown(&xnc->collect_iscan);
|
|
mutex_destroy(&xnc->lock);
|
|
xnc->sc = NULL;
|
|
}
|
|
|
|
/*
|
|
* Scan all inodes in the entire filesystem to generate link count data. If
|
|
* the scan is successful, the counts will be left alive for a repair. If any
|
|
* error occurs, we'll tear everything down.
|
|
*/
|
|
STATIC int
|
|
xchk_nlinks_setup_scan(
|
|
struct xfs_scrub *sc,
|
|
struct xchk_nlink_ctrs *xnc)
|
|
{
|
|
struct xfs_mount *mp = sc->mp;
|
|
char *descr;
|
|
unsigned long long max_inos;
|
|
xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1;
|
|
xfs_agino_t first_agino, last_agino;
|
|
int error;
|
|
|
|
ASSERT(xnc->sc == NULL);
|
|
xnc->sc = sc;
|
|
|
|
mutex_init(&xnc->lock);
|
|
|
|
/* Retry iget every tenth of a second for up to 30 seconds. */
|
|
xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
|
|
|
|
/*
|
|
* Set up enough space to store an nlink record for the highest
|
|
* possible inode number in this system.
|
|
*/
|
|
xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
|
|
max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
|
|
descr = xchk_xfile_descr(sc, "file link counts");
|
|
error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
|
|
sizeof(struct xchk_nlink), &xnc->nlinks);
|
|
kfree(descr);
|
|
if (error)
|
|
goto out_teardown;
|
|
|
|
/*
|
|
* Hook into the directory entry code so that we can capture updates to
|
|
* file link counts. The hook only triggers for inodes that were
|
|
* already scanned, and the scanner thread takes each inode's ILOCK,
|
|
* which means that any in-progress inode updates will finish before we
|
|
* can scan the inode.
|
|
*/
|
|
ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
|
|
xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update);
|
|
error = xfs_dir_hook_add(mp, &xnc->dhook);
|
|
if (error)
|
|
goto out_teardown;
|
|
|
|
/* Use deferred cleanup to pass the inode link count data to repair. */
|
|
sc->buf_cleanup = xchk_nlinks_teardown_scan;
|
|
return 0;
|
|
|
|
out_teardown:
|
|
xchk_nlinks_teardown_scan(xnc);
|
|
return error;
|
|
}
|
|
|
|
/* Scrub the link count of all inodes on the filesystem. */
|
|
int
|
|
xchk_nlinks(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
struct xchk_nlink_ctrs *xnc = sc->buf;
|
|
int error = 0;
|
|
|
|
/* Set ourselves up to check link counts on the live filesystem. */
|
|
error = xchk_nlinks_setup_scan(sc, xnc);
|
|
if (error)
|
|
return error;
|
|
|
|
/* Walk all inodes, picking up link count information. */
|
|
error = xchk_nlinks_collect(xnc);
|
|
if (!xchk_xref_process_error(sc, 0, 0, &error))
|
|
return error;
|
|
|
|
/* Fail fast if we're not playing with a full dataset. */
|
|
if (xchk_iscan_aborted(&xnc->collect_iscan))
|
|
xchk_set_incomplete(sc);
|
|
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
|
|
return 0;
|
|
|
|
/* Compare link counts. */
|
|
error = xchk_nlinks_compare(xnc);
|
|
if (!xchk_xref_process_error(sc, 0, 0, &error))
|
|
return error;
|
|
|
|
/* Check one last time for an incomplete dataset. */
|
|
if (xchk_iscan_aborted(&xnc->collect_iscan))
|
|
xchk_set_incomplete(sc);
|
|
|
|
return 0;
|
|
}
|