diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 5e3ac7ec8fa5..7dbe6b3befb3 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -165,6 +165,7 @@ xfs-y += $(addprefix scrub/, \ ialloc.o \ inode.o \ iscan.o \ + listxattr.o \ nlinks.o \ parent.o \ readdir.o \ @@ -194,6 +195,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y) xfs-y += $(addprefix scrub/, \ agheader_repair.o \ alloc_repair.o \ + attr_repair.o \ bmap_repair.o \ cow_repair.o \ fscounters_repair.o \ @@ -208,6 +210,7 @@ xfs-y += $(addprefix scrub/, \ repair.o \ rmap_repair.o \ tempfile.o \ + xfblob.o \ ) xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index b3c9666cd011..05d22c5e3885 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1055,7 +1055,7 @@ out_trans_cancel: * External routines when attribute list is inside the inode *========================================================================*/ -static inline int xfs_attr_sf_totsize(struct xfs_inode *dp) +int xfs_attr_sf_totsize(struct xfs_inode *dp) { struct xfs_attr_sf_hdr *sf = dp->i_af.if_data; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 81be9b3e4004..e4f55008552b 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -618,4 +618,6 @@ extern struct kmem_cache *xfs_attr_intent_cache; int __init xfs_attr_intent_init_cache(void); void xfs_attr_intent_destroy_cache(void); +int xfs_attr_sf_totsize(struct xfs_inode *dp); + #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 060e5c96b70f..aac3fe039614 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -721,6 +721,11 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT) #define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NAMESPACE_STR \ + { XFS_ATTR_LOCAL, "local" }, \ + { XFS_ATTR_ROOT, "root" }, \ + { XFS_ATTR_SECURE, "secure" } + /* * Alignment for namelist and valuelist entries (since they are mixed * there can be only one alignment value) diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c index 3880ae32eecf..44ab6a9235c0 100644 --- a/fs/xfs/libxfs/xfs_exchmaps.c +++ b/fs/xfs/libxfs/xfs_exchmaps.c @@ -675,7 +675,7 @@ xfs_exchmaps_rmapbt_blocks( } /* Estimate the bmbt and rmapbt overhead required to exchange mappings. */ -static int +int xfs_exchmaps_estimate_overhead( struct xfs_exchmaps_req *req) { diff --git a/fs/xfs/libxfs/xfs_exchmaps.h b/fs/xfs/libxfs/xfs_exchmaps.h index d8718fca606e..fa822dff202a 100644 --- a/fs/xfs/libxfs/xfs_exchmaps.h +++ b/fs/xfs/libxfs/xfs_exchmaps.h @@ -97,6 +97,7 @@ xfs_exchmaps_reqfork(const struct xfs_exchmaps_req *req) return XFS_DATA_FORK; } +int xfs_exchmaps_estimate_overhead(struct xfs_exchmaps_req *req); int xfs_exchmaps_estimate(struct xfs_exchmaps_req *req); extern struct kmem_cache *xfs_exchmaps_intent_cache; diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 0c467f4f8e77..8853e4d0eee3 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -10,6 +10,7 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_log_format.h" +#include "xfs_trans.h" #include "xfs_inode.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" @@ -20,6 +21,8 @@ #include "scrub/common.h" #include "scrub/dabtree.h" #include "scrub/attr.h" +#include "scrub/listxattr.h" +#include "scrub/repair.h" /* Free the buffers linked from the xattr buffer. */ static void @@ -35,6 +38,8 @@ xchk_xattr_buf_cleanup( kvfree(ab->value); ab->value = NULL; ab->value_sz = 0; + kvfree(ab->name); + ab->name = NULL; } /* @@ -65,7 +70,7 @@ xchk_xattr_want_freemap( * reallocating the buffer if necessary. Buffer contents are not preserved * across a reallocation. */ -static int +int xchk_setup_xattr_buf( struct xfs_scrub *sc, size_t value_size) @@ -95,6 +100,12 @@ xchk_setup_xattr_buf( return -ENOMEM; } + if (xchk_could_repair(sc)) { + ab->name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS); + if (!ab->name) + return -ENOMEM; + } + resize_value: if (ab->value_sz >= value_size) return 0; @@ -121,6 +132,12 @@ xchk_setup_xattr( { int error; + if (xchk_could_repair(sc)) { + error = xrep_setup_xattr(sc); + if (error) + return error; + } + /* * We failed to get memory while checking attrs, so this time try to * get all the memory we're ever going to need. Allocate the buffer @@ -137,90 +154,81 @@ xchk_setup_xattr( /* Extended Attributes */ -struct xchk_xattr { - struct xfs_attr_list_context context; - struct xfs_scrub *sc; -}; - /* * Check that an extended attribute key can be looked up by hash. * - * We use the XFS attribute list iterator (i.e. xfs_attr_list_ilocked) - * to call this function for every attribute key in an inode. Once - * we're here, we load the attribute value to see if any errors happen, - * or if we get more or less data than we expected. + * We use the extended attribute walk helper to call this function for every + * attribute key in an inode. Once we're here, we load the attribute value to + * see if any errors happen, or if we get more or less data than we expected. */ -static void -xchk_xattr_listent( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - int valuelen) +static int +xchk_xattr_actor( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) { struct xfs_da_args args = { .op_flags = XFS_DA_OP_NOTIME, - .attr_filter = flags & XFS_ATTR_NSP_ONDISK_MASK, - .geo = context->dp->i_mount->m_attr_geo, + .attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK, + .geo = sc->mp->m_attr_geo, .whichfork = XFS_ATTR_FORK, - .dp = context->dp, + .dp = ip, .name = name, .namelen = namelen, .hashval = xfs_da_hashname(name, namelen), - .trans = context->tp, + .trans = sc->tp, .valuelen = valuelen, - .owner = context->dp->i_ino, + .owner = ip->i_ino, }; struct xchk_xattr_buf *ab; - struct xchk_xattr *sx; int error = 0; - sx = container_of(context, struct xchk_xattr, context); - ab = sx->sc->buf; + ab = sc->buf; - if (xchk_should_terminate(sx->sc, &error)) { - context->seen_enough = error; - return; - } + if (xchk_should_terminate(sc, &error)) + return error; - if (flags & XFS_ATTR_INCOMPLETE) { + if (attr_flags & XFS_ATTR_INCOMPLETE) { /* Incomplete attr key, just mark the inode for preening. */ - xchk_ino_set_preen(sx->sc, context->dp->i_ino); - return; + xchk_ino_set_preen(sc, ip->i_ino); + return 0; } /* Only one namespace bit allowed. */ - if (hweight32(flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) { - xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno); - goto fail_xref; + if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) { + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); + return -ECANCELED; } /* Does this name make sense? */ if (!xfs_attr_namecheck(name, namelen)) { - xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno); - goto fail_xref; + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); + return -ECANCELED; } /* - * Local xattr values are stored in the attr leaf block, so we don't - * need to retrieve the value from a remote block to detect corruption - * problems. + * Local and shortform xattr values are stored in the attr leaf block, + * so we don't need to retrieve the value from a remote block to detect + * corruption problems. */ - if (flags & XFS_ATTR_LOCAL) - goto fail_xref; + if (value) + return 0; /* - * Try to allocate enough memory to extrat the attr value. If that - * doesn't work, we overload the seen_enough variable to convey - * the error message back to the main scrub function. + * Try to allocate enough memory to extract the attr value. If that + * doesn't work, return -EDEADLOCK as a signal to try again with a + * maximally sized buffer. */ - error = xchk_setup_xattr_buf(sx->sc, valuelen); + error = xchk_setup_xattr_buf(sc, valuelen); if (error == -ENOMEM) error = -EDEADLOCK; - if (error) { - context->seen_enough = error; - return; - } + if (error) + return error; args.value = ab->value; @@ -228,16 +236,13 @@ xchk_xattr_listent( /* ENODATA means the hash lookup failed and the attr is bad */ if (error == -ENODATA) error = -EFSCORRUPTED; - if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, + if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, args.blkno, &error)) - goto fail_xref; + return error; if (args.valuelen != valuelen) - xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, - args.blkno); -fail_xref: - if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - context->seen_enough = 1; - return; + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); + + return 0; } /* @@ -247,7 +252,7 @@ fail_xref: * Within a char, the lowest bit of the char represents the byte with * the smallest address */ -STATIC bool +bool xchk_xattr_set_map( struct xfs_scrub *sc, unsigned long *map, @@ -404,6 +409,17 @@ xchk_xattr_block( xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); hdrsize = xfs_attr3_leaf_hdr_size(leaf); + /* + * Empty xattr leaf blocks mapped at block 0 are probably a byproduct + * of a race between setxattr and a log shutdown. Anywhere else in the + * attr fork is a corruption. + */ + if (leafhdr.count == 0) { + if (blk->blkno == 0) + xchk_da_set_preen(ds, level); + else + xchk_da_set_corrupt(ds, level); + } if (leafhdr.usedbytes > mp->m_attr_geo->blksize) xchk_da_set_corrupt(ds, level); if (leafhdr.firstused > mp->m_attr_geo->blksize) @@ -412,6 +428,8 @@ xchk_xattr_block( xchk_da_set_corrupt(ds, level); if (!xchk_xattr_set_map(ds->sc, ab->usedmap, 0, hdrsize)) xchk_da_set_corrupt(ds, level); + if (leafhdr.holes) + xchk_da_set_preen(ds, level); if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; @@ -589,16 +607,6 @@ int xchk_xattr( struct xfs_scrub *sc) { - struct xchk_xattr sx = { - .sc = sc, - .context = { - .dp = sc->ip, - .tp = sc->tp, - .resynch = 1, - .put_listent = xchk_xattr_listent, - .allow_incomplete = true, - }, - }; xfs_dablk_t last_checked = -1U; int error = 0; @@ -627,12 +635,6 @@ xchk_xattr( /* * Look up every xattr in this file by name and hash. * - * Use the backend implementation of xfs_attr_list to call - * xchk_xattr_listent on every attribute key in this inode. - * In other words, we use the same iterator/callback mechanism - * that listattr uses to scrub extended attributes, though in our - * _listent function, we check the value of the attribute. - * * The VFS only locks i_rwsem when modifying attrs, so keep all * three locks held because that's the only way to ensure we're * the only thread poking into the da btree. We traverse the da @@ -640,13 +642,9 @@ xchk_xattr( * iteration, which doesn't really follow the usual buffer * locking order. */ - error = xfs_attr_list_ilocked(&sx.context); + error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL); if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) return error; - /* Did our listent function try to return any errors? */ - if (sx.context.seen_enough < 0) - return sx.context.seen_enough; - return 0; } diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h index 48fd9402c432..7db58af56646 100644 --- a/fs/xfs/scrub/attr.h +++ b/fs/xfs/scrub/attr.h @@ -16,9 +16,16 @@ struct xchk_xattr_buf { /* Bitmap of free space in xattr leaf blocks. */ unsigned long *freemap; + /* Memory buffer used to hold salvaged xattr names. */ + unsigned char *name; + /* Memory buffer used to extract xattr values. */ void *value; size_t value_sz; }; +bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map, + unsigned int start, unsigned int len); +int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size); + #endif /* __XFS_SCRUB_ATTR_H__ */ diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c new file mode 100644 index 000000000000..7b4318764d03 --- /dev/null +++ b/fs/xfs/scrub/attr_repair.c @@ -0,0 +1,1207 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_exchmaps.h" +#include "xfs_exchrange.h" +#include "xfs_acl.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/tempfile.h" +#include "scrub/tempexch.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" +#include "scrub/attr.h" +#include "scrub/reap.h" +#include "scrub/attr_repair.h" + +/* + * Extended Attribute Repair + * ========================= + * + * We repair extended attributes by reading the attr leaf blocks looking for + * attributes entries that look salvageable (name passes verifiers, value can + * be retrieved, etc). Each extended attribute worth salvaging is stashed in + * memory, and the stashed entries are periodically replayed into a temporary + * file to constrain memory use. Batching the construction of the temporary + * extended attribute structure in this fashion reduces lock cycling of the + * file being repaired and the temporary file. + * + * When salvaging completes, the remaining stashed attributes are replayed to + * the temporary file. An atomic file contents exchange is used to commit the + * new xattr blocks to the file being repaired. This will disrupt attrmulti + * cursors. + */ + +struct xrep_xattr_key { + /* Cookie for retrieval of the xattr name. */ + xfblob_cookie name_cookie; + + /* Cookie for retrieval of the xattr value. */ + xfblob_cookie value_cookie; + + /* XFS_ATTR_* flags */ + int flags; + + /* Length of the value and name. */ + uint32_t valuelen; + uint16_t namelen; +}; + +/* + * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write + * them to the temp file. + */ +#define XREP_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8) + +struct xrep_xattr { + struct xfs_scrub *sc; + + /* Information for exchanging attr fork mappings at the end. */ + struct xrep_tempexch tx; + + /* xattr keys */ + struct xfarray *xattr_records; + + /* xattr values */ + struct xfblob *xattr_blobs; + + /* Number of attributes that we are salvaging. */ + unsigned long long attrs_found; +}; + +/* Set up to recreate the extended attributes. */ +int +xrep_setup_xattr( + struct xfs_scrub *sc) +{ + return xrep_tempfile_create(sc, S_IFREG); +} + +/* + * Decide if we want to salvage this attribute. We don't bother with + * incomplete or oversized keys or values. The @value parameter can be null + * for remote attrs. + */ +STATIC int +xrep_xattr_want_salvage( + struct xrep_xattr *rx, + unsigned int attr_flags, + const void *name, + int namelen, + const void *value, + int valuelen) +{ + if (attr_flags & XFS_ATTR_INCOMPLETE) + return false; + if (namelen > XATTR_NAME_MAX || namelen <= 0) + return false; + if (!xfs_attr_namecheck(name, namelen)) + return false; + if (valuelen > XATTR_SIZE_MAX || valuelen < 0) + return false; + if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) + return false; + return true; +} + +/* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */ +STATIC int +xrep_xattr_salvage_key( + struct xrep_xattr *rx, + int flags, + unsigned char *name, + int namelen, + unsigned char *value, + int valuelen) +{ + struct xrep_xattr_key key = { + .valuelen = valuelen, + .flags = flags & XFS_ATTR_NSP_ONDISK_MASK, + }; + unsigned int i = 0; + int error = 0; + + if (xchk_should_terminate(rx->sc, &error)) + return error; + + /* + * Truncate the name to the first character that would trip namecheck. + * If we no longer have a name after that, ignore this attribute. + */ + while (i < namelen && name[i] != 0) + i++; + if (i == 0) + return 0; + key.namelen = i; + + trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen, + valuelen); + + error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name, + key.namelen); + if (error) + return error; + + error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value, + key.valuelen); + if (error) + return error; + + error = xfarray_append(rx->xattr_records, &key); + if (error) + return error; + + rx->attrs_found++; + return 0; +} + +/* + * Record a shortform extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_sf_attr( + struct xrep_xattr *rx, + struct xfs_attr_sf_hdr *hdr, + struct xfs_attr_sf_entry *sfe) +{ + struct xfs_scrub *sc = rx->sc; + struct xchk_xattr_buf *ab = sc->buf; + unsigned char *name = sfe->nameval; + unsigned char *value = &sfe->nameval[sfe->namelen]; + + if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr, + sfe->namelen)) + return 0; + + if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr, + sfe->valuelen)) + return 0; + + if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval, + sfe->namelen, value, sfe->valuelen)) + return 0; + + return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval, + sfe->namelen, value, sfe->valuelen); +} + +/* + * Record a local format extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_local_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_local *lentry) +{ + struct xchk_xattr_buf *ab = rx->sc->buf; + unsigned char *value; + unsigned int valuelen; + unsigned int namesize; + + /* + * Decode the leaf local entry format. If something seems wrong, we + * junk the attribute. + */ + value = &lentry->nameval[lentry->namelen]; + valuelen = be16_to_cpu(lentry->valuelen); + namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen); + if ((char *)lentry + namesize > buf_end) + return 0; + if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval, + lentry->namelen, value, valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) + return 0; + + /* Try to save this attribute. */ + return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval, + lentry->namelen, value, valuelen); +} + +/* + * Record a remote format extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_remote_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_remote *rentry, + unsigned int ent_idx, + struct xfs_buf *leaf_bp) +{ + struct xchk_xattr_buf *ab = rx->sc->buf; + struct xfs_da_args args = { + .trans = rx->sc->tp, + .dp = rx->sc->ip, + .index = ent_idx, + .geo = rx->sc->mp->m_attr_geo, + .owner = rx->sc->ip->i_ino, + .attr_filter = ent->flags & XFS_ATTR_NSP_ONDISK_MASK, + .namelen = rentry->namelen, + .name = rentry->name, + .value = ab->value, + .valuelen = be32_to_cpu(rentry->valuelen), + }; + unsigned int namesize; + int error; + + /* + * Decode the leaf remote entry format. If something seems wrong, we + * junk the attribute. Note that we should never find a zero-length + * remote attribute value. + */ + namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); + if ((char *)rentry + namesize > buf_end) + return 0; + if (args.valuelen == 0 || + !xrep_xattr_want_salvage(rx, ent->flags, rentry->name, + rentry->namelen, NULL, args.valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) + return 0; + + /* + * Enlarge the buffer (if needed) to hold the value that we're trying + * to salvage from the old extended attribute data. + */ + error = xchk_setup_xattr_buf(rx->sc, args.valuelen); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) + return error; + + /* Look up the remote value and stash it for reconstruction. */ + error = xfs_attr3_leaf_getvalue(leaf_bp, &args); + if (error || args.rmtblkno == 0) + goto err_free; + + error = xfs_attr_rmtval_get(&args); + if (error) + goto err_free; + + /* Try to save this attribute. */ + error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name, + rentry->namelen, ab->value, args.valuelen); +err_free: + /* remote value was garbage, junk it */ + if (error == -EFSBADCRC || error == -EFSCORRUPTED) + error = 0; + return error; +} + +/* Extract every xattr key that we can from this attr fork block. */ +STATIC int +xrep_xattr_recover_leaf( + struct xrep_xattr *rx, + struct xfs_buf *bp) +{ + struct xfs_attr3_icleaf_hdr leafhdr; + struct xfs_scrub *sc = rx->sc; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf; + struct xfs_attr_leaf_name_local *lentry; + struct xfs_attr_leaf_name_remote *rentry; + struct xfs_attr_leaf_entry *ent; + struct xfs_attr_leaf_entry *entries; + struct xchk_xattr_buf *ab = rx->sc->buf; + char *buf_end; + size_t off; + unsigned int nameidx; + unsigned int hdrsize; + int i; + int error = 0; + + bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); + + /* Check the leaf header */ + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); + hdrsize = xfs_attr3_leaf_hdr_size(leaf); + xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize); + entries = xfs_attr3_leaf_entryp(leaf); + + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; + for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { + if (xchk_should_terminate(sc, &error)) + return error; + + /* Skip key if it conflicts with something else? */ + off = (char *)ent - (char *)leaf; + if (!xchk_xattr_set_map(sc, ab->usedmap, off, + sizeof(xfs_attr_leaf_entry_t))) + continue; + + /* Check the name information. */ + nameidx = be16_to_cpu(ent->nameidx); + if (nameidx < leafhdr.firstused || + nameidx >= mp->m_attr_geo->blksize) + continue; + + if (ent->flags & XFS_ATTR_LOCAL) { + lentry = xfs_attr3_leaf_name_local(leaf, i); + error = xrep_xattr_salvage_local_attr(rx, ent, nameidx, + buf_end, lentry); + } else { + rentry = xfs_attr3_leaf_name_remote(leaf, i); + error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx, + buf_end, rentry, i, bp); + } + if (error) + return error; + } + + return 0; +} + +/* Try to recover shortform attrs. */ +STATIC int +xrep_xattr_recover_sf( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + struct xchk_xattr_buf *ab = sc->buf; + struct xfs_attr_sf_hdr *hdr; + struct xfs_attr_sf_entry *sfe; + struct xfs_attr_sf_entry *next; + struct xfs_ifork *ifp; + unsigned char *end; + int i; + int error = 0; + + ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK); + hdr = ifp->if_data; + + bitmap_zero(ab->usedmap, ifp->if_bytes); + end = (unsigned char *)ifp->if_data + ifp->if_bytes; + xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr)); + + sfe = xfs_attr_sf_firstentry(hdr); + if ((unsigned char *)sfe > end) + return 0; + + for (i = 0; i < hdr->count; i++) { + if (xchk_should_terminate(sc, &error)) + return error; + + next = xfs_attr_sf_nextentry(sfe); + if ((unsigned char *)next > end) + break; + + if (xchk_xattr_set_map(sc, ab->usedmap, + (char *)sfe - (char *)hdr, + sizeof(struct xfs_attr_sf_entry))) { + /* + * No conflicts with the sf entry; let's save this + * attribute. + */ + error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe); + if (error) + return error; + } + + sfe = next; + } + + return 0; +} + +/* + * Try to return a buffer of xattr data for a given physical extent. + * + * Because the buffer cache get function complains if it finds a buffer + * matching the block number but not matching the length, we must be careful to + * look for incore buffers (up to the maximum length of a remote value) that + * could be hiding anywhere in the physical range. If we find an incore + * buffer, we can pass that to the caller. Optionally, read a single block and + * pass that back. + * + * Note the subtlety that remote attr value blocks for which there is no incore + * buffer will be passed to the callback one block at a time. These buffers + * will not have any ops attached and must be staled to prevent aliasing with + * multiblock buffers once we drop the ILOCK. + */ +STATIC int +xrep_xattr_find_buf( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + xfs_extlen_t max_len, + bool can_read, + struct xfs_buf **bpp) +{ + struct xrep_bufscan scan = { + .daddr = XFS_FSB_TO_DADDR(mp, fsbno), + .max_sectors = xrep_bufscan_max_sectors(mp, max_len), + .daddr_step = XFS_FSB_TO_BB(mp, 1), + }; + struct xfs_buf *bp; + + while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) { + *bpp = bp; + return 0; + } + + if (!can_read) { + *bpp = NULL; + return 0; + } + + return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1), + XBF_TRYLOCK, bpp, NULL); +} + +/* + * Deal with a buffer that we found during our walk of the attr fork. + * + * Attribute leaf and node blocks are simple -- they're a single block, so we + * can walk them one at a time and we never have to worry about discontiguous + * multiblock buffers like we do for directories. + * + * Unfortunately, remote attr blocks add a lot of complexity here. Each disk + * block is totally self contained, in the sense that the v5 header provides no + * indication that there could be more data in the next block. The incore + * buffers can span multiple blocks, though they never cross extent records. + * However, they don't necessarily start or end on an extent record boundary. + * Therefore, we need a special buffer find function to walk the buffer cache + * for us. + * + * The caller must hold the ILOCK on the file being repaired. We use + * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't + * own the block and don't want to hang the system on a potentially garbage + * buffer. + */ +STATIC int +xrep_xattr_recover_block( + struct xrep_xattr *rx, + xfs_dablk_t dabno, + xfs_fsblock_t fsbno, + xfs_extlen_t max_len, + xfs_extlen_t *actual_len) +{ + struct xfs_da_blkinfo *info; + struct xfs_buf *bp; + int error; + + error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp); + if (error) + return error; + info = bp->b_addr; + *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length); + + trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno, + be16_to_cpu(info->magic)); + + /* + * If the buffer has the right magic number for an attr leaf block and + * passes a structure check (we don't care about checksums), salvage + * as much as we can from the block. */ + if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) && + xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) && + xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL) + error = xrep_xattr_recover_leaf(rx, bp); + + /* + * If the buffer didn't already have buffer ops set, it was read in by + * the _find_buf function and could very well be /part/ of a multiblock + * remote block. Mark it stale so that it doesn't hang around in + * memory to cause problems. + */ + if (bp->b_ops == NULL) + xfs_buf_stale(bp); + + xfs_buf_relse(bp); + return error; +} + +/* Insert one xattr key/value. */ +STATIC int +xrep_xattr_insert_rec( + struct xrep_xattr *rx, + const struct xrep_xattr_key *key) +{ + struct xfs_da_args args = { + .dp = rx->sc->tempip, + .attr_filter = key->flags, + .attr_flags = XATTR_CREATE, + .namelen = key->namelen, + .valuelen = key->valuelen, + .owner = rx->sc->ip->i_ino, + }; + struct xchk_xattr_buf *ab = rx->sc->buf; + int error; + + /* + * Grab pointers to the scrub buffer so that we can use them to insert + * attrs into the temp file. + */ + args.name = ab->name; + args.value = ab->value; + + /* + * The attribute name is stored near the end of the in-core buffer, + * though we reserve one more byte to ensure null termination. + */ + ab->name[XATTR_NAME_MAX] = 0; + + error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name, + key->namelen); + if (error) + return error; + + error = xfblob_free(rx->xattr_blobs, key->name_cookie); + if (error) + return error; + + error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value, + key->valuelen); + if (error) + return error; + + error = xfblob_free(rx->xattr_blobs, key->value_cookie); + if (error) + return error; + + ab->name[key->namelen] = 0; + + trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name, + key->namelen, key->valuelen); + + /* + * xfs_attr_set creates and commits its own transaction. If the attr + * already exists, we'll just drop it during the rebuild. + */ + error = xfs_attr_set(&args); + if (error == -EEXIST) + error = 0; + + return error; +} + +/* + * Periodically flush salvaged attributes to the temporary file. This is done + * to reduce the memory requirements of the xattr rebuild because files can + * contain millions of attributes. + */ +STATIC int +xrep_xattr_flush_stashed( + struct xrep_xattr *rx) +{ + xfarray_idx_t array_cur; + int error; + + /* + * Entering this function, the scrub context has a reference to the + * inode being repaired, the temporary file, and a scrub transaction + * that we use during xattr salvaging to avoid livelocking if there + * are cycles in the xattr structures. We hold ILOCK_EXCL on both + * the inode being repaired, though it is not ijoined to the scrub + * transaction. + * + * To constrain kernel memory use, we occasionally flush salvaged + * xattrs from the xfarray and xfblob structures into the temporary + * file in preparation for exchanging the xattr structures at the end. + * Updating the temporary file requires a transaction, so we commit the + * scrub transaction and drop the two ILOCKs so that xfs_attr_set can + * allocate whatever transaction it wants. + * + * We still hold IOLOCK_EXCL on the inode being repaired, which + * prevents anyone from modifying the damaged xattr data while we + * repair it. + */ + error = xrep_trans_commit(rx->sc); + if (error) + return error; + xchk_iunlock(rx->sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK of the temporary file while we modify xattrs. This + * isn't strictly required because the temporary file is never revealed + * to userspace, but we follow the same locking rules. We still hold + * sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rx->sc); + if (error) + return error; + + /* Add all the salvaged attrs to the temporary file. */ + foreach_xfarray_idx(rx->xattr_records, array_cur) { + struct xrep_xattr_key key; + + error = xfarray_load(rx->xattr_records, array_cur, &key); + if (error) + return error; + + error = xrep_xattr_insert_rec(rx, &key); + if (error) + return error; + } + + /* Empty out both arrays now that we've added the entries. */ + xfarray_truncate(rx->xattr_records); + xfblob_truncate(rx->xattr_blobs); + + xrep_tempfile_iounlock(rx->sc); + + /* Recreate the salvage transaction and relock the inode. */ + error = xchk_trans_alloc(rx->sc, 0); + if (error) + return error; + xchk_ilock(rx->sc, XFS_ILOCK_EXCL); + return 0; +} + +/* Decide if we've stashed too much xattr data in memory. */ +static inline bool +xrep_xattr_want_flush_stashed( + struct xrep_xattr *rx) +{ + unsigned long long bytes; + + bytes = xfarray_bytes(rx->xattr_records) + + xfblob_bytes(rx->xattr_blobs); + return bytes > XREP_XATTR_MAX_STASH_BYTES; +} + +/* Extract as many attribute keys and values as we can. */ +STATIC int +xrep_xattr_recover( + struct xrep_xattr *rx) +{ + struct xfs_bmbt_irec got; + struct xfs_scrub *sc = rx->sc; + struct xfs_da_geometry *geo = sc->mp->m_attr_geo; + xfs_fileoff_t offset; + xfs_extlen_t len; + xfs_dablk_t dabno; + int nmap; + int error; + + /* + * Iterate each xattr leaf block in the attr fork to scan them for any + * attributes that we might salvage. + */ + for (offset = 0; + offset < XFS_MAX_FILEOFF; + offset = got.br_startoff + got.br_blockcount) { + nmap = 1; + error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset, + &got, &nmap, XFS_BMAPI_ATTRFORK); + if (error) + return error; + if (nmap != 1) + return -EFSCORRUPTED; + if (!xfs_bmap_is_written_extent(&got)) + continue; + + for (dabno = round_up(got.br_startoff, geo->fsbcount); + dabno < got.br_startoff + got.br_blockcount; + dabno += len) { + xfs_fileoff_t curr_offset = dabno - got.br_startoff; + xfs_extlen_t maxlen; + + if (xchk_should_terminate(rx->sc, &error)) + return error; + + maxlen = min_t(xfs_filblks_t, INT_MAX, + got.br_blockcount - curr_offset); + error = xrep_xattr_recover_block(rx, dabno, + curr_offset + got.br_startblock, + maxlen, &len); + if (error) + return error; + + if (xrep_xattr_want_flush_stashed(rx)) { + error = xrep_xattr_flush_stashed(rx); + if (error) + return error; + } + } + } + + return 0; +} + +/* + * Reset the extended attribute fork to a state where we can start re-adding + * the salvaged attributes. + */ +STATIC int +xrep_xattr_fork_remove( + struct xfs_scrub *sc, + struct xfs_inode *ip) +{ + struct xfs_attr_sf_hdr *hdr; + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK); + + /* + * If the data fork is in btree format, we can't change di_forkoff + * because we could run afoul of the rule that the data fork isn't + * supposed to be in btree format if there's enough space in the fork + * that it could have used extents format. Instead, reinitialize the + * attr fork to have a shortform structure with zero attributes. + */ + if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) { + ifp->if_format = XFS_DINODE_FMT_LOCAL; + hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes, + XFS_ATTR_FORK); + hdr->count = 0; + hdr->totsize = cpu_to_be16(sizeof(*hdr)); + xfs_trans_log_inode(sc->tp, ip, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + return 0; + } + + /* If we still have attr fork extents, something's wrong. */ + if (ifp->if_nextents != 0) { + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec irec; + unsigned int i = 0; + + xfs_emerg(sc->mp, + "inode 0x%llx attr fork still has %llu attr extents, format %d?!", + ip->i_ino, ifp->if_nextents, ifp->if_format); + for_each_xfs_iext(ifp, &icur, &irec) { + xfs_err(sc->mp, + "[%u]: startoff %llu startblock %llu blockcount %llu state %u", + i++, irec.br_startoff, + irec.br_startblock, irec.br_blockcount, + irec.br_state); + } + ASSERT(0); + return -EFSCORRUPTED; + } + + xfs_attr_fork_remove(ip, sc->tp); + return 0; +} + +/* + * Free all the attribute fork blocks of the file being repaired and delete the + * fork. The caller must ILOCK the scrub file and join it to the transaction. + * This function returns with the inode joined to a clean transaction. + */ +int +xrep_xattr_reset_fork( + struct xfs_scrub *sc) +{ + int error; + + trace_xrep_xattr_reset_fork(sc->ip, sc->ip); + + /* Unmap all the attr blocks. */ + if (xfs_ifork_has_extents(&sc->ip->i_af)) { + error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK); + if (error) + return error; + } + + error = xrep_xattr_fork_remove(sc, sc->ip); + if (error) + return error; + + return xfs_trans_roll_inode(&sc->tp, sc->ip); +} + +/* + * Free all the attribute fork blocks of the temporary file and delete the attr + * fork. The caller must ILOCK the tempfile and join it to the transaction. + * This function returns with the inode joined to a clean scrub transaction. + */ +STATIC int +xrep_xattr_reset_tempfile_fork( + struct xfs_scrub *sc) +{ + int error; + + trace_xrep_xattr_reset_fork(sc->ip, sc->tempip); + + /* + * Wipe out the attr fork of the temp file so that regular inode + * inactivation won't trip over the corrupt attr fork. + */ + if (xfs_ifork_has_extents(&sc->tempip->i_af)) { + error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK); + if (error) + return error; + } + + return xrep_xattr_fork_remove(sc, sc->tempip); +} + +/* + * Find all the extended attributes for this inode by scraping them out of the + * attribute key blocks by hand, and flushing them into the temp file. + * When we're done, free the staging memory before exchanging the xattr + * structures to reduce memory usage. + */ +STATIC int +xrep_xattr_salvage_attributes( + struct xrep_xattr *rx) +{ + struct xfs_inode *ip = rx->sc->ip; + int error; + + /* Short format xattrs are easy! */ + if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) { + error = xrep_xattr_recover_sf(rx); + if (error) + return error; + + return xrep_xattr_flush_stashed(rx); + } + + /* + * For non-inline xattr structures, the salvage function scans the + * buffer cache looking for potential attr leaf blocks. The scan + * requires the ability to lock any buffer found and runs independently + * of any transaction <-> buffer item <-> buffer linkage. Therefore, + * roll the transaction to ensure there are no buffers joined. We hold + * the ILOCK independently of the transaction. + */ + error = xfs_trans_roll(&rx->sc->tp); + if (error) + return error; + + error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK); + if (error) + return error; + + error = xrep_xattr_recover(rx); + if (error) + return error; + + return xrep_xattr_flush_stashed(rx); +} + +/* + * Prepare both inodes' attribute forks for an exchange. Promote the tempfile + * from short format to leaf format, and if the file being repaired has a short + * format attr fork, turn it into an empty extent list. + */ +STATIC int +xrep_xattr_swap_prep( + struct xfs_scrub *sc, + bool temp_local, + bool ip_local) +{ + int error; + + /* + * If the tempfile's attributes are in shortform format, convert that + * to a single leaf extent so that we can use the atomic mapping + * exchange. + */ + if (temp_local) { + struct xfs_da_args args = { + .dp = sc->tempip, + .geo = sc->mp->m_attr_geo, + .whichfork = XFS_ATTR_FORK, + .trans = sc->tp, + .total = 1, + .owner = sc->ip->i_ino, + }; + + error = xfs_attr_shortform_to_leaf(&args); + if (error) + return error; + + /* + * Roll the deferred log items to get us back to a clean + * transaction. + */ + error = xfs_defer_finish(&sc->tp); + if (error) + return error; + } + + /* + * If the file being repaired had a shortform attribute fork, convert + * that to an empty extent list in preparation for the atomic mapping + * exchange. + */ + if (ip_local) { + struct xfs_ifork *ifp; + + ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); + + xfs_idestroy_fork(ifp); + ifp->if_format = XFS_DINODE_FMT_EXTENTS; + ifp->if_nextents = 0; + ifp->if_bytes = 0; + ifp->if_data = NULL; + ifp->if_height = 0; + + xfs_trans_log_inode(sc->tp, sc->ip, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + } + + return 0; +} + +/* Exchange the temporary file's attribute fork with the one being repaired. */ +STATIC int +xrep_xattr_swap( + struct xfs_scrub *sc, + struct xrep_tempexch *tx) +{ + bool ip_local, temp_local; + int error = 0; + + ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL; + temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL; + + /* + * If the both files have a local format attr fork and the rebuilt + * xattr data would fit in the repaired file's attr fork, just copy + * the contents from the tempfile and declare ourselves done. + */ + if (ip_local && temp_local) { + int forkoff; + int newsize; + + newsize = xfs_attr_sf_totsize(sc->tempip); + forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize); + if (forkoff > 0) { + sc->ip->i_forkoff = forkoff; + xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK); + return 0; + } + } + + /* Otherwise, make sure both attr forks are in block-mapping mode. */ + error = xrep_xattr_swap_prep(sc, temp_local, ip_local); + if (error) + return error; + + return xrep_tempexch_contents(sc, tx); +} + +/* + * Exchange the new extended attribute data (which we created in the tempfile) + * with the file being repaired. + */ +STATIC int +xrep_xattr_rebuild_tree( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + int error; + + /* + * If we didn't find any attributes to salvage, repair the file by + * zapping its attr fork. + */ + if (rx->attrs_found == 0) { + xfs_trans_ijoin(sc->tp, sc->ip, 0); + error = xrep_xattr_reset_fork(sc); + if (error) + return error; + + goto forget_acls; + } + + trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip); + + /* + * Commit the repair transaction and drop the ILOCKs so that we can use + * the atomic file content exchange helper functions to compute the + * correct resource reservations. + * + * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr + * modifications, but there's nothing to prevent userspace from reading + * the attributes until we're ready for the exchange operation. Reads + * will return -EIO without shutting down the fs, so we're ok with + * that. + */ + error = xrep_trans_commit(sc); + if (error) + return error; + + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK on the temporary file so that we can run xattr + * operations with the same locks held as we would for a normal file. + * We still hold sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rx->sc); + if (error) + return error; + + /* Allocate exchange transaction and lock both inodes. */ + error = xrep_tempexch_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx); + if (error) + return error; + + /* + * Exchange the blocks mapped by the tempfile's attr fork with the file + * being repaired. The old attr blocks will then be attached to the + * tempfile, so reap its attr fork. + */ + error = xrep_xattr_swap(sc, &rx->tx); + if (error) + return error; + + error = xrep_xattr_reset_tempfile_fork(sc); + if (error) + return error; + + /* + * Roll to get a transaction without any inodes joined to it. Then we + * can drop the tempfile's ILOCK and IOLOCK before doing more work on + * the scrub target file. + */ + error = xfs_trans_roll(&sc->tp); + if (error) + return error; + + xrep_tempfile_iunlock(sc); + xrep_tempfile_iounlock(sc); + +forget_acls: + /* Invalidate cached ACLs now that we've reloaded all the xattrs. */ + xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE); + xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT); + return 0; +} + +/* Tear down all the incore scan stuff we created. */ +STATIC void +xrep_xattr_teardown( + struct xrep_xattr *rx) +{ + xfblob_destroy(rx->xattr_blobs); + xfarray_destroy(rx->xattr_records); + kfree(rx); +} + +/* Set up the filesystem scan so we can regenerate extended attributes. */ +STATIC int +xrep_xattr_setup_scan( + struct xfs_scrub *sc, + struct xrep_xattr **rxp) +{ + struct xrep_xattr *rx; + char *descr; + int max_len; + int error; + + rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS); + if (!rx) + return -ENOMEM; + rx->sc = sc; + + /* + * Allocate enough memory to handle loading local attr values from the + * xfblob data while flushing stashed attrs to the temporary file. + * We only realloc the buffer when salvaging remote attr values. + */ + max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize); + error = xchk_setup_xattr_buf(rx->sc, max_len); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) + goto out_rx; + + /* Set up some staging for salvaged attribute keys and values */ + descr = xchk_xfile_ino_descr(sc, "xattr keys"); + error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key), + &rx->xattr_records); + kfree(descr); + if (error) + goto out_rx; + + descr = xchk_xfile_ino_descr(sc, "xattr names"); + error = xfblob_create(descr, &rx->xattr_blobs); + kfree(descr); + if (error) + goto out_keys; + + *rxp = rx; + return 0; +out_keys: + xfarray_destroy(rx->xattr_records); +out_rx: + kfree(rx); + return error; +} + +/* + * Repair the extended attribute metadata. + * + * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer. + * The buffer cache in XFS can't handle aliased multiblock buffers, so this + * might misbehave if the attr fork is crosslinked with other filesystem + * metadata. + */ +int +xrep_xattr( + struct xfs_scrub *sc) +{ + struct xrep_xattr *rx = NULL; + int error; + + if (!xfs_inode_hasattr(sc->ip)) + return -ENOENT; + + /* The rmapbt is required to reap the old attr fork. */ + if (!xfs_has_rmapbt(sc->mp)) + return -EOPNOTSUPP; + + error = xrep_xattr_setup_scan(sc, &rx); + if (error) + return error; + + ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); + + error = xrep_xattr_salvage_attributes(rx); + if (error) + goto out_scan; + + /* Last chance to abort before we start committing fixes. */ + if (xchk_should_terminate(sc, &error)) + goto out_scan; + + error = xrep_xattr_rebuild_tree(rx); + if (error) + goto out_scan; + +out_scan: + xrep_xattr_teardown(rx); + return error; +} diff --git a/fs/xfs/scrub/attr_repair.h b/fs/xfs/scrub/attr_repair.h new file mode 100644 index 000000000000..0a9ffa7cfa90 --- /dev/null +++ b/fs/xfs/scrub/attr_repair.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_ATTR_REPAIR_H__ +#define __XFS_SCRUB_ATTR_REPAIR_H__ + +int xrep_xattr_reset_fork(struct xfs_scrub *sc); + +#endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */ diff --git a/fs/xfs/scrub/dab_bitmap.h b/fs/xfs/scrub/dab_bitmap.h new file mode 100644 index 000000000000..0c6e3aad4395 --- /dev/null +++ b/fs/xfs/scrub/dab_bitmap.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_DAB_BITMAP_H__ +#define __XFS_SCRUB_DAB_BITMAP_H__ + +/* Bitmaps, but for type-checked for xfs_dablk_t */ + +struct xdab_bitmap { + struct xbitmap32 dabitmap; +}; + +static inline void xdab_bitmap_init(struct xdab_bitmap *bitmap) +{ + xbitmap32_init(&bitmap->dabitmap); +} + +static inline void xdab_bitmap_destroy(struct xdab_bitmap *bitmap) +{ + xbitmap32_destroy(&bitmap->dabitmap); +} + +static inline int xdab_bitmap_set(struct xdab_bitmap *bitmap, + xfs_dablk_t dabno, xfs_extlen_t len) +{ + return xbitmap32_set(&bitmap->dabitmap, dabno, len); +} + +static inline bool xdab_bitmap_test(struct xdab_bitmap *bitmap, + xfs_dablk_t dabno, xfs_extlen_t *len) +{ + return xbitmap32_test(&bitmap->dabitmap, dabno, len); +} + +#endif /* __XFS_SCRUB_DAB_BITMAP_H__ */ diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index c71254088dff..056de4819f86 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -78,6 +78,22 @@ xchk_da_set_corrupt( __return_address); } +/* Flag a da btree node in need of optimization. */ +void +xchk_da_set_preen( + struct xchk_da_btree *ds, + int level) +{ + struct xfs_scrub *sc = ds->sc; + + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; + trace_xchk_fblock_preen(sc, ds->dargs.whichfork, + xfs_dir2_da_to_db(ds->dargs.geo, + ds->state->path.blk[level].blkno), + __return_address); +} + +/* Find an entry at a certain level in a da btree. */ static struct xfs_da_node_entry * xchk_da_btree_node_entry( struct xchk_da_btree *ds, diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h index 4f8c2138a1ec..de291e3b77dd 100644 --- a/fs/xfs/scrub/dabtree.h +++ b/fs/xfs/scrub/dabtree.h @@ -35,6 +35,9 @@ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error); /* Check for da btree corruption. */ void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level); +void xchk_da_set_preen(struct xchk_da_btree *ds, int level); + +void xchk_da_set_preen(struct xchk_da_btree *ds, int level); int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp); int xchk_da_btree(struct xfs_scrub *sc, int whichfork, diff --git a/fs/xfs/scrub/listxattr.c b/fs/xfs/scrub/listxattr.c new file mode 100644 index 000000000000..cbe5911ecbbc --- /dev/null +++ b/fs/xfs/scrub/listxattr.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_sf.h" +#include "xfs_trans.h" +#include "scrub/scrub.h" +#include "scrub/bitmap.h" +#include "scrub/dab_bitmap.h" +#include "scrub/listxattr.h" + +/* Call a function for every entry in a shortform xattr structure. */ +STATIC int +xchk_xattr_walk_sf( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + struct xfs_attr_sf_hdr *hdr = ip->i_af.if_data; + struct xfs_attr_sf_entry *sfe; + unsigned int i; + int error; + + sfe = xfs_attr_sf_firstentry(hdr); + for (i = 0; i < hdr->count; i++) { + error = attr_fn(sc, ip, sfe->flags, sfe->nameval, sfe->namelen, + &sfe->nameval[sfe->namelen], sfe->valuelen, + priv); + if (error) + return error; + + sfe = xfs_attr_sf_nextentry(sfe); + } + + return 0; +} + +/* Call a function for every entry in this xattr leaf block. */ +STATIC int +xchk_xattr_walk_leaf_entries( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + struct xfs_buf *bp, + void *priv) +{ + struct xfs_attr3_icleaf_hdr ichdr; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf = bp->b_addr; + struct xfs_attr_leaf_entry *entry; + unsigned int i; + int error; + + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); + entry = xfs_attr3_leaf_entryp(leaf); + + for (i = 0; i < ichdr.count; entry++, i++) { + void *value; + unsigned char *name; + unsigned int namelen, valuelen; + + if (entry->flags & XFS_ATTR_LOCAL) { + struct xfs_attr_leaf_name_local *name_loc; + + name_loc = xfs_attr3_leaf_name_local(leaf, i); + name = name_loc->nameval; + namelen = name_loc->namelen; + value = &name_loc->nameval[name_loc->namelen]; + valuelen = be16_to_cpu(name_loc->valuelen); + } else { + struct xfs_attr_leaf_name_remote *name_rmt; + + name_rmt = xfs_attr3_leaf_name_remote(leaf, i); + name = name_rmt->name; + namelen = name_rmt->namelen; + value = NULL; + valuelen = be32_to_cpu(name_rmt->valuelen); + } + + error = attr_fn(sc, ip, entry->flags, name, namelen, value, + valuelen, priv); + if (error) + return error; + + } + + return 0; +} + +/* + * Call a function for every entry in a leaf-format xattr structure. Avoid + * memory allocations for the loop detector since there's only one block. + */ +STATIC int +xchk_xattr_walk_leaf( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + struct xfs_buf *leaf_bp; + int error; + + error = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino, 0, &leaf_bp); + if (error) + return error; + + error = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, leaf_bp, priv); + xfs_trans_brelse(sc->tp, leaf_bp); + return error; +} + +/* Find the leftmost leaf in the xattr dabtree. */ +STATIC int +xchk_xattr_find_leftmost_leaf( + struct xfs_scrub *sc, + struct xfs_inode *ip, + struct xdab_bitmap *seen_dablks, + struct xfs_buf **leaf_bpp) +{ + struct xfs_da3_icnode_hdr nodehdr; + struct xfs_mount *mp = sc->mp; + struct xfs_trans *tp = sc->tp; + struct xfs_da_intnode *node; + struct xfs_da_node_entry *btree; + struct xfs_buf *bp; + xfs_failaddr_t fa; + xfs_dablk_t blkno = 0; + unsigned int expected_level = 0; + int error; + + for (;;) { + xfs_extlen_t len = 1; + uint16_t magic; + + /* Make sure we haven't seen this new block already. */ + if (xdab_bitmap_test(seen_dablks, blkno, &len)) + return -EFSCORRUPTED; + + error = xfs_da3_node_read(tp, ip, blkno, &bp, XFS_ATTR_FORK); + if (error) + return error; + + node = bp->b_addr; + magic = be16_to_cpu(node->hdr.info.magic); + if (magic == XFS_ATTR_LEAF_MAGIC || + magic == XFS_ATTR3_LEAF_MAGIC) + break; + + error = -EFSCORRUPTED; + if (magic != XFS_DA_NODE_MAGIC && + magic != XFS_DA3_NODE_MAGIC) + goto out_buf; + + fa = xfs_da3_node_header_check(bp, ip->i_ino); + if (fa) + goto out_buf; + + xfs_da3_node_hdr_from_disk(mp, &nodehdr, node); + + if (nodehdr.count == 0 || nodehdr.level >= XFS_DA_NODE_MAXDEPTH) + goto out_buf; + + /* Check the level from the root node. */ + if (blkno == 0) + expected_level = nodehdr.level - 1; + else if (expected_level != nodehdr.level) + goto out_buf; + else + expected_level--; + + /* Remember that we've seen this node. */ + error = xdab_bitmap_set(seen_dablks, blkno, 1); + if (error) + goto out_buf; + + /* Find the next level towards the leaves of the dabtree. */ + btree = nodehdr.btree; + blkno = be32_to_cpu(btree->before); + xfs_trans_brelse(tp, bp); + } + + error = -EFSCORRUPTED; + fa = xfs_attr3_leaf_header_check(bp, ip->i_ino); + if (fa) + goto out_buf; + + if (expected_level != 0) + goto out_buf; + + /* Remember that we've seen this leaf. */ + error = xdab_bitmap_set(seen_dablks, blkno, 1); + if (error) + goto out_buf; + + *leaf_bpp = bp; + return 0; + +out_buf: + xfs_trans_brelse(tp, bp); + return error; +} + +/* Call a function for every entry in a node-format xattr structure. */ +STATIC int +xchk_xattr_walk_node( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + struct xfs_attr3_icleaf_hdr leafhdr; + struct xdab_bitmap seen_dablks; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf; + struct xfs_buf *leaf_bp; + int error; + + xdab_bitmap_init(&seen_dablks); + + error = xchk_xattr_find_leftmost_leaf(sc, ip, &seen_dablks, &leaf_bp); + if (error) + goto out_bitmap; + + for (;;) { + xfs_extlen_t len; + + error = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, leaf_bp, + priv); + if (error) + goto out_leaf; + + /* Find the right sibling of this leaf block. */ + leaf = leaf_bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); + if (leafhdr.forw == 0) + goto out_leaf; + + xfs_trans_brelse(sc->tp, leaf_bp); + + /* Make sure we haven't seen this new leaf already. */ + len = 1; + if (xdab_bitmap_test(&seen_dablks, leafhdr.forw, &len)) { + error = -EFSCORRUPTED; + goto out_bitmap; + } + + error = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino, + leafhdr.forw, &leaf_bp); + if (error) + goto out_bitmap; + + /* Remember that we've seen this new leaf. */ + error = xdab_bitmap_set(&seen_dablks, leafhdr.forw, 1); + if (error) + goto out_leaf; + } + +out_leaf: + xfs_trans_brelse(sc->tp, leaf_bp); +out_bitmap: + xdab_bitmap_destroy(&seen_dablks); + return error; +} + +/* + * Call a function for every extended attribute in a file. + * + * Callers must hold the ILOCK. No validation or cursor restarts allowed. + * Returns -EFSCORRUPTED on any problem, including loops in the dabtree. + */ +int +xchk_xattr_walk( + struct xfs_scrub *sc, + struct xfs_inode *ip, + xchk_xattr_fn attr_fn, + void *priv) +{ + int error; + + xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); + + if (!xfs_inode_hasattr(ip)) + return 0; + + if (ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) + return xchk_xattr_walk_sf(sc, ip, attr_fn, priv); + + /* attr functions require that the attr fork is loaded */ + error = xfs_iread_extents(sc->tp, ip, XFS_ATTR_FORK); + if (error) + return error; + + if (xfs_attr_is_leaf(ip)) + return xchk_xattr_walk_leaf(sc, ip, attr_fn, priv); + + return xchk_xattr_walk_node(sc, ip, attr_fn, priv); +} diff --git a/fs/xfs/scrub/listxattr.h b/fs/xfs/scrub/listxattr.h new file mode 100644 index 000000000000..48fe89d05946 --- /dev/null +++ b/fs/xfs/scrub/listxattr.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_LISTXATTR_H__ +#define __XFS_SCRUB_LISTXATTR_H__ + +typedef int (*xchk_xattr_fn)(struct xfs_scrub *sc, struct xfs_inode *ip, + unsigned int attr_flags, const unsigned char *name, + unsigned int namelen, const void *value, unsigned int valuelen, + void *priv); + +int xchk_xattr_walk(struct xfs_scrub *sc, struct xfs_inode *ip, + xchk_xattr_fn attr_fn, void *priv); + +#endif /* __XFS_SCRUB_LISTXATTR_H__ */ diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 443e62f72481..04aec0e9e4c3 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -32,6 +32,9 @@ #include "xfs_reflink.h" #include "xfs_health.h" #include "xfs_buf_mem.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -39,6 +42,7 @@ #include "scrub/bitmap.h" #include "scrub/stats.h" #include "scrub/xfile.h" +#include "scrub/attr_repair.h" /* * Attempt to repair some metadata, if the metadata is corrupt and userspace @@ -1136,6 +1140,17 @@ xrep_metadata_inode_forks( return error; } + /* Clear the attr forks since metadata shouldn't have that. */ + if (xfs_inode_hasattr(sc->ip)) { + if (!dirty) { + dirty = true; + xfs_trans_ijoin(sc->tp, sc->ip, 0); + } + error = xrep_xattr_reset_fork(sc); + if (error) + return error; + } + /* * If we modified the inode, roll the transaction but don't rejoin the * inode to the new transaction because xrep_bmap_data can do that. @@ -1201,3 +1216,34 @@ xrep_trans_cancel_hook_dummy( current->journal_info = *cookiep; *cookiep = NULL; } + +/* + * See if this buffer can pass the given ->verify_struct() function. + * + * If the buffer already has ops attached and they're not the ones that were + * passed in, we reject the buffer. Otherwise, we perform the structure test + * (note that we do not check CRCs) and return the outcome of the test. The + * buffer ops and error state are left unchanged. + */ +bool +xrep_buf_verify_struct( + struct xfs_buf *bp, + const struct xfs_buf_ops *ops) +{ + const struct xfs_buf_ops *old_ops = bp->b_ops; + xfs_failaddr_t fa; + int old_error; + + if (old_ops) { + if (old_ops != ops) + return false; + } + + old_error = bp->b_error; + bp->b_ops = ops; + fa = bp->b_ops->verify_struct(bp); + bp->b_ops = old_ops; + bp->b_error = old_error; + + return fa == NULL; +} diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 0e2b695ab8f6..9cbfd8da5620 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -90,6 +90,7 @@ int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten); int xrep_metadata_inode_forks(struct xfs_scrub *sc); int xrep_setup_ag_rmapbt(struct xfs_scrub *sc); int xrep_setup_ag_refcountbt(struct xfs_scrub *sc); +int xrep_setup_xattr(struct xfs_scrub *sc); /* Repair setup functions */ int xrep_setup_ag_allocbt(struct xfs_scrub *sc); @@ -123,6 +124,7 @@ int xrep_bmap_attr(struct xfs_scrub *sc); int xrep_bmap_cow(struct xfs_scrub *sc); int xrep_nlinks(struct xfs_scrub *sc); int xrep_fscounters(struct xfs_scrub *sc); +int xrep_xattr(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtbitmap(struct xfs_scrub *sc); @@ -147,6 +149,8 @@ int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep, struct xfs_trans **tpp); void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp); +bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops); + #else #define xrep_ino_dqattach(sc) (0) @@ -190,6 +194,7 @@ xrep_setup_nothing( #define xrep_setup_ag_allocbt xrep_setup_nothing #define xrep_setup_ag_rmapbt xrep_setup_nothing #define xrep_setup_ag_refcountbt xrep_setup_nothing +#define xrep_setup_xattr xrep_setup_nothing #define xrep_setup_inode(sc, imap) ((void)0) @@ -215,6 +220,7 @@ xrep_setup_nothing( #define xrep_nlinks xrep_notsupported #define xrep_fscounters xrep_notsupported #define xrep_rtsummary xrep_notsupported +#define xrep_xattr xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 62a064c1a5d3..547189a14b6b 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -331,7 +331,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_INODE, .setup = xchk_setup_xattr, .scrub = xchk_xattr, - .repair = xrep_notsupported, + .repair = xrep_xattr, }, [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ .type = ST_INODE, diff --git a/fs/xfs/scrub/tempexch.h b/fs/xfs/scrub/tempexch.h index 98222b684b6a..c1dd4adec4f1 100644 --- a/fs/xfs/scrub/tempexch.h +++ b/fs/xfs/scrub/tempexch.h @@ -14,6 +14,8 @@ struct xrep_tempexch { int xrep_tempexch_enable(struct xfs_scrub *sc); int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork, struct xrep_tempexch *ti); +int xrep_tempexch_trans_alloc(struct xfs_scrub *sc, int whichfork, + struct xrep_tempexch *ti); int xrep_tempexch_contents(struct xfs_scrub *sc, struct xrep_tempexch *ti); #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 7791336ca820..0b3060be938f 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -239,6 +239,28 @@ xrep_tempfile_iunlock( sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL; } +/* + * Begin the process of making changes to both the file being scrubbed and + * the temporary file by taking ILOCK_EXCL on both. + */ +void +xrep_tempfile_ilock_both( + struct xfs_scrub *sc) +{ + xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL); + sc->ilock_flags |= XFS_ILOCK_EXCL; + sc->temp_ilock_flags |= XFS_ILOCK_EXCL; +} + +/* Unlock ILOCK_EXCL on both files. */ +void +xrep_tempfile_iunlock_both( + struct xfs_scrub *sc) +{ + xrep_tempfile_iunlock(sc); + xchk_iunlock(sc, XFS_ILOCK_EXCL); +} + /* Release the temporary file. */ void xrep_tempfile_rele( @@ -514,6 +536,89 @@ xrep_tempexch_prep_request( return 0; } +/* + * Fill out the mapping exchange resource estimation structures in preparation + * for exchanging the contents of a metadata file that we've rebuilt in the + * temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files. + */ +STATIC int +xrep_tempexch_estimate( + struct xfs_scrub *sc, + struct xrep_tempexch *tx) +{ + struct xfs_exchmaps_req *req = &tx->req; + struct xfs_ifork *ifp; + struct xfs_ifork *tifp; + int whichfork = xfs_exchmaps_reqfork(req); + int state = 0; + + /* + * The exchmaps code only knows how to exchange file fork space + * mappings. Any fork data in local format must be promoted to a + * single block before the exchange can take place. + */ + ifp = xfs_ifork_ptr(sc->ip, whichfork); + if (ifp->if_format == XFS_DINODE_FMT_LOCAL) + state |= 1; + + tifp = xfs_ifork_ptr(sc->tempip, whichfork); + if (tifp->if_format == XFS_DINODE_FMT_LOCAL) + state |= 2; + + switch (state) { + case 0: + /* Both files have mapped extents; use the regular estimate. */ + return xfs_exchrange_estimate(req); + case 1: + /* + * The file being repaired is in local format, but the temp + * file has mapped extents. To perform the exchange, the file + * being repaired must have its shorform data converted to an + * ondisk block so that the forks will be in extents format. + * We need one resblk for the conversion; the number of + * exchanges is (worst case) the temporary file's extent count + * plus the block we converted. + */ + req->ip1_bcount = sc->tempip->i_nblocks; + req->ip2_bcount = 1; + req->nr_exchanges = 1 + tifp->if_nextents; + req->resblks = 1; + break; + case 2: + /* + * The temporary file is in local format, but the file being + * repaired has mapped extents. To perform the exchange, the + * temp file must have its shortform data converted to an + * ondisk block, and the fork changed to extents format. We + * need one resblk for the conversion; the number of exchanges + * is (worst case) the extent count of the file being repaired + * plus the block we converted. + */ + req->ip1_bcount = 1; + req->ip2_bcount = sc->ip->i_nblocks; + req->nr_exchanges = 1 + ifp->if_nextents; + req->resblks = 1; + break; + case 3: + /* + * Both forks are in local format. To perform the exchange, + * both files must have their shortform data converted to + * fsblocks, and both forks must be converted to extents + * format. We need two resblks for the two conversions, and + * the number of exchanges is 1 since there's only one block at + * fileoff 0. Presumably, the caller could not exchange the + * two inode fork areas directly. + */ + req->ip1_bcount = 1; + req->ip2_bcount = 1; + req->nr_exchanges = 1; + req->resblks = 2; + break; + } + + return xfs_exchmaps_estimate_overhead(req); +} + /* * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip * this if quota enforcement is disabled or if both inodes' dquots are the @@ -604,6 +709,55 @@ xrep_tempexch_trans_reserve( return xrep_tempexch_reserve_quota(sc, tx); } +/* + * Create a new transaction for a file contents exchange. + * + * This function fills out the mapping excahange request and resource + * estimation structures in preparation for exchanging the contents of a + * metadata file that has been rebuilt in the temp file. Next, it reserves + * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and + * reserves quota for the transaction. + * + * The caller is responsible for dropping both ILOCKs when appropriate. + */ +int +xrep_tempexch_trans_alloc( + struct xfs_scrub *sc, + int whichfork, + struct xrep_tempexch *tx) +{ + unsigned int flags = 0; + int error; + + ASSERT(sc->tp == NULL); + + error = xrep_tempexch_prep_request(sc, whichfork, tx); + if (error) + return error; + + error = xrep_tempexch_estimate(sc, tx); + if (error) + return error; + + if (xfs_has_lazysbcount(sc->mp)) + flags |= XFS_TRANS_RES_FDBLKS; + + error = xrep_tempexch_enable(sc); + if (error) + return error; + + error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, + tx->req.resblks, 0, flags, &sc->tp); + if (error) + return error; + + sc->temp_ilock_flags |= XFS_ILOCK_EXCL; + sc->ilock_flags |= XFS_ILOCK_EXCL; + xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip); + + return xrep_tempexch_reserve_quota(sc, tx); +} + /* * Exchange file mappings (and hence file contents) between the file being * repaired and the temporary file. Returns with both inodes locked and joined @@ -637,3 +791,53 @@ xrep_tempexch_contents( return 0; } + +/* + * Write local format data from one of the temporary file's forks into the same + * fork of file being repaired, and exchange the file sizes, if appropriate. + * Caller must ensure that the file being repaired has enough fork space to + * hold all the bytes. + */ +void +xrep_tempfile_copyout_local( + struct xfs_scrub *sc, + int whichfork) +{ + struct xfs_ifork *temp_ifp; + struct xfs_ifork *ifp; + unsigned int ilog_flags = XFS_ILOG_CORE; + + temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork); + ifp = xfs_ifork_ptr(sc->ip, whichfork); + + ASSERT(temp_ifp != NULL); + ASSERT(ifp != NULL); + ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); + + switch (whichfork) { + case XFS_DATA_FORK: + ASSERT(sc->tempip->i_disk_size <= + xfs_inode_data_fork_size(sc->ip)); + break; + case XFS_ATTR_FORK: + ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff); + break; + default: + ASSERT(0); + return; + } + + /* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */ + xfs_idestroy_fork(ifp); + xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data, + temp_ifp->if_bytes); + + if (whichfork == XFS_DATA_FORK) { + i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); + sc->ip->i_disk_size = sc->tempip->i_disk_size; + } + + ilog_flags |= xfs_ilog_fdata(whichfork); + xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags); +} diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h index 7980f9c4de55..d57e4f145a7c 100644 --- a/fs/xfs/scrub/tempfile.h +++ b/fs/xfs/scrub/tempfile.h @@ -17,6 +17,8 @@ void xrep_tempfile_iounlock(struct xfs_scrub *sc); void xrep_tempfile_ilock(struct xfs_scrub *sc); bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc); void xrep_tempfile_iunlock(struct xfs_scrub *sc); +void xrep_tempfile_iunlock_both(struct xfs_scrub *sc); +void xrep_tempfile_ilock_both(struct xfs_scrub *sc); int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off, xfs_filblks_t len); @@ -32,6 +34,7 @@ int xrep_tempfile_copyin(struct xfs_scrub *sc, xfs_fileoff_t off, int xrep_tempfile_set_isize(struct xfs_scrub *sc, unsigned long long isize); int xrep_tempfile_roll_trans(struct xfs_scrub *sc); +void xrep_tempfile_copyout_local(struct xfs_scrub *sc, int whichfork); #else static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc) { diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 8d05f2adae3d..ffaff7722bf2 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -365,6 +365,7 @@ DEFINE_EVENT(xchk_fblock_error_class, name, \ DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error); DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning); +DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_preen); #ifdef CONFIG_XFS_QUOTA DECLARE_EVENT_CLASS(xchk_dqiter_class, @@ -948,6 +949,7 @@ DEFINE_XFILE_EVENT(xfile_store); DEFINE_XFILE_EVENT(xfile_seek_data); DEFINE_XFILE_EVENT(xfile_get_folio); DEFINE_XFILE_EVENT(xfile_put_folio); +DEFINE_XFILE_EVENT(xfile_discard); TRACE_EVENT(xfarray_create, TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity), @@ -2415,6 +2417,89 @@ TRACE_EVENT(xreap_bmapi_binval_scan, __entry->scan_blocks) ); +TRACE_EVENT(xrep_xattr_recover_leafblock, + TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic), + TP_ARGS(ip, dabno, magic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_dablk_t, dabno) + __field(uint16_t, magic) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->dabno = dabno; + __entry->magic = magic; + ), + TP_printk("dev %d:%d ino 0x%llx dablk 0x%x magic 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->dabno, + __entry->magic) +); + +DECLARE_EVENT_CLASS(xrep_xattr_salvage_class, + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, + unsigned int namelen, unsigned int valuelen), + TP_ARGS(ip, flags, name, namelen, valuelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, flags) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + __field(unsigned int, valuelen) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->flags = flags; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + __entry->valuelen = valuelen; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flags, "|", XFS_ATTR_NAMESPACE_STR), + __entry->namelen, + __get_str(name), + __entry->valuelen) +); +#define DEFINE_XREP_XATTR_SALVAGE_EVENT(name) \ +DEFINE_EVENT(xrep_xattr_salvage_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, \ + unsigned int namelen, unsigned int valuelen), \ + TP_ARGS(ip, flags, name, namelen, valuelen)) +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec); +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec); + +TRACE_EVENT(xrep_xattr_class, + TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), + TP_ARGS(ip, arg_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, src_ino) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->src_ino = arg_ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx src 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->src_ino) +) +#define DEFINE_XREP_XATTR_EVENT(name) \ +DEFINE_EVENT(xrep_xattr_class, name, \ + TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), \ + TP_ARGS(ip, arg_ip)) +DEFINE_XREP_XATTR_EVENT(xrep_xattr_rebuild_tree); +DEFINE_XREP_XATTR_EVENT(xrep_xattr_reset_fork); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c index 17c982a4821d..b65cd3fc5ac9 100644 --- a/fs/xfs/scrub/xfarray.c +++ b/fs/xfs/scrub/xfarray.c @@ -1051,3 +1051,20 @@ out_free: kvfree(si); return error; } + +/* How many bytes is this array consuming? */ +unsigned long long +xfarray_bytes( + struct xfarray *array) +{ + return xfile_bytes(array->xfile); +} + +/* Empty the entire array. */ +void +xfarray_truncate( + struct xfarray *array) +{ + xfile_discard(array->xfile, 0, MAX_LFS_FILESIZE); + array->nr = 0; +} diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h index acb2f94c56c1..3b10a58e9f14 100644 --- a/fs/xfs/scrub/xfarray.h +++ b/fs/xfs/scrub/xfarray.h @@ -44,6 +44,8 @@ int xfarray_unset(struct xfarray *array, xfarray_idx_t idx); int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr); int xfarray_store_anywhere(struct xfarray *array, const void *ptr); bool xfarray_element_is_null(struct xfarray *array, const void *ptr); +void xfarray_truncate(struct xfarray *array); +unsigned long long xfarray_bytes(struct xfarray *array); /* * Load an array element, but zero the buffer if there's no data because we diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c new file mode 100644 index 000000000000..6ef2a9637f16 --- /dev/null +++ b/fs/xfs/scrub/xfblob.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2021-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "scrub/scrub.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" + +/* + * XFS Blob Storage + * ================ + * Stores and retrieves blobs using an xfile. Objects are appended to the file + * and the offset is returned as a magic cookie for retrieval. + */ + +#define XB_KEY_MAGIC 0xABAADDAD +struct xb_key { + uint32_t xb_magic; /* XB_KEY_MAGIC */ + uint32_t xb_size; /* size of the blob, in bytes */ + loff_t xb_offset; /* byte offset of this key */ + /* blob comes after here */ +} __packed; + +/* Initialize a blob storage object. */ +int +xfblob_create( + const char *description, + struct xfblob **blobp) +{ + struct xfblob *blob; + struct xfile *xfile; + int error; + + error = xfile_create(description, 0, &xfile); + if (error) + return error; + + blob = kmalloc(sizeof(struct xfblob), XCHK_GFP_FLAGS); + if (!blob) { + error = -ENOMEM; + goto out_xfile; + } + + blob->xfile = xfile; + blob->last_offset = PAGE_SIZE; + + *blobp = blob; + return 0; + +out_xfile: + xfile_destroy(xfile); + return error; +} + +/* Destroy a blob storage object. */ +void +xfblob_destroy( + struct xfblob *blob) +{ + xfile_destroy(blob->xfile); + kfree(blob); +} + +/* Retrieve a blob. */ +int +xfblob_load( + struct xfblob *blob, + xfblob_cookie cookie, + void *ptr, + uint32_t size) +{ + struct xb_key key; + int error; + + error = xfile_load(blob->xfile, &key, sizeof(key), cookie); + if (error) + return error; + + if (key.xb_magic != XB_KEY_MAGIC || key.xb_offset != cookie) { + ASSERT(0); + return -ENODATA; + } + if (size < key.xb_size) { + ASSERT(0); + return -EFBIG; + } + + return xfile_load(blob->xfile, ptr, key.xb_size, + cookie + sizeof(key)); +} + +/* Store a blob. */ +int +xfblob_store( + struct xfblob *blob, + xfblob_cookie *cookie, + const void *ptr, + uint32_t size) +{ + struct xb_key key = { + .xb_offset = blob->last_offset, + .xb_magic = XB_KEY_MAGIC, + .xb_size = size, + }; + loff_t pos = blob->last_offset; + int error; + + error = xfile_store(blob->xfile, &key, sizeof(key), pos); + if (error) + return error; + + pos += sizeof(key); + error = xfile_store(blob->xfile, ptr, size, pos); + if (error) + goto out_err; + + *cookie = blob->last_offset; + blob->last_offset += sizeof(key) + size; + return 0; +out_err: + xfile_discard(blob->xfile, blob->last_offset, sizeof(key)); + return error; +} + +/* Free a blob. */ +int +xfblob_free( + struct xfblob *blob, + xfblob_cookie cookie) +{ + struct xb_key key; + int error; + + error = xfile_load(blob->xfile, &key, sizeof(key), cookie); + if (error) + return error; + + if (key.xb_magic != XB_KEY_MAGIC || key.xb_offset != cookie) { + ASSERT(0); + return -ENODATA; + } + + xfile_discard(blob->xfile, cookie, sizeof(key) + key.xb_size); + return 0; +} + +/* How many bytes is this blob storage object consuming? */ +unsigned long long +xfblob_bytes( + struct xfblob *blob) +{ + return xfile_bytes(blob->xfile); +} + +/* Drop all the blobs. */ +void +xfblob_truncate( + struct xfblob *blob) +{ + xfile_discard(blob->xfile, PAGE_SIZE, MAX_LFS_FILESIZE - PAGE_SIZE); + blob->last_offset = PAGE_SIZE; +} diff --git a/fs/xfs/scrub/xfblob.h b/fs/xfs/scrub/xfblob.h new file mode 100644 index 000000000000..78a67a06408f --- /dev/null +++ b/fs/xfs/scrub/xfblob.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2021-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_XFBLOB_H__ +#define __XFS_SCRUB_XFBLOB_H__ + +struct xfblob { + struct xfile *xfile; + loff_t last_offset; +}; + +typedef loff_t xfblob_cookie; + +int xfblob_create(const char *descr, struct xfblob **blobp); +void xfblob_destroy(struct xfblob *blob); +int xfblob_load(struct xfblob *blob, xfblob_cookie cookie, void *ptr, + uint32_t size); +int xfblob_store(struct xfblob *blob, xfblob_cookie *cookie, const void *ptr, + uint32_t size); +int xfblob_free(struct xfblob *blob, xfblob_cookie cookie); +unsigned long long xfblob_bytes(struct xfblob *blob); +void xfblob_truncate(struct xfblob *blob); + +#endif /* __XFS_SCRUB_XFBLOB_H__ */ diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index 8cdd863db585..4e254a0ba003 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -310,3 +310,15 @@ xfile_put_folio( folio_unlock(folio); folio_put(folio); } + +/* Discard the page cache that's backing a range of the xfile. */ +void +xfile_discard( + struct xfile *xf, + loff_t pos, + u64 count) +{ + trace_xfile_discard(xf, pos, count); + + shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1); +} diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index 76d78dba7e34..cc2cc1714cd4 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -17,6 +17,7 @@ int xfile_load(struct xfile *xf, void *buf, size_t count, loff_t pos); int xfile_store(struct xfile *xf, const void *buf, size_t count, loff_t pos); +void xfile_discard(struct xfile *xf, loff_t pos, u64 count); loff_t xfile_seek_data(struct xfile *xf, loff_t pos); #define XFILE_MAX_FOLIO_SIZE (PAGE_SIZE << MAX_PAGECACHE_ORDER) @@ -26,4 +27,9 @@ struct folio *xfile_get_folio(struct xfile *xf, loff_t offset, size_t len, unsigned int flags); void xfile_put_folio(struct xfile *xf, struct folio *folio); +static inline unsigned long long xfile_bytes(struct xfile *xf) +{ + return file_inode(xf->file)->i_blocks << SECTOR_SHIFT; +} + #endif /* __XFS_SCRUB_XFILE_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f0fa02264eda..8a0151e23f3d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -494,6 +494,9 @@ _xfs_buf_obj_cmp( * it stale has not yet committed. i.e. we are * reallocating a busy extent. Skip this buffer and * continue searching for an exact match. + * + * Note: If we're scanning for incore buffers to stale, don't + * complain if we find non-stale buffers. */ if (!(map->bm_flags & XBM_LIVESCAN)) ASSERT(bp->b_flags & XBF_STALE); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 939baf08331b..a80c3063a13f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -31,6 +31,8 @@ * pos: file offset, in bytes * bytecount: number of bytes * + * dablk: directory or xattr block offset, in filesystem blocks + * * disize: ondisk file size, in bytes * isize: incore file size, in bytes *