From e81ce4241318d5ec943b4fe205953498a66ca071 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:31 -0700 Subject: [PATCH 1/3] xfs: support preallocating and copying content into temporary files Create the routines we need to preallocate space in a temporary ondisk file and then copy the contents of an xfile into the tempfile. The upcoming rtsummary repair feature will construct the contents of a realtime summary file in memory, after which it will want to copy all that into the ondisk temporary file before atomically committing the new rtsummary contents. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/tempfile.c | 197 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/tempfile.h | 15 +++ fs/xfs/scrub/trace.h | 39 ++++++++ 3 files changed, 251 insertions(+) diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 68d245749bc1..83e683e16561 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -14,14 +14,18 @@ #include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_quota.h" +#include "xfs_bmap.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_dir2.h" #include "xfs_exchrange.h" +#include "xfs_defer.h" #include "scrub/scrub.h" #include "scrub/common.h" +#include "scrub/repair.h" #include "scrub/trace.h" #include "scrub/tempfile.h" +#include "scrub/xfile.h" /* * Create a temporary file for reconstructing metadata, with the intention of @@ -249,3 +253,196 @@ xrep_tempfile_rele( xchk_irele(sc, sc->tempip); sc->tempip = NULL; } + +/* + * Make sure that the given range of the data fork of the temporary file is + * mapped to written blocks. The caller must ensure that both inodes are + * joined to the transaction. + */ +int +xrep_tempfile_prealloc( + struct xfs_scrub *sc, + xfs_fileoff_t off, + xfs_filblks_t len) +{ + struct xfs_bmbt_irec map; + xfs_fileoff_t end = off + len; + int error; + + ASSERT(sc->tempip != NULL); + ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip)); + + for (; off < end; off = map.br_startoff + map.br_blockcount) { + int nmaps = 1; + + /* + * If we have a real extent mapping this block then we're + * in ok shape. + */ + error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps, + XFS_DATA_FORK); + if (error) + return error; + if (nmaps == 0) { + ASSERT(nmaps != 0); + return -EFSCORRUPTED; + } + + if (xfs_bmap_is_written_extent(&map)) + continue; + + /* + * If we find a delalloc reservation then something is very + * very wrong. Bail out. + */ + if (map.br_startblock == DELAYSTARTBLOCK) + return -EFSCORRUPTED; + + /* + * Make sure this block has a real zeroed extent allocated to + * it. + */ + nmaps = 1; + error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off, + XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map, + &nmaps); + if (error) + return error; + if (nmaps != 1) + return -EFSCORRUPTED; + + trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map); + + /* Commit new extent and all deferred work. */ + error = xfs_defer_finish(&sc->tp); + if (error) + return error; + } + + return 0; +} + +/* + * Write data to each block of a file. The given range of the tempfile's data + * fork must already be populated with written extents. + */ +int +xrep_tempfile_copyin( + struct xfs_scrub *sc, + xfs_fileoff_t off, + xfs_filblks_t len, + xrep_tempfile_copyin_fn prep_fn, + void *data) +{ + LIST_HEAD(buffers_list); + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + xfs_fileoff_t flush_mask; + xfs_fileoff_t end = off + len; + loff_t pos = XFS_FSB_TO_B(mp, off); + int error = 0; + + ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode)); + + /* Flush buffers to disk every 512K */ + flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1; + + for (; off < end; off++, pos += mp->m_sb.sb_blocksize) { + struct xfs_bmbt_irec map; + int nmaps = 1; + + /* Read block mapping for this file block. */ + error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0); + if (error) + goto out_err; + if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) { + error = -EFSCORRUPTED; + goto out_err; + } + + /* Get the metadata buffer for this offset in the file. */ + error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + mp->m_bsize, 0, &bp); + if (error) + goto out_err; + + trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map); + + /* Read in a block's worth of data from the xfile. */ + error = prep_fn(sc, bp, data); + if (error) { + xfs_trans_brelse(sc->tp, bp); + goto out_err; + } + + /* Queue buffer, and flush if we have too much dirty data. */ + xfs_buf_delwri_queue_here(bp, &buffers_list); + xfs_trans_brelse(sc->tp, bp); + + if (!(off & flush_mask)) { + error = xfs_buf_delwri_submit(&buffers_list); + if (error) + goto out_err; + } + } + + /* + * Write the new blocks to disk. If the ordered list isn't empty after + * that, then something went wrong and we have to fail. This should + * never happen, but we'll check anyway. + */ + error = xfs_buf_delwri_submit(&buffers_list); + if (error) + goto out_err; + + if (!list_empty(&buffers_list)) { + ASSERT(list_empty(&buffers_list)); + error = -EIO; + goto out_err; + } + + return 0; + +out_err: + xfs_buf_delwri_cancel(&buffers_list); + return error; +} + +/* + * Set the temporary file's size. Caller must join the tempfile to the scrub + * transaction and is responsible for adjusting block mappings as needed. + */ +int +xrep_tempfile_set_isize( + struct xfs_scrub *sc, + unsigned long long isize) +{ + if (sc->tempip->i_disk_size == isize) + return 0; + + sc->tempip->i_disk_size = isize; + i_size_write(VFS_I(sc->tempip), isize); + return xrep_tempfile_roll_trans(sc); +} + +/* + * Roll a repair transaction involving the temporary file. Caller must join + * both the temporary file and the file being scrubbed to the transaction. + * This function return with both inodes joined to a new scrub transaction, + * or the usual negative errno. + */ +int +xrep_tempfile_roll_trans( + struct xfs_scrub *sc) +{ + int error; + + xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE); + error = xrep_roll_trans(sc); + if (error) + return error; + + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + return 0; +} diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h index e165e0a3faf6..7980f9c4de55 100644 --- a/fs/xfs/scrub/tempfile.h +++ b/fs/xfs/scrub/tempfile.h @@ -17,6 +17,21 @@ void xrep_tempfile_iounlock(struct xfs_scrub *sc); void xrep_tempfile_ilock(struct xfs_scrub *sc); bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc); void xrep_tempfile_iunlock(struct xfs_scrub *sc); + +int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off, + xfs_filblks_t len); + +enum xfs_blft; + +typedef int (*xrep_tempfile_copyin_fn)(struct xfs_scrub *sc, + struct xfs_buf *bp, void *data); + +int xrep_tempfile_copyin(struct xfs_scrub *sc, xfs_fileoff_t off, + xfs_filblks_t len, xrep_tempfile_copyin_fn fn, void *data); + +int xrep_tempfile_set_isize(struct xfs_scrub *sc, unsigned long long isize); + +int xrep_tempfile_roll_trans(struct xfs_scrub *sc); #else static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc) { diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index cbd70ecd3011..ae90731bf6ad 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2314,6 +2314,45 @@ TRACE_EVENT(xrep_tempfile_create, __entry->temp_inum) ); +DECLARE_EVENT_CLASS(xrep_tempfile_class, + TP_PROTO(struct xfs_scrub *sc, int whichfork, + struct xfs_bmbt_irec *irec), + TP_ARGS(sc, whichfork, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, whichfork) + __field(xfs_fileoff_t, lblk) + __field(xfs_filblks_t, len) + __field(xfs_fsblock_t, pblk) + __field(int, state) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->tempip->i_ino; + __entry->whichfork = whichfork; + __entry->lblk = irec->br_startoff; + __entry->len = irec->br_blockcount; + __entry->pblk = irec->br_startblock; + __entry->state = irec->br_state; + ), + TP_printk("dev %d:%d ino 0x%llx whichfork %s fileoff 0x%llx fsbcount 0x%llx startblock 0x%llx state %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS), + __entry->lblk, + __entry->len, + __entry->pblk, + __entry->state) +); +#define DEFINE_XREP_TEMPFILE_EVENT(name) \ +DEFINE_EVENT(xrep_tempfile_class, name, \ + TP_PROTO(struct xfs_scrub *sc, int whichfork, \ + struct xfs_bmbt_irec *irec), \ + TP_ARGS(sc, whichfork, irec)) +DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_prealloc); +DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_copyin); + TRACE_EVENT(xreap_ifork_extent, TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork, const struct xfs_bmbt_irec *irec), From 56596d8bffd2f3c80d9844835e6c118d89b67ca9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:32 -0700 Subject: [PATCH 2/3] xfs: teach the tempfile to set up atomic file content exchanges Create some new routines to exchange the contents of a temporary file created to stage a repair with another ondisk file. This will be used by the realtime summary repair function to commit atomically the new rtsummary data, which will be staged in the tempfile. The rest of XFS coordinates access to the realtime metadata inodes solely through the ILOCK. For repair to hold its exclusive access to the realtime summary file, it has to allocate a single large transaction and roll it repeatedly throughout the repair while holding the ILOCK. In turn, this means that for now there's only a partial file mapping exchange implementation for the temporary file because we can only work within an existing transaction. For now, the only tempswap functions needed here are to estimate the resource requirements of the exchange, reserve more space/quota to an existing transaction, and kick off the actual exchange. The rest will be added in a later patch in preparation for repairing xattrs and directories. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/scrub/scrub.c | 8 +- fs/xfs/scrub/scrub.h | 7 ++ fs/xfs/scrub/tempexch.h | 21 +++++ fs/xfs/scrub/tempfile.c | 191 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/trace.h | 1 + 5 files changed, 225 insertions(+), 3 deletions(-) create mode 100644 fs/xfs/scrub/tempexch.h diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index d9012e9a6afd..ff156edf49a0 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -149,14 +149,15 @@ xchk_probe( /* Scrub setup and teardown */ +#define FSGATES_MASK (XCHK_FSGATES_ALL | XREP_FSGATES_ALL) static inline void xchk_fsgates_disable( struct xfs_scrub *sc) { - if (!(sc->flags & XCHK_FSGATES_ALL)) + if (!(sc->flags & FSGATES_MASK)) return; - trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL); + trace_xchk_fsgates_disable(sc, sc->flags & FSGATES_MASK); if (sc->flags & XCHK_FSGATES_DRAIN) xfs_drain_wait_disable(); @@ -170,8 +171,9 @@ xchk_fsgates_disable( if (sc->flags & XCHK_FSGATES_RMAP) xfs_rmap_hook_disable(); - sc->flags &= ~XCHK_FSGATES_ALL; + sc->flags &= ~FSGATES_MASK; } +#undef FSGATES_MASK /* Free all the resources and finish the transactions. */ STATIC int diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index e37d8599718e..d38f0b30416c 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -131,6 +131,7 @@ struct xfs_scrub { #define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */ #define XCHK_FSGATES_DIRENTS (1U << 5) /* directory live update enabled */ #define XCHK_FSGATES_RMAP (1U << 6) /* rmapbt live update enabled */ +#define XREP_FSGATES_EXCHANGE_RANGE (1U << 29) /* uses file content exchange */ #define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */ #define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */ @@ -145,6 +146,12 @@ struct xfs_scrub { XCHK_FSGATES_DIRENTS | \ XCHK_FSGATES_RMAP) +/* + * The sole XREP_FSGATES* flag reflects a log intent item that is protected + * by a log-incompat feature flag. No code patching in use here. + */ +#define XREP_FSGATES_ALL (XREP_FSGATES_EXCHANGE_RANGE) + /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); int xchk_superblock(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/tempexch.h b/fs/xfs/scrub/tempexch.h new file mode 100644 index 000000000000..98222b684b6a --- /dev/null +++ b/fs/xfs/scrub/tempexch.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_TEMPEXCH_H__ +#define __XFS_SCRUB_TEMPEXCH_H__ + +#ifdef CONFIG_XFS_ONLINE_REPAIR +struct xrep_tempexch { + struct xfs_exchmaps_req req; +}; + +int xrep_tempexch_enable(struct xfs_scrub *sc); +int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork, + struct xrep_tempexch *ti); + +int xrep_tempexch_contents(struct xfs_scrub *sc, struct xrep_tempexch *ti); +#endif /* CONFIG_XFS_ONLINE_REPAIR */ + +#endif /* __XFS_SCRUB_TEMPEXCH_H__ */ diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 83e683e16561..7791336ca820 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -19,12 +19,14 @@ #include "xfs_trans_space.h" #include "xfs_dir2.h" #include "xfs_exchrange.h" +#include "xfs_exchmaps.h" #include "xfs_defer.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/repair.h" #include "scrub/trace.h" #include "scrub/tempfile.h" +#include "scrub/tempexch.h" #include "scrub/xfile.h" /* @@ -446,3 +448,192 @@ xrep_tempfile_roll_trans( xfs_trans_ijoin(sc->tp, sc->tempip, 0); return 0; } + +/* Enable file content exchanges. */ +int +xrep_tempexch_enable( + struct xfs_scrub *sc) +{ + if (sc->flags & XREP_FSGATES_EXCHANGE_RANGE) + return 0; + + if (!xfs_has_exchange_range(sc->mp)) + return -EOPNOTSUPP; + + trace_xchk_fsgates_enable(sc, XREP_FSGATES_EXCHANGE_RANGE); + + sc->flags |= XREP_FSGATES_EXCHANGE_RANGE; + return 0; +} + +/* + * Fill out the mapping exchange request in preparation for atomically + * committing the contents of a metadata file that we've rebuilt in the temp + * file. + */ +STATIC int +xrep_tempexch_prep_request( + struct xfs_scrub *sc, + int whichfork, + struct xrep_tempexch *tx) +{ + struct xfs_exchmaps_req *req = &tx->req; + + memset(tx, 0, sizeof(struct xrep_tempexch)); + + /* COW forks don't exist on disk. */ + if (whichfork == XFS_COW_FORK) { + ASSERT(0); + return -EINVAL; + } + + /* Both files should have the relevant forks. */ + if (!xfs_ifork_ptr(sc->ip, whichfork) || + !xfs_ifork_ptr(sc->tempip, whichfork)) { + ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL); + ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL); + return -EINVAL; + } + + /* Exchange all mappings in both forks. */ + req->ip1 = sc->tempip; + req->ip2 = sc->ip; + req->startoff1 = 0; + req->startoff2 = 0; + switch (whichfork) { + case XFS_ATTR_FORK: + req->flags |= XFS_EXCHMAPS_ATTR_FORK; + break; + case XFS_DATA_FORK: + /* Always exchange sizes when exchanging data fork mappings. */ + req->flags |= XFS_EXCHMAPS_SET_SIZES; + break; + } + req->blockcount = XFS_MAX_FILEOFF; + + return 0; +} + +/* + * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip + * this if quota enforcement is disabled or if both inodes' dquots are the + * same. The qretry structure must be initialized to zeroes before the first + * call to this function. + */ +STATIC int +xrep_tempexch_reserve_quota( + struct xfs_scrub *sc, + const struct xrep_tempexch *tx) +{ + struct xfs_trans *tp = sc->tp; + const struct xfs_exchmaps_req *req = &tx->req; + int64_t ddelta, rdelta; + int error; + + /* + * Don't bother with a quota reservation if we're not enforcing them + * or the two inodes have the same dquots. + */ + if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 || + (req->ip1->i_udquot == req->ip2->i_udquot && + req->ip1->i_gdquot == req->ip2->i_gdquot && + req->ip1->i_pdquot == req->ip2->i_pdquot)) + return 0; + + /* + * Quota reservation for each file comes from two sources. First, we + * need to account for any net gain in mapped blocks during the + * exchange. Second, we need reservation for the gross gain in mapped + * blocks so that we don't trip over any quota block reservation + * assertions. We must reserve the gross gain because the quota code + * subtracts from bcount the number of blocks that we unmap; it does + * not add that quantity back to the quota block reservation. + */ + ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount); + rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount); + error = xfs_trans_reserve_quota_nblks(tp, req->ip1, + ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount, + true); + if (error) + return error; + + ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount); + rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount); + return xfs_trans_reserve_quota_nblks(tp, req->ip2, + ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount, + true); +} + +/* + * Prepare an existing transaction for an atomic file contents exchange. + * + * This function fills out the mapping exchange request and resource estimation + * structures in preparation for exchanging the contents of a metadata file + * that has been rebuilt in the temp file. Next, it reserves space and quota + * for the transaction. + * + * The caller must hold ILOCK_EXCL of the scrub target file and the temporary + * file. The caller must join both inodes to the transaction with no unlock + * flags, and is responsible for dropping both ILOCKs when appropriate. Only + * use this when those ILOCKs cannot be dropped. + */ +int +xrep_tempexch_trans_reserve( + struct xfs_scrub *sc, + int whichfork, + struct xrep_tempexch *tx) +{ + int error; + + ASSERT(sc->tp != NULL); + xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL); + xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL); + + error = xrep_tempexch_prep_request(sc, whichfork, tx); + if (error) + return error; + + error = xfs_exchmaps_estimate(&tx->req); + if (error) + return error; + + error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0); + if (error) + return error; + + return xrep_tempexch_reserve_quota(sc, tx); +} + +/* + * Exchange file mappings (and hence file contents) between the file being + * repaired and the temporary file. Returns with both inodes locked and joined + * to a clean scrub transaction. + */ +int +xrep_tempexch_contents( + struct xfs_scrub *sc, + struct xrep_tempexch *tx) +{ + int error; + + ASSERT(sc->flags & XREP_FSGATES_EXCHANGE_RANGE); + + xfs_exchange_mappings(sc->tp, &tx->req); + error = xfs_defer_finish(&sc->tp); + if (error) + return error; + + /* + * If we exchanged the ondisk sizes of two metadata files, we must + * exchanged the incore sizes as well. + */ + if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) { + loff_t temp; + + temp = i_size_read(VFS_I(sc->ip)); + i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); + i_size_write(VFS_I(sc->tempip), temp); + } + + return 0; +} diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index ae90731bf6ad..8d05f2adae3d 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -114,6 +114,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); { XCHK_FSGATES_QUOTA, "fsgates_quota" }, \ { XCHK_FSGATES_DIRENTS, "fsgates_dirents" }, \ { XCHK_FSGATES_RMAP, "fsgates_rmap" }, \ + { XREP_FSGATES_EXCHANGE_RANGE, "fsgates_exchrange" }, \ { XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \ { XREP_ALREADY_FIXED, "already_fixed" } From abf039e2e4afde98e448253f9a7ecc784a87924d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:33 -0700 Subject: [PATCH 3/3] xfs: online repair of realtime summaries Repair the realtime summary data by constructing a new rtsummary file in the scrub temporary file, then atomically swapping the contents. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/scrub/common.c | 1 + fs/xfs/scrub/repair.h | 3 + fs/xfs/scrub/rtsummary.c | 33 +++--- fs/xfs/scrub/rtsummary.h | 37 +++++++ fs/xfs/scrub/rtsummary_repair.c | 177 ++++++++++++++++++++++++++++++++ fs/xfs/scrub/scrub.c | 3 +- 7 files changed, 239 insertions(+), 16 deletions(-) create mode 100644 fs/xfs/scrub/rtsummary.h create mode 100644 fs/xfs/scrub/rtsummary_repair.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index ae8488ab4d6b..5e3ac7ec8fa5 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -212,6 +212,7 @@ xfs-y += $(addprefix scrub/, \ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ rtbitmap_repair.o \ + rtsummary_repair.o \ ) xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \ diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index a27d33b6f464..a2da2bef509a 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -31,6 +31,7 @@ #include "xfs_ag.h" #include "xfs_error.h" #include "xfs_quota.h" +#include "xfs_exchmaps.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index ce082d941459..0e2b695ab8f6 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -126,8 +126,10 @@ int xrep_fscounters(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtbitmap(struct xfs_scrub *sc); +int xrep_rtsummary(struct xfs_scrub *sc); #else # define xrep_rtbitmap xrep_notsupported +# define xrep_rtsummary xrep_notsupported #endif /* CONFIG_XFS_RT */ #ifdef CONFIG_XFS_QUOTA @@ -212,6 +214,7 @@ xrep_setup_nothing( #define xrep_quotacheck xrep_notsupported #define xrep_nlinks xrep_notsupported #define xrep_fscounters xrep_notsupported +#define xrep_rtsummary xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c index 5055092bd9e8..3fee603f5244 100644 --- a/fs/xfs/scrub/rtsummary.c +++ b/fs/xfs/scrub/rtsummary.c @@ -17,10 +17,14 @@ #include "xfs_bit.h" #include "xfs_bmap.h" #include "xfs_sb.h" +#include "xfs_exchmaps.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/xfile.h" +#include "scrub/repair.h" +#include "scrub/tempexch.h" +#include "scrub/rtsummary.h" /* * Realtime Summary @@ -32,18 +36,6 @@ * (potentially large) amount of data in pageable memory. */ -struct xchk_rtsummary { - struct xfs_rtalloc_args args; - - uint64_t rextents; - uint64_t rbmblocks; - uint64_t rsumsize; - unsigned int rsumlevels; - - /* Memory buffer for the summary comparison. */ - union xfs_suminfo_raw words[]; -}; - /* Set us up to check the rtsummary file. */ int xchk_setup_rtsummary( @@ -60,6 +52,12 @@ xchk_setup_rtsummary( return -ENOMEM; sc->buf = rts; + if (xchk_could_repair(sc)) { + error = xrep_setup_rtsummary(sc, rts); + if (error) + return error; + } + /* * Create an xfile to construct a new rtsummary file. The xfile allows * us to avoid pinning kernel memory for this purpose. @@ -70,7 +68,7 @@ xchk_setup_rtsummary( if (error) return error; - error = xchk_trans_alloc(sc, 0); + error = xchk_trans_alloc(sc, rts->resblks); if (error) return error; @@ -135,7 +133,7 @@ xfsum_store( sumoff << XFS_WORDLOG); } -static inline int +inline int xfsum_copyout( struct xfs_scrub *sc, xfs_rtsumoff_t sumoff, @@ -362,7 +360,12 @@ xchk_rtsummary( error = xchk_rtsum_compare(sc); out_rbm: - /* Unlock the rtbitmap since we're done with it. */ + /* + * Unlock the rtbitmap since we're done with it. All other writers of + * the rt free space metadata grab the bitmap and summary ILOCKs in + * that order, so we're still protected against allocation activities + * even if we continue on to the repair function. + */ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); return error; } diff --git a/fs/xfs/scrub/rtsummary.h b/fs/xfs/scrub/rtsummary.h new file mode 100644 index 000000000000..e1d50304d8d4 --- /dev/null +++ b/fs/xfs/scrub/rtsummary.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_RTSUMMARY_H__ +#define __XFS_SCRUB_RTSUMMARY_H__ + +struct xchk_rtsummary { +#ifdef CONFIG_XFS_ONLINE_REPAIR + struct xrep_tempexch tempexch; +#endif + struct xfs_rtalloc_args args; + + uint64_t rextents; + uint64_t rbmblocks; + uint64_t rsumsize; + unsigned int rsumlevels; + unsigned int resblks; + + /* suminfo position of xfile as we write buffers to disk. */ + xfs_rtsumoff_t prep_wordoff; + + /* Memory buffer for the summary comparison. */ + union xfs_suminfo_raw words[]; +}; + +int xfsum_copyout(struct xfs_scrub *sc, xfs_rtsumoff_t sumoff, + union xfs_suminfo_raw *rawinfo, unsigned int nr_words); + +#ifdef CONFIG_XFS_ONLINE_REPAIR +int xrep_setup_rtsummary(struct xfs_scrub *sc, struct xchk_rtsummary *rts); +#else +# define xrep_setup_rtsummary(sc, rts) (0) +#endif /* CONFIG_XFS_ONLINE_REPAIR */ + +#endif /* __XFS_SCRUB_RTSUMMARY_H__ */ diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c new file mode 100644 index 000000000000..c8bb6c4f15d0 --- /dev/null +++ b/fs/xfs/scrub/rtsummary_repair.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_btree.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_rtalloc.h" +#include "xfs_inode.h" +#include "xfs_bit.h" +#include "xfs_bmap.h" +#include "xfs_bmap_btree.h" +#include "xfs_exchmaps.h" +#include "xfs_rtbitmap.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/tempfile.h" +#include "scrub/tempexch.h" +#include "scrub/reap.h" +#include "scrub/xfile.h" +#include "scrub/rtsummary.h" + +/* Set us up to repair the rtsummary file. */ +int +xrep_setup_rtsummary( + struct xfs_scrub *sc, + struct xchk_rtsummary *rts) +{ + struct xfs_mount *mp = sc->mp; + unsigned long long blocks; + int error; + + error = xrep_tempfile_create(sc, S_IFREG); + if (error) + return error; + + /* + * If we're doing a repair, we reserve enough blocks to write out a + * completely new summary file, plus twice as many blocks as we would + * need if we can only allocate one block per data fork mapping. This + * should cover the preallocation of the temporary file and exchanging + * the extent mappings. + * + * We cannot use xfs_exchmaps_estimate because we have not yet + * constructed the replacement rtsummary and therefore do not know how + * many extents it will use. By the time we do, we will have a dirty + * transaction (which we cannot drop because we cannot drop the + * rtsummary ILOCK) and cannot ask for more reservation. + */ + blocks = XFS_B_TO_FSB(mp, mp->m_rsumsize); + blocks += xfs_bmbt_calc_size(mp, blocks) * 2; + if (blocks > UINT_MAX) + return -EOPNOTSUPP; + + rts->resblks += blocks; + + /* + * Grab support for atomic file content exchanges before we allocate + * any transactions or grab ILOCKs. + */ + return xrep_tempexch_enable(sc); +} + +static int +xrep_rtsummary_prep_buf( + struct xfs_scrub *sc, + struct xfs_buf *bp, + void *data) +{ + struct xchk_rtsummary *rts = data; + struct xfs_mount *mp = sc->mp; + union xfs_suminfo_raw *ondisk; + int error; + + rts->args.mp = sc->mp; + rts->args.tp = sc->tp; + rts->args.sumbp = bp; + ondisk = xfs_rsumblock_infoptr(&rts->args, 0); + rts->args.sumbp = NULL; + + bp->b_ops = &xfs_rtbuf_ops; + + error = xfsum_copyout(sc, rts->prep_wordoff, ondisk, mp->m_blockwsize); + if (error) + return error; + + rts->prep_wordoff += mp->m_blockwsize; + xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTSUMMARY_BUF); + return 0; +} + +/* Repair the realtime summary. */ +int +xrep_rtsummary( + struct xfs_scrub *sc) +{ + struct xchk_rtsummary *rts = sc->buf; + struct xfs_mount *mp = sc->mp; + xfs_filblks_t rsumblocks; + int error; + + /* We require the rmapbt to rebuild anything. */ + if (!xfs_has_rmapbt(mp)) + return -EOPNOTSUPP; + + /* Walk away if we disagree on the size of the rt bitmap. */ + if (rts->rbmblocks != mp->m_sb.sb_rbmblocks) + return 0; + + /* Make sure any problems with the fork are fixed. */ + error = xrep_metadata_inode_forks(sc); + if (error) + return error; + + /* + * Try to take ILOCK_EXCL of the temporary file. We had better be the + * only ones holding onto this inode, but we can't block while holding + * the rtsummary file's ILOCK_EXCL. + */ + while (!xrep_tempfile_ilock_nowait(sc)) { + if (xchk_should_terminate(sc, &error)) + return error; + delay(1); + } + + /* Make sure we have space allocated for the entire summary file. */ + rsumblocks = XFS_B_TO_FSB(mp, rts->rsumsize); + xfs_trans_ijoin(sc->tp, sc->ip, 0); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + error = xrep_tempfile_prealloc(sc, 0, rsumblocks); + if (error) + return error; + + /* Last chance to abort before we start committing fixes. */ + if (xchk_should_terminate(sc, &error)) + return error; + + /* Copy the rtsummary file that we generated. */ + error = xrep_tempfile_copyin(sc, 0, rsumblocks, + xrep_rtsummary_prep_buf, rts); + if (error) + return error; + error = xrep_tempfile_set_isize(sc, rts->rsumsize); + if (error) + return error; + + /* + * Now exchange the contents. Nothing in repair uses the temporary + * buffer, so we can reuse it for the tempfile exchrange information. + */ + error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, &rts->tempexch); + if (error) + return error; + + error = xrep_tempexch_contents(sc, &rts->tempexch); + if (error) + return error; + + /* Reset incore state and blow out the summary cache. */ + if (mp->m_rsum_cache) + memset(mp->m_rsum_cache, 0xFF, mp->m_sb.sb_rbmblocks); + + mp->m_rsumlevels = rts->rsumlevels; + mp->m_rsumsize = rts->rsumsize; + + /* Free the old rtsummary blocks if they're not in use. */ + return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK); +} diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index ff156edf49a0..62a064c1a5d3 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -18,6 +18,7 @@ #include "xfs_buf_mem.h" #include "xfs_rmap.h" #include "xfs_exchrange.h" +#include "xfs_exchmaps.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -354,7 +355,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_FS, .setup = xchk_setup_rtsummary, .scrub = xchk_rtsummary, - .repair = xrep_notsupported, + .repair = xrep_rtsummary, }, [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ .type = ST_FS,