b7c62d90c1
Add parent pointer attribute during xfs_create, and subroutines to initialize attributes. Note that the xfs_attr_intent object contains a pointer to the caller's xfs_da_args object, so the latter must persist until transaction commit. Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Allison Henderson <allison.henderson@oracle.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> [djwong: shorten names, adjust to new format, set init_xattrs for parent pointers] Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
872 lines
22 KiB
C
872 lines
22 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_ialloc.h"
|
|
#include "xfs_quota.h"
|
|
#include "xfs_bmap.h"
|
|
#include "xfs_bmap_btree.h"
|
|
#include "xfs_trans_space.h"
|
|
#include "xfs_dir2.h"
|
|
#include "xfs_exchrange.h"
|
|
#include "xfs_exchmaps.h"
|
|
#include "xfs_defer.h"
|
|
#include "xfs_symlink_remote.h"
|
|
#include "scrub/scrub.h"
|
|
#include "scrub/common.h"
|
|
#include "scrub/repair.h"
|
|
#include "scrub/trace.h"
|
|
#include "scrub/tempfile.h"
|
|
#include "scrub/tempexch.h"
|
|
#include "scrub/xfile.h"
|
|
|
|
/*
|
|
* Create a temporary file for reconstructing metadata, with the intention of
|
|
* atomically exchanging the temporary file's contents with the file that's
|
|
* being repaired.
|
|
*/
|
|
int
|
|
xrep_tempfile_create(
|
|
struct xfs_scrub *sc,
|
|
uint16_t mode)
|
|
{
|
|
struct xfs_mount *mp = sc->mp;
|
|
struct xfs_trans *tp = NULL;
|
|
struct xfs_dquot *udqp = NULL;
|
|
struct xfs_dquot *gdqp = NULL;
|
|
struct xfs_dquot *pdqp = NULL;
|
|
struct xfs_trans_res *tres;
|
|
struct xfs_inode *dp = mp->m_rootip;
|
|
xfs_ino_t ino;
|
|
unsigned int resblks;
|
|
bool is_dir = S_ISDIR(mode);
|
|
int error;
|
|
|
|
if (xfs_is_shutdown(mp))
|
|
return -EIO;
|
|
if (xfs_is_readonly(mp))
|
|
return -EROFS;
|
|
|
|
ASSERT(sc->tp == NULL);
|
|
ASSERT(sc->tempip == NULL);
|
|
|
|
/*
|
|
* Make sure that we have allocated dquot(s) on disk. The temporary
|
|
* inode should be completely root owned so that we don't fail due to
|
|
* quota limits.
|
|
*/
|
|
error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
|
|
XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
|
|
if (error)
|
|
return error;
|
|
|
|
if (is_dir) {
|
|
resblks = xfs_mkdir_space_res(mp, 0);
|
|
tres = &M_RES(mp)->tr_mkdir;
|
|
} else {
|
|
resblks = XFS_IALLOC_SPACE_RES(mp);
|
|
tres = &M_RES(mp)->tr_create_tmpfile;
|
|
}
|
|
|
|
error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
|
|
&tp);
|
|
if (error)
|
|
goto out_release_dquots;
|
|
|
|
/* Allocate inode, set up directory. */
|
|
error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
|
|
if (error)
|
|
goto out_trans_cancel;
|
|
error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
|
|
0, false, &sc->tempip);
|
|
if (error)
|
|
goto out_trans_cancel;
|
|
|
|
/* Change the ownership of the inode to root. */
|
|
VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
|
|
VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
|
|
sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
|
|
xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
|
|
|
|
/*
|
|
* Mark our temporary file as private so that LSMs and the ACL code
|
|
* don't try to add their own metadata or reason about these files.
|
|
* The file should never be exposed to userspace.
|
|
*/
|
|
VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
|
|
VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
|
|
|
|
if (is_dir) {
|
|
error = xfs_dir_init(tp, sc->tempip, dp);
|
|
if (error)
|
|
goto out_trans_cancel;
|
|
} else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
|
|
/*
|
|
* Initialize the temporary symlink with a meaningless target
|
|
* that won't trip the verifiers. Repair must rewrite the
|
|
* target with meaningful content before swapping with the file
|
|
* being repaired. A single-byte target will not write a
|
|
* remote target block, so the owner is irrelevant.
|
|
*/
|
|
error = xfs_symlink_write_target(tp, sc->tempip,
|
|
sc->tempip->i_ino, ".", 1, 0, 0);
|
|
if (error)
|
|
goto out_trans_cancel;
|
|
}
|
|
|
|
/*
|
|
* Attach the dquot(s) to the inodes and modify them incore.
|
|
* These ids of the inode couldn't have changed since the new
|
|
* inode has been locked ever since it was created.
|
|
*/
|
|
xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
|
|
|
|
/*
|
|
* Put our temp file on the unlinked list so it's purged automatically.
|
|
* All file-based metadata being reconstructed using this file must be
|
|
* atomically exchanged with the original file because the contents
|
|
* here will be purged when the inode is dropped or log recovery cleans
|
|
* out the unlinked list.
|
|
*/
|
|
error = xfs_iunlink(tp, sc->tempip);
|
|
if (error)
|
|
goto out_trans_cancel;
|
|
|
|
error = xfs_trans_commit(tp);
|
|
if (error)
|
|
goto out_release_inode;
|
|
|
|
trace_xrep_tempfile_create(sc);
|
|
|
|
xfs_qm_dqrele(udqp);
|
|
xfs_qm_dqrele(gdqp);
|
|
xfs_qm_dqrele(pdqp);
|
|
|
|
/* Finish setting up the incore / vfs context. */
|
|
xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
|
|
xfs_setup_iops(sc->tempip);
|
|
xfs_finish_inode_setup(sc->tempip);
|
|
|
|
sc->temp_ilock_flags = 0;
|
|
return error;
|
|
|
|
out_trans_cancel:
|
|
xfs_trans_cancel(tp);
|
|
out_release_inode:
|
|
/*
|
|
* Wait until after the current transaction is aborted to finish the
|
|
* setup of the inode and release the inode. This prevents recursive
|
|
* transactions and deadlocks from xfs_inactive.
|
|
*/
|
|
if (sc->tempip) {
|
|
xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
|
|
xfs_finish_inode_setup(sc->tempip);
|
|
xchk_irele(sc, sc->tempip);
|
|
}
|
|
out_release_dquots:
|
|
xfs_qm_dqrele(udqp);
|
|
xfs_qm_dqrele(gdqp);
|
|
xfs_qm_dqrele(pdqp);
|
|
|
|
return error;
|
|
}
|
|
|
|
/* Take IOLOCK_EXCL on the temporary file, maybe. */
|
|
bool
|
|
xrep_tempfile_iolock_nowait(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
|
|
sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
 * to avoid deadlocks and lockdep complaints.
 *
 * Returns 0 once the lock is held, or the error set by
 * xchk_should_terminate() if the scrub should stop.
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	for (;;) {
		if (xrep_tempfile_iolock_nowait(sc))
			return 0;

		/* Bail out if the scrub has been asked to stop. */
		if (xchk_should_terminate(sc, &error))
			return error;

		/* Back off briefly before trying again. */
		delay(1);
	}
}
|
|
|
|
/* Release IOLOCK_EXCL on the temporary file. */
|
|
void
|
|
xrep_tempfile_iounlock(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
|
|
sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
|
|
}
|
|
|
|
/* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
|
|
void
|
|
xrep_tempfile_ilock(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
|
|
xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
|
|
}
|
|
|
|
/* Try to grab ILOCK_EXCL on the temporary file. */
|
|
bool
|
|
xrep_tempfile_ilock_nowait(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
|
|
sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Unlock ILOCK_EXCL on the temporary file after an update. */
|
|
void
|
|
xrep_tempfile_iunlock(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
|
|
sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
|
|
}
|
|
|
|
/*
|
|
* Begin the process of making changes to both the file being scrubbed and
|
|
* the temporary file by taking ILOCK_EXCL on both.
|
|
*/
|
|
void
|
|
xrep_tempfile_ilock_both(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
|
|
sc->ilock_flags |= XFS_ILOCK_EXCL;
|
|
sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
|
|
}
|
|
|
|
/* Unlock ILOCK_EXCL on both files. */
|
|
void
|
|
xrep_tempfile_iunlock_both(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
xrep_tempfile_iunlock(sc);
|
|
xchk_iunlock(sc, XFS_ILOCK_EXCL);
|
|
}
|
|
|
|
/* Release the temporary file. */
|
|
void
|
|
xrep_tempfile_rele(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
if (!sc->tempip)
|
|
return;
|
|
|
|
if (sc->temp_ilock_flags) {
|
|
xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
|
|
sc->temp_ilock_flags = 0;
|
|
}
|
|
|
|
xchk_irele(sc, sc->tempip);
|
|
sc->tempip = NULL;
|
|
}
|
|
|
|
/*
|
|
* Make sure that the given range of the data fork of the temporary file is
|
|
* mapped to written blocks. The caller must ensure that both inodes are
|
|
* joined to the transaction.
|
|
*/
|
|
int
|
|
xrep_tempfile_prealloc(
|
|
struct xfs_scrub *sc,
|
|
xfs_fileoff_t off,
|
|
xfs_filblks_t len)
|
|
{
|
|
struct xfs_bmbt_irec map;
|
|
xfs_fileoff_t end = off + len;
|
|
int error;
|
|
|
|
ASSERT(sc->tempip != NULL);
|
|
ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
|
|
|
|
for (; off < end; off = map.br_startoff + map.br_blockcount) {
|
|
int nmaps = 1;
|
|
|
|
/*
|
|
* If we have a real extent mapping this block then we're
|
|
* in ok shape.
|
|
*/
|
|
error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
|
|
XFS_DATA_FORK);
|
|
if (error)
|
|
return error;
|
|
if (nmaps == 0) {
|
|
ASSERT(nmaps != 0);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
|
|
if (xfs_bmap_is_written_extent(&map))
|
|
continue;
|
|
|
|
/*
|
|
* If we find a delalloc reservation then something is very
|
|
* very wrong. Bail out.
|
|
*/
|
|
if (map.br_startblock == DELAYSTARTBLOCK)
|
|
return -EFSCORRUPTED;
|
|
|
|
/*
|
|
* Make sure this block has a real zeroed extent allocated to
|
|
* it.
|
|
*/
|
|
nmaps = 1;
|
|
error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
|
|
XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
|
|
&nmaps);
|
|
if (error)
|
|
return error;
|
|
if (nmaps != 1)
|
|
return -EFSCORRUPTED;
|
|
|
|
trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
|
|
|
|
/* Commit new extent and all deferred work. */
|
|
error = xfs_defer_finish(&sc->tp);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Write data to each block of a file. The given range of the tempfile's data
|
|
* fork must already be populated with written extents.
|
|
*/
|
|
int
|
|
xrep_tempfile_copyin(
|
|
struct xfs_scrub *sc,
|
|
xfs_fileoff_t off,
|
|
xfs_filblks_t len,
|
|
xrep_tempfile_copyin_fn prep_fn,
|
|
void *data)
|
|
{
|
|
LIST_HEAD(buffers_list);
|
|
struct xfs_mount *mp = sc->mp;
|
|
struct xfs_buf *bp;
|
|
xfs_fileoff_t flush_mask;
|
|
xfs_fileoff_t end = off + len;
|
|
loff_t pos = XFS_FSB_TO_B(mp, off);
|
|
int error = 0;
|
|
|
|
ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
|
|
|
|
/* Flush buffers to disk every 512K */
|
|
flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
|
|
|
|
for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
|
|
struct xfs_bmbt_irec map;
|
|
int nmaps = 1;
|
|
|
|
/* Read block mapping for this file block. */
|
|
error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
|
|
if (error)
|
|
goto out_err;
|
|
if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
|
|
error = -EFSCORRUPTED;
|
|
goto out_err;
|
|
}
|
|
|
|
/* Get the metadata buffer for this offset in the file. */
|
|
error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
|
|
XFS_FSB_TO_DADDR(mp, map.br_startblock),
|
|
mp->m_bsize, 0, &bp);
|
|
if (error)
|
|
goto out_err;
|
|
|
|
trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
|
|
|
|
/* Read in a block's worth of data from the xfile. */
|
|
error = prep_fn(sc, bp, data);
|
|
if (error) {
|
|
xfs_trans_brelse(sc->tp, bp);
|
|
goto out_err;
|
|
}
|
|
|
|
/* Queue buffer, and flush if we have too much dirty data. */
|
|
xfs_buf_delwri_queue_here(bp, &buffers_list);
|
|
xfs_trans_brelse(sc->tp, bp);
|
|
|
|
if (!(off & flush_mask)) {
|
|
error = xfs_buf_delwri_submit(&buffers_list);
|
|
if (error)
|
|
goto out_err;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Write the new blocks to disk. If the ordered list isn't empty after
|
|
* that, then something went wrong and we have to fail. This should
|
|
* never happen, but we'll check anyway.
|
|
*/
|
|
error = xfs_buf_delwri_submit(&buffers_list);
|
|
if (error)
|
|
goto out_err;
|
|
|
|
if (!list_empty(&buffers_list)) {
|
|
ASSERT(list_empty(&buffers_list));
|
|
error = -EIO;
|
|
goto out_err;
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_err:
|
|
xfs_buf_delwri_cancel(&buffers_list);
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* Set the temporary file's size. Caller must join the tempfile to the scrub
|
|
* transaction and is responsible for adjusting block mappings as needed.
|
|
*/
|
|
int
|
|
xrep_tempfile_set_isize(
|
|
struct xfs_scrub *sc,
|
|
unsigned long long isize)
|
|
{
|
|
if (sc->tempip->i_disk_size == isize)
|
|
return 0;
|
|
|
|
sc->tempip->i_disk_size = isize;
|
|
i_size_write(VFS_I(sc->tempip), isize);
|
|
return xrep_tempfile_roll_trans(sc);
|
|
}
|
|
|
|
/*
|
|
* Roll a repair transaction involving the temporary file. Caller must join
|
|
* both the temporary file and the file being scrubbed to the transaction.
|
|
* This function return with both inodes joined to a new scrub transaction,
|
|
* or the usual negative errno.
|
|
*/
|
|
int
|
|
xrep_tempfile_roll_trans(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
int error;
|
|
|
|
xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
|
|
error = xrep_roll_trans(sc);
|
|
if (error)
|
|
return error;
|
|
|
|
xfs_trans_ijoin(sc->tp, sc->tempip, 0);
|
|
return 0;
|
|
}
|
|
|
|
/* Enable file content exchanges. */
|
|
int
|
|
xrep_tempexch_enable(
|
|
struct xfs_scrub *sc)
|
|
{
|
|
if (sc->flags & XREP_FSGATES_EXCHANGE_RANGE)
|
|
return 0;
|
|
|
|
if (!xfs_has_exchange_range(sc->mp))
|
|
return -EOPNOTSUPP;
|
|
|
|
trace_xchk_fsgates_enable(sc, XREP_FSGATES_EXCHANGE_RANGE);
|
|
|
|
sc->flags |= XREP_FSGATES_EXCHANGE_RANGE;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Fill out the mapping exchange request in preparation for atomically
|
|
* committing the contents of a metadata file that we've rebuilt in the temp
|
|
* file.
|
|
*/
|
|
STATIC int
|
|
xrep_tempexch_prep_request(
|
|
struct xfs_scrub *sc,
|
|
int whichfork,
|
|
struct xrep_tempexch *tx)
|
|
{
|
|
struct xfs_exchmaps_req *req = &tx->req;
|
|
|
|
memset(tx, 0, sizeof(struct xrep_tempexch));
|
|
|
|
/* COW forks don't exist on disk. */
|
|
if (whichfork == XFS_COW_FORK) {
|
|
ASSERT(0);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Both files should have the relevant forks. */
|
|
if (!xfs_ifork_ptr(sc->ip, whichfork) ||
|
|
!xfs_ifork_ptr(sc->tempip, whichfork)) {
|
|
ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
|
|
ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Exchange all mappings in both forks. */
|
|
req->ip1 = sc->tempip;
|
|
req->ip2 = sc->ip;
|
|
req->startoff1 = 0;
|
|
req->startoff2 = 0;
|
|
switch (whichfork) {
|
|
case XFS_ATTR_FORK:
|
|
req->flags |= XFS_EXCHMAPS_ATTR_FORK;
|
|
break;
|
|
case XFS_DATA_FORK:
|
|
/* Always exchange sizes when exchanging data fork mappings. */
|
|
req->flags |= XFS_EXCHMAPS_SET_SIZES;
|
|
break;
|
|
}
|
|
req->blockcount = XFS_MAX_FILEOFF;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Fill out the mapping exchange resource estimation structures in preparation
|
|
* for exchanging the contents of a metadata file that we've rebuilt in the
|
|
* temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
|
|
*/
|
|
STATIC int
|
|
xrep_tempexch_estimate(
|
|
struct xfs_scrub *sc,
|
|
struct xrep_tempexch *tx)
|
|
{
|
|
struct xfs_exchmaps_req *req = &tx->req;
|
|
struct xfs_ifork *ifp;
|
|
struct xfs_ifork *tifp;
|
|
int whichfork = xfs_exchmaps_reqfork(req);
|
|
int state = 0;
|
|
|
|
/*
|
|
* The exchmaps code only knows how to exchange file fork space
|
|
* mappings. Any fork data in local format must be promoted to a
|
|
* single block before the exchange can take place.
|
|
*/
|
|
ifp = xfs_ifork_ptr(sc->ip, whichfork);
|
|
if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
|
|
state |= 1;
|
|
|
|
tifp = xfs_ifork_ptr(sc->tempip, whichfork);
|
|
if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
|
|
state |= 2;
|
|
|
|
switch (state) {
|
|
case 0:
|
|
/* Both files have mapped extents; use the regular estimate. */
|
|
return xfs_exchrange_estimate(req);
|
|
case 1:
|
|
/*
|
|
* The file being repaired is in local format, but the temp
|
|
* file has mapped extents. To perform the exchange, the file
|
|
* being repaired must have its shorform data converted to an
|
|
* ondisk block so that the forks will be in extents format.
|
|
* We need one resblk for the conversion; the number of
|
|
* exchanges is (worst case) the temporary file's extent count
|
|
* plus the block we converted.
|
|
*/
|
|
req->ip1_bcount = sc->tempip->i_nblocks;
|
|
req->ip2_bcount = 1;
|
|
req->nr_exchanges = 1 + tifp->if_nextents;
|
|
req->resblks = 1;
|
|
break;
|
|
case 2:
|
|
/*
|
|
* The temporary file is in local format, but the file being
|
|
* repaired has mapped extents. To perform the exchange, the
|
|
* temp file must have its shortform data converted to an
|
|
* ondisk block, and the fork changed to extents format. We
|
|
* need one resblk for the conversion; the number of exchanges
|
|
* is (worst case) the extent count of the file being repaired
|
|
* plus the block we converted.
|
|
*/
|
|
req->ip1_bcount = 1;
|
|
req->ip2_bcount = sc->ip->i_nblocks;
|
|
req->nr_exchanges = 1 + ifp->if_nextents;
|
|
req->resblks = 1;
|
|
break;
|
|
case 3:
|
|
/*
|
|
* Both forks are in local format. To perform the exchange,
|
|
* both files must have their shortform data converted to
|
|
* fsblocks, and both forks must be converted to extents
|
|
* format. We need two resblks for the two conversions, and
|
|
* the number of exchanges is 1 since there's only one block at
|
|
* fileoff 0. Presumably, the caller could not exchange the
|
|
* two inode fork areas directly.
|
|
*/
|
|
req->ip1_bcount = 1;
|
|
req->ip2_bcount = 1;
|
|
req->nr_exchanges = 1;
|
|
req->resblks = 2;
|
|
break;
|
|
}
|
|
|
|
return xfs_exchmaps_estimate_overhead(req);
|
|
}
|
|
|
|
/*
|
|
* Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
|
|
* this if quota enforcement is disabled or if both inodes' dquots are the
|
|
* same. The qretry structure must be initialized to zeroes before the first
|
|
* call to this function.
|
|
*/
|
|
STATIC int
|
|
xrep_tempexch_reserve_quota(
|
|
struct xfs_scrub *sc,
|
|
const struct xrep_tempexch *tx)
|
|
{
|
|
struct xfs_trans *tp = sc->tp;
|
|
const struct xfs_exchmaps_req *req = &tx->req;
|
|
int64_t ddelta, rdelta;
|
|
int error;
|
|
|
|
/*
|
|
* Don't bother with a quota reservation if we're not enforcing them
|
|
* or the two inodes have the same dquots.
|
|
*/
|
|
if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
|
|
(req->ip1->i_udquot == req->ip2->i_udquot &&
|
|
req->ip1->i_gdquot == req->ip2->i_gdquot &&
|
|
req->ip1->i_pdquot == req->ip2->i_pdquot))
|
|
return 0;
|
|
|
|
/*
|
|
* Quota reservation for each file comes from two sources. First, we
|
|
* need to account for any net gain in mapped blocks during the
|
|
* exchange. Second, we need reservation for the gross gain in mapped
|
|
* blocks so that we don't trip over any quota block reservation
|
|
* assertions. We must reserve the gross gain because the quota code
|
|
* subtracts from bcount the number of blocks that we unmap; it does
|
|
* not add that quantity back to the quota block reservation.
|
|
*/
|
|
ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
|
|
rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
|
|
error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
|
|
ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
|
|
true);
|
|
if (error)
|
|
return error;
|
|
|
|
ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
|
|
rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
|
|
return xfs_trans_reserve_quota_nblks(tp, req->ip2,
|
|
ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
|
|
true);
|
|
}
|
|
|
|
/*
|
|
* Prepare an existing transaction for an atomic file contents exchange.
|
|
*
|
|
* This function fills out the mapping exchange request and resource estimation
|
|
* structures in preparation for exchanging the contents of a metadata file
|
|
* that has been rebuilt in the temp file. Next, it reserves space and quota
|
|
* for the transaction.
|
|
*
|
|
* The caller must hold ILOCK_EXCL of the scrub target file and the temporary
|
|
* file. The caller must join both inodes to the transaction with no unlock
|
|
* flags, and is responsible for dropping both ILOCKs when appropriate. Only
|
|
* use this when those ILOCKs cannot be dropped.
|
|
*/
|
|
int
|
|
xrep_tempexch_trans_reserve(
|
|
struct xfs_scrub *sc,
|
|
int whichfork,
|
|
struct xrep_tempexch *tx)
|
|
{
|
|
int error;
|
|
|
|
ASSERT(sc->tp != NULL);
|
|
xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
|
|
xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
|
|
|
|
error = xrep_tempexch_prep_request(sc, whichfork, tx);
|
|
if (error)
|
|
return error;
|
|
|
|
error = xfs_exchmaps_estimate(&tx->req);
|
|
if (error)
|
|
return error;
|
|
|
|
error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
|
|
if (error)
|
|
return error;
|
|
|
|
return xrep_tempexch_reserve_quota(sc, tx);
|
|
}
|
|
|
|
/*
|
|
* Create a new transaction for a file contents exchange.
|
|
*
|
|
* This function fills out the mapping excahange request and resource
|
|
* estimation structures in preparation for exchanging the contents of a
|
|
* metadata file that has been rebuilt in the temp file. Next, it reserves
|
|
* space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
|
|
* reserves quota for the transaction.
|
|
*
|
|
* The caller is responsible for dropping both ILOCKs when appropriate.
|
|
*/
|
|
int
|
|
xrep_tempexch_trans_alloc(
|
|
struct xfs_scrub *sc,
|
|
int whichfork,
|
|
struct xrep_tempexch *tx)
|
|
{
|
|
unsigned int flags = 0;
|
|
int error;
|
|
|
|
ASSERT(sc->tp == NULL);
|
|
|
|
error = xrep_tempexch_prep_request(sc, whichfork, tx);
|
|
if (error)
|
|
return error;
|
|
|
|
error = xrep_tempexch_estimate(sc, tx);
|
|
if (error)
|
|
return error;
|
|
|
|
if (xfs_has_lazysbcount(sc->mp))
|
|
flags |= XFS_TRANS_RES_FDBLKS;
|
|
|
|
error = xrep_tempexch_enable(sc);
|
|
if (error)
|
|
return error;
|
|
|
|
error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
|
|
tx->req.resblks, 0, flags, &sc->tp);
|
|
if (error)
|
|
return error;
|
|
|
|
sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
|
|
sc->ilock_flags |= XFS_ILOCK_EXCL;
|
|
xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);
|
|
|
|
return xrep_tempexch_reserve_quota(sc, tx);
|
|
}
|
|
|
|
/*
|
|
* Exchange file mappings (and hence file contents) between the file being
|
|
* repaired and the temporary file. Returns with both inodes locked and joined
|
|
* to a clean scrub transaction.
|
|
*/
|
|
int
|
|
xrep_tempexch_contents(
|
|
struct xfs_scrub *sc,
|
|
struct xrep_tempexch *tx)
|
|
{
|
|
int error;
|
|
|
|
ASSERT(sc->flags & XREP_FSGATES_EXCHANGE_RANGE);
|
|
|
|
xfs_exchange_mappings(sc->tp, &tx->req);
|
|
error = xfs_defer_finish(&sc->tp);
|
|
if (error)
|
|
return error;
|
|
|
|
/*
|
|
* If we exchanged the ondisk sizes of two metadata files, we must
|
|
* exchanged the incore sizes as well.
|
|
*/
|
|
if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) {
|
|
loff_t temp;
|
|
|
|
temp = i_size_read(VFS_I(sc->ip));
|
|
i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
|
|
i_size_write(VFS_I(sc->tempip), temp);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Write local format data from one of the temporary file's forks into the same
|
|
* fork of file being repaired, and exchange the file sizes, if appropriate.
|
|
* Caller must ensure that the file being repaired has enough fork space to
|
|
* hold all the bytes.
|
|
*/
|
|
void
|
|
xrep_tempfile_copyout_local(
|
|
struct xfs_scrub *sc,
|
|
int whichfork)
|
|
{
|
|
struct xfs_ifork *temp_ifp;
|
|
struct xfs_ifork *ifp;
|
|
unsigned int ilog_flags = XFS_ILOG_CORE;
|
|
|
|
temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
|
|
ifp = xfs_ifork_ptr(sc->ip, whichfork);
|
|
|
|
ASSERT(temp_ifp != NULL);
|
|
ASSERT(ifp != NULL);
|
|
ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
|
|
ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
|
|
|
|
switch (whichfork) {
|
|
case XFS_DATA_FORK:
|
|
ASSERT(sc->tempip->i_disk_size <=
|
|
xfs_inode_data_fork_size(sc->ip));
|
|
break;
|
|
case XFS_ATTR_FORK:
|
|
ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
|
|
break;
|
|
default:
|
|
ASSERT(0);
|
|
return;
|
|
}
|
|
|
|
/* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */
|
|
xfs_idestroy_fork(ifp);
|
|
xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data,
|
|
temp_ifp->if_bytes);
|
|
|
|
if (whichfork == XFS_DATA_FORK) {
|
|
i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
|
|
sc->ip->i_disk_size = sc->tempip->i_disk_size;
|
|
}
|
|
|
|
ilog_flags |= xfs_ilog_fdata(whichfork);
|
|
xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
|
|
}
|
|
|
|
/* Decide if a given XFS inode is a temporary file for a repair. */
|
|
bool
|
|
xrep_is_tempfile(
|
|
const struct xfs_inode *ip)
|
|
{
|
|
const struct inode *inode = &ip->i_vnode;
|
|
|
|
if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
|
|
return true;
|
|
|
|
return false;
|
|
}
|