Bug fixes for 6.7-rc2:
* Fix deadlock arising due to intent items in AIL not being cleared when log recovery fails.
* Fix stale data exposure bug when remapping COW fork extents to data fork.
* Fix deadlock when data device flush fails.
* Fix AGFL minimum size calculation.
* Select DEBUG_FS instead of XFS_DEBUG when XFS_ONLINE_SCRUB_STATS is selected.
* Fix corruption of log inode's extent count field when NREXT64 feature is enabled.

Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
-----BEGIN PGP SIGNATURE-----

iHUEABYIAB0WIQQjMC4mbgVeU7MxEIYH7y4RirJu9AUCZVNouAAKCRAH7y4RirJu
9O0mAQDePPSRT8ZrR63dxFZ1AW55q4y9iqgBxWcnKEelmVULPwD/byzoAJ46jvcL
qpBHUJ1rUIcd/fGqAEkwfG6hKzD99w8=
=G+60
-----END PGP SIGNATURE-----

Merge tag 'xfs-6.7-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Chandan Babu:

- Fix deadlock arising due to intent items in AIL not being cleared when log recovery fails
- Fix stale data exposure bug when remapping COW fork extents to data fork
- Fix deadlock when data device flush fails
- Fix AGFL minimum size calculation
- Select DEBUG_FS instead of XFS_DEBUG when XFS_ONLINE_SCRUB_STATS is selected
- Fix corruption of log inode's extent count field when NREXT64 feature is enabled

* tag 'xfs-6.7-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: recovery should not clear di_flushiter unconditionally
  xfs: inode recovery does not validate the recovered inode
  xfs: fix again select in kconfig XFS_ONLINE_SCRUB_STATS
  xfs: fix internal error from AGFL exhaustion
  xfs: up(ic_sema) if flushing data device fails
  xfs: only remap the written blocks in xfs_reflink_end_cow_extent
  XFS: Update MAINTAINERS to catch all XFS documentation
  xfs: abort intent items when recovery intents fail
  xfs: factor out xfs_defer_pending_abort
This commit is contained in:
commit
b8f1fa2419
@ -23882,8 +23882,7 @@ T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
|
||||
P: Documentation/filesystems/xfs-maintainer-entry-profile.rst
|
||||
F: Documentation/ABI/testing/sysfs-fs-xfs
|
||||
F: Documentation/admin-guide/xfs.rst
|
||||
F: Documentation/filesystems/xfs-delayed-logging-design.rst
|
||||
F: Documentation/filesystems/xfs-self-describing-metadata.rst
|
||||
F: Documentation/filesystems/xfs-*
|
||||
F: fs/xfs/
|
||||
F: include/uapi/linux/dqblk_xfs.h
|
||||
F: include/uapi/linux/fsmap.h
|
||||
|
@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
|
||||
bool "XFS online metadata check usage data collection"
|
||||
default y
|
||||
depends on XFS_ONLINE_SCRUB
|
||||
select XFS_DEBUG
|
||||
select DEBUG_FS
|
||||
help
|
||||
If you say Y here, the kernel will gather usage data about
|
||||
the online metadata check subsystem. This includes the number
|
||||
|
@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
|
||||
|
||||
ASSERT(mp->m_alloc_maxlevels > 0);
|
||||
|
||||
/*
|
||||
* For a btree shorter than the maximum height, the worst case is that
|
||||
* every level gets split and a new level is added, then while inserting
|
||||
* another entry to refill the AGFL, every level under the old root gets
|
||||
* split again. This is:
|
||||
*
|
||||
* (full height split reservation) + (AGFL refill split height)
|
||||
* = (current height + 1) + (current height - 1)
|
||||
* = (new height) + (new height - 2)
|
||||
* = 2 * new height - 2
|
||||
*
|
||||
* For a btree of maximum height, the worst case is that every level
|
||||
* under the root gets split, then while inserting another entry to
|
||||
* refill the AGFL, every level under the root gets split again. This is
|
||||
* also:
|
||||
*
|
||||
* 2 * (current height - 1)
|
||||
* = 2 * (new height - 1)
|
||||
* = 2 * new height - 2
|
||||
*/
|
||||
|
||||
/* space needed by-bno freespace btree */
|
||||
min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
|
||||
mp->m_alloc_maxlevels);
|
||||
mp->m_alloc_maxlevels) * 2 - 2;
|
||||
/* space needed by-size freespace btree */
|
||||
min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
|
||||
mp->m_alloc_maxlevels);
|
||||
mp->m_alloc_maxlevels) * 2 - 2;
|
||||
/* space needed reverse mapping used space btree */
|
||||
if (xfs_has_rmapbt(mp))
|
||||
min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
|
||||
mp->m_rmap_maxlevels);
|
||||
mp->m_rmap_maxlevels) * 2 - 2;
|
||||
|
||||
return min_free;
|
||||
}
|
||||
|
@ -245,26 +245,33 @@ xfs_defer_create_intents(
|
||||
return ret;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_defer_pending_abort(
|
||||
struct xfs_mount *mp,
|
||||
struct list_head *dop_list)
|
||||
{
|
||||
struct xfs_defer_pending *dfp;
|
||||
const struct xfs_defer_op_type *ops;
|
||||
|
||||
/* Abort intent items that don't have a done item. */
|
||||
list_for_each_entry(dfp, dop_list, dfp_list) {
|
||||
ops = defer_op_types[dfp->dfp_type];
|
||||
trace_xfs_defer_pending_abort(mp, dfp);
|
||||
if (dfp->dfp_intent && !dfp->dfp_done) {
|
||||
ops->abort_intent(dfp->dfp_intent);
|
||||
dfp->dfp_intent = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Abort all the intents that were committed. */
|
||||
STATIC void
|
||||
xfs_defer_trans_abort(
|
||||
struct xfs_trans *tp,
|
||||
struct list_head *dop_pending)
|
||||
{
|
||||
struct xfs_defer_pending *dfp;
|
||||
const struct xfs_defer_op_type *ops;
|
||||
|
||||
trace_xfs_defer_trans_abort(tp, _RET_IP_);
|
||||
|
||||
/* Abort intent items that don't have a done item. */
|
||||
list_for_each_entry(dfp, dop_pending, dfp_list) {
|
||||
ops = defer_op_types[dfp->dfp_type];
|
||||
trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
|
||||
if (dfp->dfp_intent && !dfp->dfp_done) {
|
||||
ops->abort_intent(dfp->dfp_intent);
|
||||
dfp->dfp_intent = NULL;
|
||||
}
|
||||
}
|
||||
xfs_defer_pending_abort(tp->t_mountp, dop_pending);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -756,12 +763,13 @@ xfs_defer_ops_capture(
|
||||
|
||||
/* Release all resources that we used to capture deferred ops. */
|
||||
void
|
||||
xfs_defer_ops_capture_free(
|
||||
xfs_defer_ops_capture_abort(
|
||||
struct xfs_mount *mp,
|
||||
struct xfs_defer_capture *dfc)
|
||||
{
|
||||
unsigned short i;
|
||||
|
||||
xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
|
||||
xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
|
||||
|
||||
for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
|
||||
@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
|
||||
/* Commit the transaction and add the capture structure to the list. */
|
||||
error = xfs_trans_commit(tp);
|
||||
if (error) {
|
||||
xfs_defer_ops_capture_free(mp, dfc);
|
||||
xfs_defer_ops_capture_abort(mp, dfc);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
|
||||
struct list_head *capture_list);
|
||||
void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
|
||||
struct xfs_defer_resources *dres);
|
||||
void xfs_defer_ops_capture_free(struct xfs_mount *mp,
|
||||
void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
|
||||
struct xfs_defer_capture *d);
|
||||
void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
|
||||
|
||||
|
@ -510,6 +510,9 @@ xfs_dinode_verify(
|
||||
if (mode && nextents + naextents > nblocks)
|
||||
return __this_address;
|
||||
|
||||
if (nextents + naextents == 0 && nblocks != 0)
|
||||
return __this_address;
|
||||
|
||||
if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
|
||||
return __this_address;
|
||||
|
||||
|
@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
|
||||
struct xfs_log_dinode *ldip;
|
||||
uint isize;
|
||||
int need_free = 0;
|
||||
xfs_failaddr_t fa;
|
||||
|
||||
if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
|
||||
in_f = item->ri_buf[0].i_addr;
|
||||
@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2(
|
||||
* superblock flag to determine whether we need to look at di_flushiter
|
||||
* to skip replay when the on disk inode is newer than the log one
|
||||
*/
|
||||
if (!xfs_has_v3inodes(mp) &&
|
||||
ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
|
||||
/*
|
||||
* Deal with the wrap case, DI_MAX_FLUSH is less
|
||||
* than smaller numbers
|
||||
*/
|
||||
if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
|
||||
ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
|
||||
/* do nothing */
|
||||
} else {
|
||||
trace_xfs_log_recover_inode_skip(log, in_f);
|
||||
error = 0;
|
||||
goto out_release;
|
||||
if (!xfs_has_v3inodes(mp)) {
|
||||
if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
|
||||
/*
|
||||
* Deal with the wrap case, DI_MAX_FLUSH is less
|
||||
* than smaller numbers
|
||||
*/
|
||||
if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
|
||||
ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
|
||||
/* do nothing */
|
||||
} else {
|
||||
trace_xfs_log_recover_inode_skip(log, in_f);
|
||||
error = 0;
|
||||
goto out_release;
|
||||
}
|
||||
}
|
||||
|
||||
/* Take the opportunity to reset the flush iteration count */
|
||||
ldip->di_flushiter = 0;
|
||||
}
|
||||
|
||||
/* Take the opportunity to reset the flush iteration count */
|
||||
ldip->di_flushiter = 0;
|
||||
|
||||
if (unlikely(S_ISREG(ldip->di_mode))) {
|
||||
if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
|
||||
@ -528,8 +531,19 @@ out_owner_change:
|
||||
(dip->di_mode != 0))
|
||||
error = xfs_recover_inode_owner_change(mp, dip, in_f,
|
||||
buffer_list);
|
||||
/* re-generate the checksum. */
|
||||
/* re-generate the checksum and validate the recovered inode. */
|
||||
xfs_dinode_calc_crc(log->l_mp, dip);
|
||||
fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
|
||||
if (fa) {
|
||||
XFS_CORRUPTION_ERROR(
|
||||
"Bad dinode after recovery",
|
||||
XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
|
||||
xfs_alert(mp,
|
||||
"Metadata corruption detected at %pS, inode 0x%llx",
|
||||
fa, in_f->ilf_ino);
|
||||
error = -EFSCORRUPTED;
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
ASSERT(bp->b_mount == mp);
|
||||
bp->b_flags |= _XBF_LOGRECOVERY;
|
||||
|
@ -1893,9 +1893,7 @@ xlog_write_iclog(
|
||||
* the buffer manually, the code needs to be kept in sync
|
||||
* with the I/O completion path.
|
||||
*/
|
||||
xlog_state_done_syncing(iclog);
|
||||
up(&iclog->ic_sema);
|
||||
return;
|
||||
goto sync;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1925,20 +1923,17 @@ xlog_write_iclog(
|
||||
* avoid shutdown re-entering this path and erroring out again.
|
||||
*/
|
||||
if (log->l_targ != log->l_mp->m_ddev_targp &&
|
||||
blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
|
||||
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
|
||||
return;
|
||||
}
|
||||
blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
|
||||
goto shutdown;
|
||||
}
|
||||
if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
|
||||
iclog->ic_bio.bi_opf |= REQ_FUA;
|
||||
|
||||
iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
|
||||
|
||||
if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
|
||||
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
|
||||
return;
|
||||
}
|
||||
if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
|
||||
goto shutdown;
|
||||
|
||||
if (is_vmalloc_addr(iclog->ic_data))
|
||||
flush_kernel_vmap_range(iclog->ic_data, count);
|
||||
|
||||
@ -1959,6 +1954,12 @@ xlog_write_iclog(
|
||||
}
|
||||
|
||||
submit_bio(&iclog->ic_bio);
|
||||
return;
|
||||
shutdown:
|
||||
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
|
||||
sync:
|
||||
xlog_state_done_syncing(iclog);
|
||||
up(&iclog->ic_sema);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2511,7 +2511,7 @@ xlog_abort_defer_ops(
|
||||
|
||||
list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
|
||||
list_del_init(&dfc->dfc_list);
|
||||
xfs_defer_ops_capture_free(mp, dfc);
|
||||
xfs_defer_ops_capture_abort(mp, dfc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
|
||||
}
|
||||
}
|
||||
del = got;
|
||||
xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
|
||||
|
||||
/* Grab the corresponding mapping in the data fork. */
|
||||
nmaps = 1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user