More new code for 5.12:

- Restore a disused sysctl control knob that was inadvertently dropped
   during the merge window to avoid fstests regressions.
 - Don't speculatively release freed blocks from the busy list until
   we're actually allocating them, which fixes a rare log recovery
   regression.
 - Don't nest transactions when scanning for free space.
 - Add an idiot^Wmaintainer light to detect nested transactions. ;)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEUzaAxoMeQq6m2jMV+H93GTRKtOsFAmA3zW8ACgkQ+H93GTRK
 tOtT4xAAmZ5BdQ6V3yUeT/N++L6Ax62T2VzEryZvVK/ZFyVBRYKi9LOL1exq1cja
 HXINPuYWAD8TbGVU9/lZR1yUX/y1VvJR0EPly8EN6WpGFeErSxLs++YzP1Q8iv5i
 ZtniscpGE6JvCcDeRH5kBfklGpyzTf3t6Xe8x+6+/aawf34ChNlM/gQcAyKvYYU5
 Jb9j7BqbRAnhvPEfa554yxIIoZhmTDYY7Wx7VMKCMcOP1lfriC+I1iuiZIMONIQJ
 mMgz9XnHVo256+YvkvwRKp294r+MEkuJL5EBXrs01r3PwVdaigo13qTk8l1ZC3zS
 VYkC/sRoiyMwnJvKEUNtnM3/8Zu/DvPp9iqXiWc60UBGqpBkm8Jgv+W6H7u1FinP
 0M0Wt2wHC7e51uW5G/8QwUXZv+n8IZHyZkkYbjyXRkhfyFlexYwTVchZz9q/RB/A
 HEZ9jcIke8Rwkav4f0kJ00Y/7FQSPn6ItapXf92rl00z3Z5S2sqBaT5kIotsW0Ke
 634yPknkLuBDQg4j8l3A88ik2SNFRQQfBXsjt27He/s2wV0Dj8RjDnLWfoV7P5to
 Sc2lx3HhL4OCojAXXAFP3MDKz0nqcuUTPoPCeS6QKQGcjTzVvoI7ZutXODcxi67k
 Q7AK+gIqHRWA8F+4wciYDwAHMES1rRAa7/iuYmtCtT1sBdXp9NU=
 =g9K3
 -----END PGP SIGNATURE-----

Merge tag 'xfs-5.12-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull more xfs updates from Darrick Wong:
 "The most notable changes here are a fix that prevents premature reuse
  of freed metadata blocks, and the new ability to detect accidental
  nested transactions, which are not allowed here.

   - Restore a disused sysctl control knob that was inadvertently
     dropped during the merge window to avoid fstests regressions.

   - Don't speculatively release freed blocks from the busy list until
     we're actually allocating them, which fixes a rare log recovery
     regression.

   - Don't nest transactions when scanning for free space.

   - Add an idiot^Wmaintainer light to detect nested transactions. ;)"

* tag 'xfs-5.12-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: use current->journal_info for detecting transaction recursion
  xfs: don't nest transactions when scanning for eofblocks
  xfs: don't reuse busy extents on extent trim
  xfs: restore speculative_cow_prealloc_lifetime sysctl
This commit is contained in:
Linus Torvalds 2021-02-28 11:45:25 -08:00
commit 03dc748bf1
8 changed files with 94 additions and 70 deletions

View File

@ -284,6 +284,9 @@ The following sysctls are available for the XFS filesystem:
	removes unused preallocation from clean inodes and releases
	the unused space back to the free pool.

  fs.xfs.speculative_cow_prealloc_lifetime
	This is an alias for speculative_prealloc_lifetime.

  fs.xfs.error_level		(Min: 0  Default: 3  Max: 11)
	A volume knob for error reporting when internal errors occur.
	This will generate detailed messages & backtraces for filesystem
@ -356,12 +359,13 @@ The following sysctls are available for the XFS filesystem:
Deprecated Sysctls
==================

===========================================     ================
  Name                                          Removal Schedule
===========================================     ================
fs.xfs.irix_sgid_inherit                        September 2025
fs.xfs.irix_symlink_mode                        September 2025
fs.xfs.speculative_cow_prealloc_lifetime        September 2025
===========================================     ================

Removed Sysctls

View File

@ -1458,13 +1458,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
PF_MEMALLOC)) PF_MEMALLOC))
goto redirty; goto redirty;
/*
* Given that we do not allow direct reclaim to call us, we should
* never be called in a recursive filesystem reclaim context.
*/
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
goto redirty;
/* /*
* Is this page beyond the end of the file? * Is this page beyond the end of the file?
* *

View File

@ -2805,7 +2805,7 @@ xfs_btree_split_worker(
struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args *args = container_of(work,
struct xfs_btree_split_args, work); struct xfs_btree_split_args, work);
unsigned long pflags; unsigned long pflags;
unsigned long new_pflags = PF_MEMALLOC_NOFS; unsigned long new_pflags = 0;
/* /*
* we are in a transaction context here, but may also be doing work * we are in a transaction context here, but may also be doing work
@ -2817,12 +2817,20 @@ xfs_btree_split_worker(
new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
current_set_flags_nested(&pflags, new_pflags); current_set_flags_nested(&pflags, new_pflags);
xfs_trans_set_context(args->cur->bc_tp);
args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
args->key, args->curp, args->stat); args->key, args->curp, args->stat);
xfs_trans_clear_context(args->cur->bc_tp);
current_restore_flags_nested(&pflags, new_pflags);
/*
* Do not access args after complete() has run here. We don't own args
* and the owner may run and free args before we return here.
*/
complete(args->done); complete(args->done);
current_restore_flags_nested(&pflags, new_pflags);
} }
/* /*

View File

@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc(
* We hand off the transaction to the completion thread now, so * We hand off the transaction to the completion thread now, so
* clear the flag here. * clear the flag here.
*/ */
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_clear_context(tp);
return 0; return 0;
} }
@ -125,7 +125,7 @@ xfs_setfilesize_ioend(
* thus we need to mark ourselves as being in a transaction manually. * thus we need to mark ourselves as being in a transaction manually.
* Similarly for freeze protection. * Similarly for freeze protection.
*/ */
current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_set_context(tp);
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
/* we abort the update if there was an IO error */ /* we abort the update if there was an IO error */
@ -568,6 +568,12 @@ xfs_vm_writepage(
{ {
struct xfs_writepage_ctx wpc = { }; struct xfs_writepage_ctx wpc = { };
if (WARN_ON_ONCE(current->journal_info)) {
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
} }
@ -578,6 +584,13 @@ xfs_vm_writepages(
{ {
struct xfs_writepage_ctx wpc = { }; struct xfs_writepage_ctx wpc = { };
/*
* Writing back data in a transaction context can result in recursive
* transactions. This is bad, so issue a warning and get out of here.
*/
if (WARN_ON_ONCE(current->journal_info))
return 0;
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
} }

View File

@ -344,7 +344,6 @@ xfs_extent_busy_trim(
ASSERT(*len > 0); ASSERT(*len > 0);
spin_lock(&args->pag->pagb_lock); spin_lock(&args->pag->pagb_lock);
restart:
fbno = *bno; fbno = *bno;
flen = *len; flen = *len;
rbp = args->pag->pagb_tree.rb_node; rbp = args->pag->pagb_tree.rb_node;
@ -363,19 +362,6 @@ restart:
continue; continue;
} }
/*
* If this is a metadata allocation, try to reuse the busy
* extent instead of trimming the allocation.
*/
if (!(args->datatype & XFS_ALLOC_USERDATA) &&
!(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
if (!xfs_extent_busy_update_extent(args->mp, args->pag,
busyp, fbno, flen,
false))
goto restart;
continue;
}
if (bbno <= fbno) { if (bbno <= fbno) {
/* start overlap */ /* start overlap */

View File

@ -51,7 +51,7 @@ xfs_panic_mask_proc_handler(
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
STATIC int STATIC int
xfs_deprecate_irix_sgid_inherit_proc_handler( xfs_deprecated_dointvec_minmax(
struct ctl_table *ctl, struct ctl_table *ctl,
int write, int write,
void *buffer, void *buffer,
@ -59,24 +59,8 @@ xfs_deprecate_irix_sgid_inherit_proc_handler(
loff_t *ppos) loff_t *ppos)
{ {
if (write) { if (write) {
printk_once(KERN_WARNING printk_ratelimited(KERN_WARNING
"XFS: " "%s sysctl option is deprecated.\n", "XFS: %s sysctl option is deprecated.\n",
ctl->procname);
}
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
}
STATIC int
xfs_deprecate_irix_symlink_mode_proc_handler(
struct ctl_table *ctl,
int write,
void *buffer,
size_t *lenp,
loff_t *ppos)
{
if (write) {
printk_once(KERN_WARNING
"XFS: " "%s sysctl option is deprecated.\n",
ctl->procname); ctl->procname);
} }
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.sgid_inherit.val, .data = &xfs_params.sgid_inherit.val,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler, .proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.sgid_inherit.min, .extra1 = &xfs_params.sgid_inherit.min,
.extra2 = &xfs_params.sgid_inherit.max .extra2 = &xfs_params.sgid_inherit.max
}, },
@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.symlink_mode.val, .data = &xfs_params.symlink_mode.val,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler, .proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.symlink_mode.min, .extra1 = &xfs_params.symlink_mode.min,
.extra2 = &xfs_params.symlink_mode.max .extra2 = &xfs_params.symlink_mode.max
}, },
@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = {
.extra1 = &xfs_params.blockgc_timer.min, .extra1 = &xfs_params.blockgc_timer.min,
.extra2 = &xfs_params.blockgc_timer.max, .extra2 = &xfs_params.blockgc_timer.max,
}, },
{
.procname = "speculative_cow_prealloc_lifetime",
.data = &xfs_params.blockgc_timer.val,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.blockgc_timer.min,
.extra2 = &xfs_params.blockgc_timer.max,
},
/* please keep this the last entry */ /* please keep this the last entry */
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
{ {

View File

@ -72,6 +72,7 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_); trace_xfs_trans_free(tp, _RET_IP_);
xfs_trans_clear_context(tp);
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super); sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp); xfs_trans_free_dqinfo(tp);
@ -123,7 +124,8 @@ xfs_trans_dup(
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
tp->t_rtx_res = tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used;
ntp->t_pflags = tp->t_pflags;
xfs_trans_switch_context(tp, ntp);
/* move deferred ops over to the new tp */ /* move deferred ops over to the new tp */
xfs_defer_move(ntp, tp); xfs_defer_move(ntp, tp);
@ -157,9 +159,6 @@ xfs_trans_reserve(
int error = 0; int error = 0;
bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
/* Mark this thread as being in a transaction */
current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
/* /*
* Attempt to reserve the needed disk blocks by decrementing * Attempt to reserve the needed disk blocks by decrementing
* the number needed from the number available. This will * the number needed from the number available. This will
@ -167,10 +166,8 @@ xfs_trans_reserve(
*/ */
if (blocks > 0) { if (blocks > 0) {
error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
if (error != 0) { if (error != 0)
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
return -ENOSPC; return -ENOSPC;
}
tp->t_blk_res += blocks; tp->t_blk_res += blocks;
} }
@ -244,9 +241,6 @@ undo_blocks:
xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
tp->t_blk_res = 0; tp->t_blk_res = 0;
} }
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
return error; return error;
} }
@ -260,6 +254,7 @@ xfs_trans_alloc(
struct xfs_trans **tpp) struct xfs_trans **tpp)
{ {
struct xfs_trans *tp; struct xfs_trans *tp;
bool want_retry = true;
int error; int error;
/* /*
@ -267,9 +262,11 @@ xfs_trans_alloc(
* GFP_NOFS allocation context so that we avoid lockdep false positives * GFP_NOFS allocation context so that we avoid lockdep false positives
* by doing GFP_KERNEL allocations inside sb_start_intwrite(). * by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/ */
retry:
tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
if (!(flags & XFS_TRANS_NO_WRITECOUNT)) if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super); sb_start_intwrite(mp->m_super);
xfs_trans_set_context(tp);
/* /*
* Zero-reservation ("empty") transactions can't modify anything, so * Zero-reservation ("empty") transactions can't modify anything, so
@ -289,7 +286,9 @@ xfs_trans_alloc(
tp->t_firstblock = NULLFSBLOCK; tp->t_firstblock = NULLFSBLOCK;
error = xfs_trans_reserve(tp, resp, blocks, rtextents); error = xfs_trans_reserve(tp, resp, blocks, rtextents);
if (error == -ENOSPC) { if (error == -ENOSPC && want_retry) {
xfs_trans_cancel(tp);
/* /*
* We weren't able to reserve enough space for the transaction. * We weren't able to reserve enough space for the transaction.
* Flush the other speculative space allocations to free space. * Flush the other speculative space allocations to free space.
@ -297,8 +296,11 @@ xfs_trans_alloc(
* other locks. * other locks.
*/ */
error = xfs_blockgc_free_space(mp, NULL); error = xfs_blockgc_free_space(mp, NULL);
if (!error) if (error)
error = xfs_trans_reserve(tp, resp, blocks, rtextents); return error;
want_retry = false;
goto retry;
} }
if (error) { if (error) {
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
@ -893,7 +895,6 @@ __xfs_trans_commit(
xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free(tp); xfs_trans_free(tp);
/* /*
@ -925,7 +926,6 @@ out_unreserve:
xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
tp->t_ticket = NULL; tp->t_ticket = NULL;
} }
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free_items(tp, !!error); xfs_trans_free_items(tp, !!error);
xfs_trans_free(tp); xfs_trans_free(tp);
@ -985,9 +985,6 @@ xfs_trans_cancel(
tp->t_ticket = NULL; tp->t_ticket = NULL;
} }
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free_items(tp, dirty); xfs_trans_free_items(tp, dirty);
xfs_trans_free(tp); xfs_trans_free(tp);
} }

View File

@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
struct xfs_trans **tpp); struct xfs_trans **tpp);
/*
 * Mark the current task as running transaction @tp.
 *
 * Stores the value returned by memalloc_nofs_save() in tp->t_pflags and
 * points current->journal_info at @tp. The task must not already have a
 * journal_info set; the ASSERT flags that case, which is how an attempt to
 * start a transaction inside another one is detected.
 */
static inline void
xfs_trans_set_context(
struct xfs_trans *tp)
{
ASSERT(current->journal_info == NULL);
/* Keep the saved state in the transaction so it can be restored later. */
tp->t_pflags = memalloc_nofs_save();
current->journal_info = tp;
}
/*
 * Undo xfs_trans_set_context() for @tp on the current task.
 *
 * Only acts when current->journal_info still points at @tp: it then passes
 * tp->t_pflags to memalloc_nofs_restore() and clears journal_info. In every
 * other case this is a no-op, so calling it for a transaction that is not
 * the task's current one changes nothing.
 */
static inline void
xfs_trans_clear_context(
struct xfs_trans *tp)
{
/* Skip if the context was already cleared or belongs to another transaction. */
if (current->journal_info == tp) {
memalloc_nofs_restore(tp->t_pflags);
current->journal_info = NULL;
}
}
/*
 * Hand the current task's transaction context from @old_tp to @new_tp.
 *
 * The saved t_pflags value moves to @new_tp and is zeroed in @old_tp, and
 * current->journal_info is repointed at @new_tp. No memalloc save/restore
 * call is made here. @old_tp must be the task's current transaction, which
 * is checked with ASSERT.
 */
static inline void
xfs_trans_switch_context(
struct xfs_trans *old_tp,
struct xfs_trans *new_tp)
{
ASSERT(current->journal_info == old_tp);
new_tp->t_pflags = old_tp->t_pflags;
old_tp->t_pflags = 0;
/*
 * After this, journal_info no longer matches old_tp, so a later
 * xfs_trans_clear_context(old_tp) does nothing.
 */
current->journal_info = new_tp;
}
#endif /* __XFS_TRANS_H__ */ #endif /* __XFS_TRANS_H__ */