From a9b0f6f4adb1a8b4219e3e14ab6ef46c14987ac0 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 25 Apr 2023 12:26:19 -0400 Subject: [PATCH 01/23] gfs2: simplify gdlm_put_lock with out_free label This patch introduces a new out_free label and consolidates the three places function gdlm_put_lock freed the glock. No change in functionality. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lock_dlm.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 71911bf9ab34..54911294687c 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -296,10 +296,8 @@ static void gdlm_put_lock(struct gfs2_glock *gl) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - if (gl->gl_lksb.sb_lkid == 0) { - gfs2_glock_free(gl); - return; - } + if (gl->gl_lksb.sb_lkid == 0) + goto out_free; clear_bit(GLF_BLOCKING, &gl->gl_flags); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); @@ -307,17 +305,13 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_update_request_times(gl); /* don't want to call dlm if we've unmounted the lock protocol */ - if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { - gfs2_glock_free(gl); - return; - } + if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) + goto out_free; /* don't want to skip dlm_unlock writing the lvb when lock has one */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - !gl->gl_lksb.sb_lvbptr) { - gfs2_glock_free(gl); - return; - } + !gl->gl_lksb.sb_lvbptr) + goto out_free; again: error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, @@ -331,8 +325,11 @@ again: fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); - return; } + return; + +out_free: + gfs2_glock_free(gl); } static void gdlm_cancel(struct gfs2_glock *gl) From e4f82bf21f2586aa823fd40de72023236062a4aa Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 26 Apr 2023 10:46:16 -0400 Subject: [PATCH 02/23] gfs2: fix minor comment typos Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c739b258a2d9..8d611fbcf0bd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1729,8 +1729,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) if (offset >= maxsize) { /* - * The starting point lies beyond the allocated meta-data; - * there are no blocks do deallocate. + * The starting point lies beyond the allocated metadata; + * there are no blocks to deallocate. */ return 0; } From 9b620429eca9a1dbadf6cf983b11d2cb427411ce Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 4 May 2023 13:43:22 -0400 Subject: [PATCH 03/23] gfs2: ignore rindex_update failure in dinode_dealloc Before this patch, function gfs2_dinode_dealloc would abort if it got a bad return code from gfs2_rindex_update(). The problem is that it left the dinode in the unlinked (not free) state, which meant subsequent fsck would clean it up and flag an error. That meant some of our QE tests would fail. The sole purpose of gfs2_rindex_update(), in this code path, is to read in any newer rgrps added by gfs2_grow. But since this is a delete operation it won't actually use any of those new rgrps. It can really only twiddle the bits from "Unlinked" to "Free" in an existing rgrp. Therefore the error should not prevent the transition from unlinked to free. This patch makes gfs2_dinode_dealloc ignore the bad return code and proceed with freeing the dinode so the QE tests will not be tripped up. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a84bf6444bba..925bd80d2625 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1131,9 +1131,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) return -EIO; } - error = gfs2_rindex_update(sdp); - if (error) - return error; + gfs2_rindex_update(sdp); error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) From dac0fc31bea78e3ac1467d2fdb49ffff37e95e84 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 15 May 2023 11:37:57 -0500 Subject: [PATCH 04/23] gfs2: Fix gfs2_qa_get imbalance in gfs2_quota_hold This patch fixes a case in which function gfs2_quota_hold encounters an assert error and exits. The lack of gfs2_qa_put causes further problems when the inode is evicted and the get/put count is non-zero. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1ed17226d9ed..386ca770ce2e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -591,6 +591,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) || gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) { error = -EIO; + gfs2_qa_put(ip); goto out; } From 17a5934653826644ba8c1ad21b9fc8d6e7e62f34 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 4 May 2023 14:27:42 -0400 Subject: [PATCH 05/23] gfs2: Update rl_unlinked before releasing rgrp lock Function gfs2_free_di was changing the rgrp lvb count of unlinked dinodes after the lock was released. This patch moves it inside the lock. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3b9b76e980ad..9308190895c8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2584,8 +2584,8 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - rgrp_unlock_local(rgd); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); + rgrp_unlock_local(rgd); gfs2_statfs_change(sdp, 0, +1, -1); trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); From f9da18cd4616fbc9816347b7b2be188653786058 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 4 May 2023 14:28:51 -0400 Subject: [PATCH 06/23] gfs2: Don't remember delete unless it's successful This patch changes function evict_unlinked_inode so it does not call gfs2_inode_remember_delete until it gets a good return code from gfs2_dinode_dealloc. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 925bd80d2625..3a7e7c31eb9c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1332,9 +1332,6 @@ static int evict_unlinked_inode(struct inode *inode) goto out; } - if (ip->i_gl) - gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); - /* * As soon as we clear the bitmap for the dinode, gfs2_create_inode() * can get called to recreate it, or even gfs2_inode_lookup() if the @@ -1348,6 +1345,9 @@ static int evict_unlinked_inode(struct inode *inode) */ ret = gfs2_dinode_dealloc(ip); + if (!ret && ip->i_gl) + gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); + out: return ret; } From 7b7b06d55aeff969ffdfd1170937198f7c02b684 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Jun 2023 07:36:46 +0200 Subject: [PATCH 07/23] gfs2: set FMODE_CAN_ODIRECT instead of a dummy direct_IO method Since commit a2ad63daa88b ("VFS: add FMODE_CAN_ODIRECT file flag"), file systems can just set the FMODE_CAN_ODIRECT flag at open time instead of wiring up a dummy direct_IO method to indicate support for direct I/O. Remove .direct_IO from gfs2_aops and set FMODE_CAN_ODIRECT in gfs2_open_common for regular files that do not use data journalling. Signed-off-by: Christoph Hellwig Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 1 - fs/gfs2/file.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index a5f4be6b9213..d95125714ebb 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -750,7 +750,6 @@ static const struct address_space_operations gfs2_aops = { .release_folio = iomap_release_folio, .invalidate_folio = iomap_invalidate_folio, .bmap = gfs2_bmap, - .direct_IO = noop_direct_IO, .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index cb62c8f07d1e..a6ad64cee885 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -630,6 +630,9 @@ int gfs2_open_common(struct inode *inode, struct file *file) ret = generic_file_open(inode, file); if (ret) return ret; + + if (!gfs2_is_jdata(GFS2_I(inode))) + file->f_mode |= FMODE_CAN_ODIRECT; } fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS); From c8ed1b35931245087968fd95b2ec3dfc50f77769 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Mon, 12 Jun 2023 12:26:23 -0500 Subject: [PATCH 08/23] gfs2: Fix duplicate should_fault_in_pages() call In gfs2_file_buffered_write(), we currently jump from the second call of function should_fault_in_pages() to above the first call, so should_fault_in_pages() is getting called twice in a row, causing it to accidentally fall back to single-page writes rather than trying the more efficient multi-page writes first. Fix that by moving the retry label to the correct place, behind the first call to should_fault_in_pages(). Fixes: e1fa9ea85ce8 ("gfs2: Stop using glock holder auto-demotion for now") Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index a6ad64cee885..464ae4bf209e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1033,8 +1033,8 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, } gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh); -retry: if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { +retry: window_size -= fault_in_iov_iter_readable(from, window_size); if (!window_size) { ret = -EFAULT; From 6fa0a72cbbe45db4ed967a51f9e6f4e3afe61d20 Mon Sep 17 00:00:00 2001 From: Tuo Li Date: Tue, 13 Jun 2023 11:06:37 +0800 Subject: [PATCH 09/23] gfs2: Fix possible data races in gfs2_show_options() Some fields such as gt_logd_secs of the struct gfs2_tune are accessed without holding the lock gt_spin in gfs2_show_options(): val = sdp->sd_tune.gt_logd_secs; if (val != 30) seq_printf(s, ",commit=%d", val); And thus can cause data races when gfs2_show_options() and other functions such as gfs2_reconfigure() are concurrently executed: spin_lock(>->gt_spin); gt->gt_logd_secs = newargs->ar_commit; To fix these possible data races, the lock sdp->sd_tune.gt_spin is acquired before accessing the fields of gfs2_tune and released after these accesses. Further changes by Andreas: - Don't hold the spin lock over the seq_printf operations. Reported-by: BassCheck Signed-off-by: Tuo Li Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3a7e7c31eb9c..c89b28030495 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1004,7 +1004,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) { struct gfs2_sbd *sdp = root->d_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; - int val; + unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum; + + spin_lock(&sdp->sd_tune.gt_spin); + logd_secs = sdp->sd_tune.gt_logd_secs; + quota_quantum = sdp->sd_tune.gt_quota_quantum; + statfs_quantum = sdp->sd_tune.gt_statfs_quantum; + statfs_slow = sdp->sd_tune.gt_statfs_slow; + spin_unlock(&sdp->sd_tune.gt_spin); if (is_ancestor(root, sdp->sd_master_dir)) seq_puts(s, ",meta"); @@ -1059,17 +1066,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) } if (args->ar_discard) seq_puts(s, ",discard"); - val = sdp->sd_tune.gt_logd_secs; - if (val != 30) - seq_printf(s, ",commit=%d", val); - val = sdp->sd_tune.gt_statfs_quantum; - if (val != 30) - seq_printf(s, ",statfs_quantum=%d", val); - else if (sdp->sd_tune.gt_statfs_slow) + if (logd_secs != 30) + seq_printf(s, ",commit=%d", logd_secs); + if (statfs_quantum != 30) + seq_printf(s, ",statfs_quantum=%d", statfs_quantum); + else if (statfs_slow) seq_puts(s, ",statfs_quantum=0"); - val = sdp->sd_tune.gt_quota_quantum; - if (val != 60) - seq_printf(s, ",quota_quantum=%d", val); + if (quota_quantum != 60) + seq_printf(s, ",quota_quantum=%d", quota_quantum); if (args->ar_statfs_percent) seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent); if (args->ar_errors != GFS2_ERRORS_DEFAULT) { From cea44032bc799b088bce1ea73c08269bb59b47d0 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 7 Jun 2023 07:19:50 -0500 Subject: [PATCH 10/23] gfs2: retry interrupted internal reads The iomap-based read operations done by gfs2 for its system files, such as rindex, may sometimes be interrupted and return -EINTR. This confuses some users of gfs2_internal_read(). Fix that by retrying interrupted reads. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index d95125714ebb..dacc21b1ae00 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -491,13 +491,16 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, void *p; do { + page = read_cache_page(mapping, index, gfs2_read_folio, NULL); + if (IS_ERR(page)) { + if (PTR_ERR(page) == -EINTR) + continue; + return PTR_ERR(page); + } + p = kmap_atomic(page); amt = size - copied; if (offset + size > PAGE_SIZE) amt = PAGE_SIZE - offset; - page = read_cache_page(mapping, index, gfs2_read_folio, NULL); - if (IS_ERR(page)) - return PTR_ERR(page); - p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); kunmap_atomic(p); put_page(page); From af1abe11466f1a6cb6ba22ee0d815c21c3559947 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Nov 2022 14:19:06 +0100 Subject: [PATCH 11/23] gfs2: Rename remaining "transaction" glock references The transaction glock was repurposed to serve as the new freeze glock years ago. Don't refer to it as the transaction glock anymore. Also, to be more precise, call it the "freeze glock" instead of the "freeze lock". Ditto for the journal glock. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 ++-- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/recovery.c | 8 ++++---- fs/gfs2/super.c | 2 +- fs/gfs2/util.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5adc7d85dbf3..1438e7465e30 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -145,8 +145,8 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) * * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted * when we're withdrawn. For example, to maintain metadata integrity, we should - * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like - * iopen or the transaction glocks may be safely used because none of their + * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like + * the iopen or freeze glock may be safely used because none of their * metadata goes through the journal. So in general, we should disallow all * glocks that are journaled, and allow all the others. One exception is: * we need to allow our active journal to be promoted and demoted so others diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9af9ddb61ca0..3b93e4a22dfd 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -434,7 +434,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops, CREATE, &sdp->sd_freeze_gl); if (error) { - fs_err(sdp, "can't create transaction glock: %d\n", error); + fs_err(sdp, "can't create freeze glock: %d\n", error); goto fail_rename; } diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 2bb085a72e8e..d8e522f389aa 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -420,10 +420,10 @@ void gfs2_recover_func(struct work_struct *work) if (sdp->sd_args.ar_spectator) goto fail; if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { - fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", + fs_info(sdp, "jid=%u: Trying to acquire journal glock...\n", jd->jd_jid); jlocked = 1; - /* Acquire the journal lock so we can do recovery */ + /* Acquire the journal glock so we can do recovery */ error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, @@ -465,10 +465,10 @@ void gfs2_recover_func(struct work_struct *work) ktime_ms_delta(t_jhd, t_jlck)); if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { - fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n", + fs_info(sdp, "jid=%u: Acquiring the freeze glock...\n", jd->jd_jid); - /* Acquire a shared hold on the freeze lock */ + /* Acquire a shared hold on the freeze glock */ error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY); if (error) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index c89b28030495..6afd29079843 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -463,7 +463,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) * @flags: The type of dirty * * Unfortunately it can be called under any combination of inode - * glock and transaction lock, so we have to check carefully. + * glock and freeze glock, so we have to check carefully. * * At the moment this deals only with atime - it should be possible * to expand that role in future, once a review of the locking has diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 7a6aeffcdf5c..c84242ef4903 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -107,7 +107,7 @@ int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, freeze_gh); if (error && error != GLR_TRYFAILED) - fs_err(sdp, "can't lock the freeze lock: %d\n", error); + fs_err(sdp, "can't lock the freeze glock: %d\n", error); return error; } From 097cca525adf10f35c9dac037155564f1b1a688b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Nov 2022 16:40:15 +0100 Subject: [PATCH 12/23] gfs2: Rename the {freeze,thaw}_super callbacks Rename gfs2_freeze to gfs2_freeze_super and gfs2_unfreeze to gfs2_thaw_super to match the names of the corresponding super operations. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 12 ++++++------ fs/gfs2/util.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6afd29079843..bfaeef04a4f7 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -697,12 +697,12 @@ void gfs2_freeze_func(struct work_struct *work) } /** - * gfs2_freeze - prevent further writes to the filesystem + * gfs2_freeze_super - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem * */ -static int gfs2_freeze(struct super_block *sb) +static int gfs2_freeze_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; @@ -742,12 +742,12 @@ out: } /** - * gfs2_unfreeze - reallow writes to the filesystem + * gfs2_thaw_super - reallow writes to the filesystem * @sb: the VFS structure for the filesystem * */ -static int gfs2_unfreeze(struct super_block *sb) +static int gfs2_thaw_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; @@ -1530,8 +1530,8 @@ const struct super_operations gfs2_super_ops = { .evict_inode = gfs2_evict_inode, .put_super = gfs2_put_super, .sync_fs = gfs2_sync_fs, - .freeze_super = gfs2_freeze, - .thaw_super = gfs2_unfreeze, + .freeze_super = gfs2_freeze_super, + .thaw_super = gfs2_thaw_super, .statfs = gfs2_statfs, .drop_inode = gfs2_drop_inode, .show_options = gfs2_show_options, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index c84242ef4903..16ac3b3e7b88 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -188,7 +188,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq(&sdp->sd_jinode_gh); if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { - /* Make sure gfs2_unfreeze works if partially-frozen */ + /* Make sure gfs2_thaw_super works if partially-frozen */ flush_work(&sdp->sd_freeze_work); atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); thaw_super(sdp->sd_vfs); From e392edd5d52a6742595ecaf8270c1af3e96b9a38 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Nov 2022 18:26:00 +0100 Subject: [PATCH 13/23] gfs2: Rename gfs2_freeze_lock{ => _shared } Rename gfs2_freeze_lock to gfs2_freeze_lock_shared to make it a bit more obvious that this function establishes the "thawed" state of the freeze glock. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 4 ++-- fs/gfs2/recovery.c | 2 +- fs/gfs2/super.c | 2 +- fs/gfs2/util.c | 10 +++++----- fs/gfs2/util.h | 5 +++-- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3b93e4a22dfd..4fb25496e92f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1269,7 +1269,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) } } - error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0); if (error) goto fail_per_node; @@ -1592,7 +1592,7 @@ static int gfs2_reconfigure(struct fs_context *fc) if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) { struct gfs2_holder freeze_gh; - error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0); if (error) return -EINVAL; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index d8e522f389aa..61ef07da40b2 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -470,7 +470,7 @@ void gfs2_recover_func(struct work_struct *work) /* Acquire a shared hold on the freeze glock */ - error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY); + error = gfs2_freeze_lock_shared(sdp, &thaw_gh, LM_FLAG_PRIORITY); if (error) goto fail_gunlock_ji; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index bfaeef04a4f7..01ea53504e9d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -677,7 +677,7 @@ void gfs2_freeze_func(struct work_struct *work) struct super_block *sb = sdp->sd_vfs; atomic_inc(&sb->s_active); - error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0); if (error) { gfs2_assert_withdraw(sdp, 0); } else { diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 16ac3b3e7b88..817c4f42ae7b 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -93,13 +93,13 @@ out_unlock: } /** - * gfs2_freeze_lock - hold the freeze glock + * gfs2_freeze_lock_shared - hold the freeze glock * @sdp: the superblock * @freeze_gh: pointer to the requested holder * @caller_flags: any additional flags needed by the caller */ -int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, - int caller_flags) +int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, + int caller_flags) { int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags; int error; @@ -157,8 +157,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) gfs2_holder_mark_uninitialized(&freeze_gh); if (sdp->sd_freeze_gl && !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { - ret = gfs2_freeze_lock(sdp, &freeze_gh, - log_write_allowed ? 0 : LM_FLAG_TRY); + ret = gfs2_freeze_lock_shared(sdp, &freeze_gh, + log_write_allowed ? 0 : LM_FLAG_TRY); if (ret == GLR_TRYFAILED) ret = 0; } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 78ec190f4155..3291e33e81e9 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -149,8 +149,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, bool verbose); -extern int gfs2_freeze_lock(struct gfs2_sbd *sdp, - struct gfs2_holder *freeze_gh, int caller_flags); +extern int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp, + struct gfs2_holder *freeze_gh, + int caller_flags); extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); #define gfs2_io_error(sdp) \ From 9e4f09565f79a806af3e8846e81c957c4653a7dc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Nov 2022 11:53:07 +0100 Subject: [PATCH 14/23] gfs2: Reconfiguring frozen filesystem already rejected Reconfiguring a frozen filesystem is already rejected in reconfigure_super(), so there is no need to check for that condition again at the filesystem level. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4fb25496e92f..12eedba45dbb 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1590,12 +1590,6 @@ static int gfs2_reconfigure(struct fs_context *fc) fc->sb_flags |= SB_RDONLY; if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) { - struct gfs2_holder freeze_gh; - - error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0); - if (error) - return -EINVAL; - if (fc->sb_flags & SB_RDONLY) { gfs2_make_fs_ro(sdp); } else { @@ -1603,7 +1597,6 @@ static int gfs2_reconfigure(struct fs_context *fc) if (error) errorfc(fc, "unable to remount read-write"); } - gfs2_freeze_unlock(&freeze_gh); } sdp->sd_args = *newargs; From cad1e15804a83afd9a5c1d95a428d60d1f9c0340 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 21 Nov 2022 23:09:38 +0100 Subject: [PATCH 15/23] gfs2: Rename SDF_{FS_FROZEN => FREEZE_INITIATOR} Rename the SDF_FS_FROZEN flag to SDF_FREEZE_INITIATOR to indicate more clearly that the node that has this flag set is the initiator of the freeze. Signed-off-by: Andreas Gruenbacher sd_flags); - wake_up_bit(&sdp->sd_flags, SDF_FS_FROZEN); + clear_bit_unlock(SDF_FREEZE_INITIATOR, &sdp->sd_flags); + wake_up_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR); return; } @@ -735,7 +735,7 @@ static int gfs2_freeze_super(struct super_block *sb) fs_err(sdp, "retrying...\n"); msleep(1000); } - set_bit(SDF_FS_FROZEN, &sdp->sd_flags); + set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); out: mutex_unlock(&sdp->sd_freeze_mutex); return error; @@ -760,7 +760,7 @@ static int gfs2_thaw_super(struct super_block *sb) gfs2_freeze_unlock(&sdp->sd_freeze_gh); mutex_unlock(&sdp->sd_freeze_mutex); - return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE); + return wait_on_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR, TASK_INTERRUPTIBLE); } /** diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 454dc2ff8b5e..bc752de01573 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -111,7 +111,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_RORECOVERY, &f), test_bit(SDF_SKIP_DLM_UNLOCK, &f), test_bit(SDF_FORCE_AIL_FLUSH, &f), - test_bit(SDF_FS_FROZEN, &f), + test_bit(SDF_FREEZE_INITIATOR, &f), test_bit(SDF_WITHDRAWING, &f), test_bit(SDF_WITHDRAW_IN_PROG, &f), test_bit(SDF_REMOTE_WITHDRAW, &f), diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 817c4f42ae7b..1743caee5505 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -187,7 +187,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) } sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq(&sdp->sd_jinode_gh); - if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { + if (test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) { /* Make sure gfs2_thaw_super works if partially-frozen */ flush_work(&sdp->sd_freeze_work); atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); From b77b4a4815a9651d1d6e07b8e6548eee9531a5eb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 14 Nov 2022 23:34:50 +0100 Subject: [PATCH 16/23] gfs2: Rework freeze / thaw logic So far, at mount time, gfs2 would take the freeze glock in shared mode and then immediately drop it again, turning it into a cached glock that can be reclaimed at any time. To freeze the filesystem cluster-wide, the node initiating the freeze would take the freeze glock in exclusive mode, which would cause the freeze glock's freeze_go_sync() callback to run on each node. There, gfs2 would freeze the filesystem and schedule gfs2_freeze_func() to run. gfs2_freeze_func() would re-acquire the freeze glock in shared mode, thaw the filesystem, and drop the freeze glock again. The initiating node would keep the freeze glock held in exclusive mode. To thaw the filesystem, the initiating node would drop the freeze glock again, which would allow gfs2_freeze_func() to resume on all nodes, leaving the filesystem in the thawed state. It turns out that in freeze_go_sync(), we cannot reliably and safely freeze the filesystem. This is primarily because the final unmount of a filesystem takes a write lock on the s_umount rw semaphore before calling into gfs2_put_super(), and freeze_go_sync() needs to call freeze_super() which also takes a write lock on the same semaphore, causing a deadlock. We could work around this by trying to take an active reference on the super block first, which would prevent unmount from running at the same time. But that can fail, and freeze_go_sync() isn't actually allowed to fail. To get around this, this patch changes the freeze glock locking scheme as follows: At mount time, each node takes the freeze glock in shared mode. To freeze a filesystem, the initiating node first freezes the filesystem locally and then drops and re-acquires the freeze glock in exclusive mode. All other nodes notice that there is contention on the freeze glock in their go_callback callbacks, and they schedule gfs2_freeze_func() to run. There, they freeze the filesystem locally and drop and re-acquire the freeze glock before re-thawing the filesystem. This is happening outside of the glock state engine, so there, we are allowed to fail. From a cluster point of view, taking and immediately dropping a glock is indistinguishable from taking the glock and only dropping it upon contention, so this new scheme is compatible with the old one. Thanks to Li Dong for reporting a locking bug in gfs2_freeze_func() in a previous version of this commit. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 52 +++++-------- fs/gfs2/log.c | 2 - fs/gfs2/ops_fstype.c | 5 +- fs/gfs2/recovery.c | 24 +++--- fs/gfs2/super.c | 174 +++++++++++++++++++++++++++++++++---------- fs/gfs2/super.h | 1 + fs/gfs2/util.c | 30 ++------ 7 files changed, 178 insertions(+), 110 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 01d433ed6ce7..7c48c7afd6a4 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -561,47 +561,33 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, } /** - * freeze_go_sync - promote/demote the freeze glock + * freeze_go_callback - A cluster node is requesting a freeze * @gl: the glock + * @remote: true if this came from a different cluster node */ -static int freeze_go_sync(struct gfs2_glock *gl) +static void freeze_go_callback(struct gfs2_glock *gl, bool remote) { - int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct super_block *sb = sdp->sd_vfs; + + if (!remote || + gl->gl_state != LM_ST_SHARED || + gl->gl_demote_state != LM_ST_UNLOCKED) + return; /* - * We need to check gl_state == LM_ST_SHARED here and not gl_req == - * LM_ST_EXCLUSIVE. That's because when any node does a freeze, - * all the nodes should have the freeze glock in SH mode and they all - * call do_xmote: One for EX and the others for UN. They ALL must - * freeze locally, and they ALL must queue freeze work. The freeze_work - * calls freeze_func, which tries to reacquire the freeze glock in SH, - * effectively waiting for the thaw on the node who holds it in EX. - * Once thawed, the work func acquires the freeze glock in - * SH and everybody goes back to thawed. + * Try to get an active super block reference to prevent racing with + * unmount (see trylock_super()). But note that unmount isn't the only + * place where a write lock on s_umount is taken, and we can fail here + * because of things like remount as well. */ - if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && - !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) { - atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); - error = freeze_super(sdp->sd_vfs); - if (error) { - fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", - error); - if (gfs2_withdrawn(sdp)) { - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); - return 0; - } - gfs2_assert_withdraw(sdp, 0); - } - queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); - if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | - GFS2_LFC_FREEZE_GO_SYNC); - else /* read-only mounts */ - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + if (down_read_trylock(&sb->s_umount)) { + atomic_inc(&sb->s_active); + up_read(&sb->s_umount); + if (!queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work)) + deactivate_super(sb); } - return 0; } /** @@ -761,9 +747,9 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { }; const struct gfs2_glock_operations gfs2_freeze_glops = { - .go_sync = freeze_go_sync, .go_xmote_bh = freeze_go_xmote_bh, .go_demote_ok = freeze_go_demote_ok, + .go_callback = freeze_go_callback, .go_type = LM_TYPE_NONDISK, .go_flags = GLOF_NONDISK, }; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index d750d1128bed..dca535311dee 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1136,8 +1136,6 @@ repeat: if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LOG_HEAD_FLUSH_FREEZE)) gfs2_log_shutdown(sdp); - if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE) - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } out_end: diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 12eedba45dbb..4ce5718719ac 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1140,7 +1140,6 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) int silent = fc->sb_flags & SB_SILENT; struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; - struct gfs2_holder freeze_gh; int error; sdp = init_sbd(sb); @@ -1269,15 +1268,15 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) } } - error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0); + error = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0); if (error) goto fail_per_node; if (!sb_rdonly(sb)) error = gfs2_make_fs_rw(sdp); - gfs2_freeze_unlock(&freeze_gh); if (error) { + gfs2_freeze_unlock(&sdp->sd_freeze_gh); if (sdp->sd_quotad_process) kthread_stop(sdp->sd_quotad_process); sdp->sd_quotad_process = NULL; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 61ef07da40b2..afeda936e2be 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -404,7 +404,7 @@ void gfs2_recover_func(struct work_struct *work) struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; - struct gfs2_holder j_gh, ji_gh, thaw_gh; + struct gfs2_holder j_gh, ji_gh; ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep; int ro = 0; unsigned int pass; @@ -465,14 +465,14 @@ void gfs2_recover_func(struct work_struct *work) ktime_ms_delta(t_jhd, t_jlck)); if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { - fs_info(sdp, "jid=%u: Acquiring the freeze glock...\n", - jd->jd_jid); + mutex_lock(&sdp->sd_freeze_mutex); - /* Acquire a shared hold on the freeze glock */ - - error = gfs2_freeze_lock_shared(sdp, &thaw_gh, LM_FLAG_PRIORITY); - if (error) + if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) { + mutex_unlock(&sdp->sd_freeze_mutex); + fs_warn(sdp, "jid=%u: Can't replay: filesystem " + "is frozen\n", jd->jd_jid); goto fail_gunlock_ji; + } if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) { ro = 1; @@ -496,7 +496,7 @@ void gfs2_recover_func(struct work_struct *work) fs_warn(sdp, "jid=%u: Can't replay: read-only block " "device\n", jd->jd_jid); error = -EROFS; - goto fail_gunlock_thaw; + goto fail_gunlock_nofreeze; } t_tlck = ktime_get(); @@ -514,7 +514,7 @@ void gfs2_recover_func(struct work_struct *work) lops_after_scan(jd, error, pass); if (error) { up_read(&sdp->sd_log_flush_lock); - goto fail_gunlock_thaw; + goto fail_gunlock_nofreeze; } } @@ -522,7 +522,7 @@ void gfs2_recover_func(struct work_struct *work) clean_journal(jd, &head); up_read(&sdp->sd_log_flush_lock); - gfs2_freeze_unlock(&thaw_gh); + mutex_unlock(&sdp->sd_freeze_mutex); t_rep = ktime_get(); fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, " "jhead:%lldms, tlck:%lldms, replay:%lldms]\n", @@ -543,8 +543,8 @@ void gfs2_recover_func(struct work_struct *work) fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); goto done; -fail_gunlock_thaw: - gfs2_freeze_unlock(&thaw_gh); +fail_gunlock_nofreeze: + mutex_unlock(&sdp->sd_freeze_mutex); fail_gunlock_ji: if (jlocked) { gfs2_glock_dq_uninit(&ji_gh); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 9a428e09d38c..81c8d07a4540 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -332,7 +332,12 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; - int error; + int error, error2; + + /* + * Grab all the journal glocks in SH mode. We are *probably* doing + * that to prevent recovery. + */ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); @@ -349,11 +354,13 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) list_add(&lfcc->list, &list); } + gfs2_freeze_unlock(&sdp->sd_freeze_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOPID, &sdp->sd_freeze_gh); if (error) - goto out; + goto relock_shared; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); @@ -368,8 +375,14 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) } } - if (error) - gfs2_freeze_unlock(&sdp->sd_freeze_gh); + if (!error) + goto out; /* success */ + + gfs2_freeze_unlock(&sdp->sd_freeze_gh); + +relock_shared: + error2 = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0); + gfs2_assert_withdraw(sdp, !error2); out: while (!list_empty(&list)) { @@ -615,6 +628,8 @@ restart: /* Release stuff */ + gfs2_freeze_unlock(&sdp->sd_freeze_gh); + iput(sdp->sd_jindex); iput(sdp->sd_statfs_inode); iput(sdp->sd_rindex); @@ -669,31 +684,82 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) return sdp->sd_log_error; } +static int gfs2_freeze_locally(struct gfs2_sbd *sdp) +{ + struct super_block *sb = sdp->sd_vfs; + int error; + + atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); + + error = freeze_super(sb); + if (error) + goto fail; + + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | + GFS2_LFC_FREEZE_GO_SYNC); + if (gfs2_withdrawn(sdp)) { + thaw_super(sb); + error = -EIO; + goto fail; + } + } + return 0; + +fail: + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + return error; +} + +static int gfs2_do_thaw(struct gfs2_sbd *sdp) +{ + struct super_block *sb = sdp->sd_vfs; + int error; + + error = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0); + if (error) + goto fail; + error = thaw_super(sb); + if (!error) + return 0; + +fail: + fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error); + gfs2_assert_withdraw(sdp, 0); + return error; +} + void gfs2_freeze_func(struct work_struct *work) { - int error; - struct gfs2_holder freeze_gh; struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work); struct super_block *sb = sdp->sd_vfs; + int error; - atomic_inc(&sb->s_active); - error = gfs2_freeze_lock_shared(sdp, &freeze_gh, 0); - if (error) { - gfs2_assert_withdraw(sdp, 0); - } else { - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); - error = thaw_super(sb); - if (error) { - fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", - error); - gfs2_assert_withdraw(sdp, 0); - } - gfs2_freeze_unlock(&freeze_gh); - } + mutex_lock(&sdp->sd_freeze_mutex); + error = -EBUSY; + if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) + goto freeze_failed; + + error = gfs2_freeze_locally(sdp); + if (error) + goto freeze_failed; + + gfs2_freeze_unlock(&sdp->sd_freeze_gh); + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + + error = gfs2_do_thaw(sdp); + if (error) + goto out; + + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + goto out; + +freeze_failed: + fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error); + +out: + mutex_unlock(&sdp->sd_freeze_mutex); deactivate_super(sb); - clear_bit_unlock(SDF_FREEZE_INITIATOR, &sdp->sd_flags); - wake_up_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR); - return; } /** @@ -707,21 +773,27 @@ static int gfs2_freeze_super(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; int error; - mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) { - error = -EBUSY; + if (!mutex_trylock(&sdp->sd_freeze_mutex)) + return -EBUSY; + error = -EBUSY; + if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) goto out; - } for (;;) { - if (gfs2_withdrawn(sdp)) { - error = -EINVAL; + error = gfs2_freeze_locally(sdp); + if (error) { + fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", + error); goto out; } error = gfs2_lock_fs_check_clean(sdp); if (!error) - break; + break; /* success */ + + error = gfs2_do_thaw(sdp); + if (error) + goto out; if (error == -EBUSY) fs_err(sdp, "waiting for recovery before freeze\n"); @@ -735,8 +807,12 @@ static int gfs2_freeze_super(struct super_block *sb) fs_err(sdp, "retrying...\n"); msleep(1000); } - set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); + out: + if (!error) { + set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + } mutex_unlock(&sdp->sd_freeze_mutex); return error; } @@ -750,17 +826,39 @@ out: static int gfs2_thaw_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; + int error; - mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || - !gfs2_holder_initialized(&sdp->sd_freeze_gh)) { - mutex_unlock(&sdp->sd_freeze_mutex); - return -EINVAL; - } + if (!mutex_trylock(&sdp->sd_freeze_mutex)) + return -EBUSY; + error = -EINVAL; + if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) + goto out; gfs2_freeze_unlock(&sdp->sd_freeze_gh); + + error = gfs2_do_thaw(sdp); + + if (!error) { + clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + } +out: + mutex_unlock(&sdp->sd_freeze_mutex); + return error; +} + +void gfs2_thaw_freeze_initiator(struct super_block *sb) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + + mutex_lock(&sdp->sd_freeze_mutex); + if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) + goto out; + + gfs2_freeze_unlock(&sdp->sd_freeze_gh); + +out: mutex_unlock(&sdp->sd_freeze_mutex); - return wait_on_bit(&sdp->sd_flags, SDF_FREEZE_INITIATOR, TASK_INTERRUPTIBLE); } /** diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 58d13fd77aed..bba58629bc45 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -46,6 +46,7 @@ extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); extern int gfs2_statfs_sync(struct super_block *sb, int type); extern void gfs2_freeze_func(struct work_struct *work); +extern void gfs2_thaw_freeze_initiator(struct super_block *sb); extern void free_local_statfs_inodes(struct gfs2_sbd *sdp); extern struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 1743caee5505..00494dffb47c 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -124,7 +124,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) struct gfs2_inode *ip; struct gfs2_glock *i_gl; u64 no_formal_ino; - int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); int ret = 0; int tries; @@ -152,24 +151,18 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) */ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); if (!sb_rdonly(sdp->sd_vfs)) { - struct gfs2_holder freeze_gh; + bool locked = mutex_trylock(&sdp->sd_freeze_mutex); + + gfs2_make_fs_ro(sdp); + + if (locked) + mutex_unlock(&sdp->sd_freeze_mutex); - gfs2_holder_mark_uninitialized(&freeze_gh); - if (sdp->sd_freeze_gl && - !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { - ret = gfs2_freeze_lock_shared(sdp, &freeze_gh, - log_write_allowed ? 0 : LM_FLAG_TRY); - if (ret == GLR_TRYFAILED) - ret = 0; - } - if (!ret) - gfs2_make_fs_ro(sdp); /* * Dequeue any pending non-system glock holders that can no * longer be granted because the file system is withdrawn. */ gfs2_gl_dq_holders(sdp); - gfs2_freeze_unlock(&freeze_gh); } if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ @@ -187,15 +180,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) } sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq(&sdp->sd_jinode_gh); - if (test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) { - /* Make sure gfs2_thaw_super works if partially-frozen */ - flush_work(&sdp->sd_freeze_work); - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); - thaw_super(sdp->sd_vfs); - } else { - wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, - TASK_UNINTERRUPTIBLE); - } + gfs2_thaw_freeze_initiator(sdp->sd_vfs); + wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); /* * holder_uninit to force glock_put, to force dlm to let go From 5432af15f8772d5e1a44d59d6ffcd513da8436b4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 18 Aug 2022 16:12:24 +0200 Subject: [PATCH 17/23] gfs2: Replace sd_freeze_state with SDF_FROZEN flag Replace sd_freeze_state with a new SDF_FROZEN flag. There no longer is a need for indicating that a freeze is in progress (SDF_STARTING_FREEZE); we are now protecting the critical sections with the sd_freeze_mutex. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 8 +------- fs/gfs2/log.c | 9 ++++----- fs/gfs2/ops_fstype.c | 1 - fs/gfs2/recovery.c | 2 +- fs/gfs2/super.c | 23 ++++++++--------------- fs/gfs2/sys.c | 2 ++ fs/gfs2/trans.c | 3 +-- 7 files changed, 17 insertions(+), 31 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 1f10529ebcf5..8b1b50d19de1 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -608,12 +608,7 @@ enum { withdrawing */ SDF_DEACTIVATING = 15, SDF_EVICTING = 16, -}; - -enum gfs2_freeze_state { - SFS_UNFROZEN = 0, - SFS_STARTING_FREEZE = 1, - SFS_FROZEN = 2, + SDF_FROZEN = 17, }; #define GFS2_FSNAME_LEN 256 @@ -841,7 +836,6 @@ struct gfs2_sbd { /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; - atomic_t sd_freeze_state; struct mutex sd_freeze_mutex; char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index dca535311dee..aa568796207c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -914,9 +914,8 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, static void log_write_header(struct gfs2_sbd *sdp, u32 flags) { blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; - enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); - gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); + gfs2_assert_withdraw(sdp, !test_bit(SDF_FROZEN, &sdp->sd_flags)); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); @@ -1036,7 +1035,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) { struct gfs2_trans *tr = NULL; unsigned int reserved_blocks = 0, used_blocks = 0; - enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); + bool frozen = test_bit(SDF_FROZEN, &sdp->sd_flags); unsigned int first_log_head; unsigned int reserved_revokes = 0; @@ -1067,7 +1066,7 @@ repeat: if (tr) { sdp->sd_log_tr = NULL; tr->tr_first = first_log_head; - if (unlikely (state == SFS_FROZEN)) { + if (unlikely(frozen)) { if (gfs2_assert_withdraw_delayed(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) goto out_withdraw; @@ -1092,7 +1091,7 @@ repeat: if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - if (unlikely(state == SFS_FROZEN)) + if (unlikely(frozen)) if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) goto out_withdraw; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4ce5718719ac..24acd17e530c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -135,7 +135,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); mutex_init(&sdp->sd_freeze_mutex); return sdp; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index afeda936e2be..9c7a9f640bad 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -467,7 +467,7 @@ void gfs2_recover_func(struct work_struct *work) if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) { + if (test_bit(SDF_FROZEN, &sdp->sd_flags)) { mutex_unlock(&sdp->sd_freeze_mutex); fs_warn(sdp, "jid=%u: Can't replay: filesystem " "is frozen\n", jd->jd_jid); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 81c8d07a4540..6dcbfb9b9306 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -689,26 +689,19 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp) struct super_block *sb = sdp->sd_vfs; int error; - atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); - error = freeze_super(sb); if (error) - goto fail; + return error; if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | GFS2_LFC_FREEZE_GO_SYNC); if (gfs2_withdrawn(sdp)) { thaw_super(sb); - error = -EIO; - goto fail; + return -EIO; } } return 0; - -fail: - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); - return error; } static int gfs2_do_thaw(struct gfs2_sbd *sdp) @@ -737,7 +730,7 @@ void gfs2_freeze_func(struct work_struct *work) mutex_lock(&sdp->sd_freeze_mutex); error = -EBUSY; - if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) + if (test_bit(SDF_FROZEN, &sdp->sd_flags)) goto freeze_failed; error = gfs2_freeze_locally(sdp); @@ -745,13 +738,13 @@ void gfs2_freeze_func(struct work_struct *work) goto freeze_failed; gfs2_freeze_unlock(&sdp->sd_freeze_gh); - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + set_bit(SDF_FROZEN, &sdp->sd_flags); error = gfs2_do_thaw(sdp); if (error) goto out; - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + clear_bit(SDF_FROZEN, &sdp->sd_flags); goto out; freeze_failed: @@ -776,7 +769,7 @@ static int gfs2_freeze_super(struct super_block *sb) if (!mutex_trylock(&sdp->sd_freeze_mutex)) return -EBUSY; error = -EBUSY; - if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) + if (test_bit(SDF_FROZEN, &sdp->sd_flags)) goto out; for (;;) { @@ -811,7 +804,7 @@ static int gfs2_freeze_super(struct super_block *sb) out: if (!error) { set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); - atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); + set_bit(SDF_FROZEN, &sdp->sd_flags); } mutex_unlock(&sdp->sd_freeze_mutex); return error; @@ -840,7 +833,7 @@ static int gfs2_thaw_super(struct super_block *sb) if (!error) { clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags); - atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + clear_bit(SDF_FROZEN, &sdp->sd_flags); } out: mutex_unlock(&sdp->sd_freeze_mutex); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index bc752de01573..2dfbe2f188dd 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -82,6 +82,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) "RO Recovery: %d\n" "Skip DLM Unlock: %d\n" "Force AIL Flush: %d\n" + "FS Freeze Initiator: %d\n" "FS Frozen: %d\n" "Withdrawing: %d\n" "Withdraw In Prog: %d\n" @@ -112,6 +113,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_SKIP_DLM_UNLOCK, &f), test_bit(SDF_FORCE_AIL_FLUSH, &f), test_bit(SDF_FREEZE_INITIATOR, &f), + test_bit(SDF_FROZEN, &f), test_bit(SDF_WITHDRAWING, &f), test_bit(SDF_WITHDRAW_IN_PROG, &f), test_bit(SDF_REMOTE_WITHDRAW, &f), diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 63fec11ef2ce..ec1631257978 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -233,7 +233,6 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) struct gfs2_bufdata *bd; struct gfs2_meta_header *mh; struct gfs2_trans *tr = current->journal_info; - enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lock_buffer(bh); if (buffer_pinned(bh)) { @@ -267,7 +266,7 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) (unsigned long long)bd->bd_bh->b_blocknr); BUG(); } - if (unlikely(state == SFS_FROZEN)) { + if (unlikely(test_bit(SDF_FROZEN, &sdp->sd_flags))) { fs_info(sdp, "GFS2:adding buf while frozen\n"); gfs2_assert_withdraw(sdp, 0); } From 6c7410f44961cf72d49a18e455ad4ae833f6fb7c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 28 Nov 2022 02:30:35 +0100 Subject: [PATCH 18/23] gfs2: gfs2_freeze_lock_shared cleanup All the remaining users of gfs2_freeze_lock_shared() set freeze_gh to &sdp->sd_freeze_gh and flags to 0, so remove those two parameters. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/super.c | 4 ++-- fs/gfs2/util.c | 9 +++------ fs/gfs2/util.h | 4 +--- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 24acd17e530c..9375409fd0c5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1267,7 +1267,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) } } - error = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0); + error = gfs2_freeze_lock_shared(sdp); if (error) goto fail_per_node; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6dcbfb9b9306..9f4d5d6549ee 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -381,7 +381,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) gfs2_freeze_unlock(&sdp->sd_freeze_gh); relock_shared: - error2 = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0); + error2 = gfs2_freeze_lock_shared(sdp); gfs2_assert_withdraw(sdp, !error2); out: @@ -709,7 +709,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp) struct super_block *sb = sdp->sd_vfs; int error; - error = gfs2_freeze_lock_shared(sdp, &sdp->sd_freeze_gh, 0); + error = gfs2_freeze_lock_shared(sdp); if (error) goto fail; error = thaw_super(sb); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 00494dffb47c..b9db034c7f58 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -95,17 +95,14 @@ out_unlock: /** * gfs2_freeze_lock_shared - hold the freeze glock * @sdp: the superblock - * @freeze_gh: pointer to the requested holder - * @caller_flags: any additional flags needed by the caller */ -int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, - int caller_flags) +int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp) { - int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags; + int flags = LM_FLAG_NOEXP | GL_EXACT; int error; error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, - freeze_gh); + &sdp->sd_freeze_gh); if (error && error != GLR_TRYFAILED) fs_err(sdp, "can't lock the freeze glock: %d\n", error); return error; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 3291e33e81e9..cdb839529175 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -149,9 +149,7 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, bool verbose); -extern int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp, - struct gfs2_holder *freeze_gh, - int caller_flags); +extern int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp); extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); #define gfs2_io_error(sdp) \ From f246dd4b78e0efda8aa3f93f831a44e12ef0e59e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 21 Jun 2023 22:32:06 +0200 Subject: [PATCH 19/23] gfs: Get rid of unnucessary locking in inode_go_dump Commit 27a2660f1ef9 ("gfs2: Dump nrpages for inodes and their glocks") added some locking around reading inode->i_data.nrpages. That locking doesn't do anything really, so get rid of it. With that, the glock argument to ->go_dump() can be made const again as well. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 17 ++++++----------- fs/gfs2/incore.h | 2 +- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 7c48c7afd6a4..54319328b16b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -236,7 +236,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) truncate_inode_pages_range(mapping, start, end); } -static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl, +static void gfs2_rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf) { struct gfs2_rgrpd *rgd = gl->gl_object; @@ -536,28 +536,23 @@ static int inode_go_held(struct gfs2_holder *gh) * */ -static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl, +static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf) { struct gfs2_inode *ip = gl->gl_object; - struct inode *inode; - unsigned long nrpages; + const struct inode *inode = &ip->i_inode; if (ip == NULL) return; - inode = &ip->i_inode; - xa_lock_irq(&inode->i_data.i_pages); - nrpages = inode->i_data.nrpages; - xa_unlock_irq(&inode->i_data.i_pages); - gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu " "p:%lu\n", fs_id_buf, (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, - IF2DT(ip->i_inode.i_mode), ip->i_flags, + IF2DT(inode->i_mode), ip->i_flags, (unsigned int)ip->i_diskflags, - (unsigned long long)i_size_read(inode), nrpages); + (unsigned long long)i_size_read(inode), + inode->i_data.nrpages); } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8b1b50d19de1..04f2d78e8658 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -221,7 +221,7 @@ struct gfs2_glock_operations { int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_instantiate) (struct gfs2_glock *gl); int (*go_held)(struct gfs2_holder *gh); - void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl, + void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); void (*go_free)(struct gfs2_glock *gl); From 58721bd46c9aaa2d890b2d61cbb8740745455aa9 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Mon, 26 Jun 2023 12:21:09 +0530 Subject: [PATCH 20/23] gfs2: Replace deprecated kmap_atomic with kmap_local_page kmap_atomic() is deprecated in favor of kmap_local_{folio,page}(). Therefore, replace kmap_atomic() with kmap_local_page() in gfs2_internal_read() and stuffed_readpage(). kmap_atomic() disables page-faults and preemption (the latter only for !PREEMPT_RT kernels), However, the code within the mapping/un-mapping in gfs2_internal_read() and stuffed_readpage() does not depend on the above-mentioned side effects. Therefore, a mere replacement of the old API with the new one is all that is required (i.e., there is no need to explicitly add any calls to pagefault_disable() and/or preempt_disable()). Signed-off-by: Deepak R Varma Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index dacc21b1ae00..c45bac9b5408 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -431,10 +431,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) if (error) return error; - kaddr = kmap_atomic(page); + kaddr = kmap_local_page(page); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); - kunmap_atomic(kaddr); + kunmap_local(kaddr); flush_dcache_page(page); brelse(dibh); SetPageUptodate(page); @@ -497,12 +497,12 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, continue; return PTR_ERR(page); } - p = kmap_atomic(page); + p = kmap_local_page(page); amt = size - copied; if (offset + size > PAGE_SIZE) amt = PAGE_SIZE - offset; memcpy(buf + copied, p + offset, amt); - kunmap_atomic(p); + kunmap_local(p); put_page(page); copied += amt; index++; From b0c21c6d527491276b1f7c9580bd2bf08c081add Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 27 Jun 2023 00:22:54 +0200 Subject: [PATCH 21/23] gfs2: Convert remaining kmap_atomic calls to kmap_local_page Replace the remaining instances of kmap_atomic() ... kunmap_atomic() with kmap_local_page() ... kunmap_local(). In gfs2_write_buf_to_page(), we can call flush_dcache_page() after unmapping the page. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 13 +++++++------ fs/gfs2/quota.c | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 1902413d5d12..17641d90394b 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -427,10 +427,11 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host lh; - void *kaddr = kmap_atomic(page); + void *kaddr; unsigned int offset; bool ret = false; + kaddr = kmap_local_page(page); for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { if (lh.lh_sequence >= head->lh_sequence) @@ -441,7 +442,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, } } } - kunmap_atomic(kaddr); + kunmap_local(kaddr); return ret; } @@ -626,11 +627,11 @@ static void gfs2_check_magic(struct buffer_head *bh) __be32 *ptr; clear_buffer_escaped(bh); - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); ptr = kaddr + bh_offset(bh); if (*ptr == cpu_to_be32(GFS2_MAGIC)) set_buffer_escaped(bh); - kunmap_atomic(kaddr); + kunmap_local(kaddr); } static int blocknr_cmp(void *priv, const struct list_head *a, @@ -699,10 +700,10 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, void *kaddr; page = mempool_alloc(gfs2_page_pool, GFP_NOIO); ptr = page_address(page); - kaddr = kmap_atomic(bd2->bd_bh->b_page); + kaddr = kmap_local_page(bd2->bd_bh->b_page); memcpy(ptr, kaddr + bh_offset(bd2->bd_bh), bd2->bd_bh->b_size); - kunmap_atomic(kaddr); + kunmap_local(kaddr); *(__be32 *)ptr = 0; clear_buffer_escaped(bd2->bd_bh); unlock_buffer(bd2->bd_bh); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 386ca770ce2e..42a3f1e6b553 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -764,10 +764,10 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, } /* Write to the page, now that we have setup the buffer(s) */ - kaddr = kmap_atomic(page); + kaddr = kmap_local_page(page); memcpy(kaddr + off, buf, bytes); + kunmap_local(kaddr); flush_dcache_page(page); - kunmap_atomic(kaddr); unlock_page(page); put_page(page); From d68d0c6c3fc781c8a130e6a4d0666dbbd69e917b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 27 Jun 2023 15:34:36 +0200 Subject: [PATCH 22/23] gfs2: Use memcpy_{from,to}_page where appropriate Replace kmap_local_page() + memcpy() + kunmap_local() sequences with memcpy_{from,to}_page() where we are not doing anything else with the mapped page. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 5 +---- fs/gfs2/lops.c | 12 +++++------- fs/gfs2/quota.c | 5 +---- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index c45bac9b5408..b11ec198183b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -488,7 +488,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, unsigned copied = 0; unsigned amt; struct page *page; - void *p; do { page = read_cache_page(mapping, index, gfs2_read_folio, NULL); @@ -497,12 +496,10 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, continue; return PTR_ERR(page); } - p = kmap_local_page(page); amt = size - copied; if (offset + size > PAGE_SIZE) amt = PAGE_SIZE - offset; - memcpy(buf + copied, p + offset, amt); - kunmap_local(p); + memcpy_from_page(buf + copied, page, offset, amt); put_page(page); copied += amt; index++; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 17641d90394b..251322b01631 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -697,14 +697,12 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, lock_buffer(bd2->bd_bh); if (buffer_escaped(bd2->bd_bh)) { - void *kaddr; + void *p; + page = mempool_alloc(gfs2_page_pool, GFP_NOIO); - ptr = page_address(page); - kaddr = kmap_local_page(bd2->bd_bh->b_page); - memcpy(ptr, kaddr + bh_offset(bd2->bd_bh), - bd2->bd_bh->b_size); - kunmap_local(kaddr); - *(__be32 *)ptr = 0; + p = page_address(page); + memcpy_from_page(p, page, bh_offset(bd2->bd_bh), bd2->bd_bh->b_size); + *(__be32 *)p = 0; clear_buffer_escaped(bd2->bd_bh); unlock_buffer(bd2->bd_bh); brelse(bd2->bd_bh); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 42a3f1e6b553..7765346e3617 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -712,7 +712,6 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, struct address_space *mapping = inode->i_mapping; struct page *page; struct buffer_head *bh; - void *kaddr; u64 blk; unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0; unsigned to_write = bytes, pg_off = off; @@ -764,9 +763,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, } /* Write to the page, now that we have setup the buffer(s) */ - kaddr = kmap_local_page(page); - memcpy(kaddr + off, buf, bytes); - kunmap_local(kaddr); + memcpy_to_page(page, off, buf, bytes); flush_dcache_page(page); unlock_page(page); put_page(page); From 432928c9377959684c748a9bc6553ed2d3c2ea4f Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 28 Jun 2023 09:54:41 -0500 Subject: [PATCH 23/23] gfs2: Add quota_change type Function do_qc has two main uses: (1) to re-sync the local quota changes (qd) to the master quotas, and (2) normal quota changes. In the case of normal quota changes, the change can be positive or negative, as the quota usage goes up and down. Before this patch function do_qc was distinguishing one from another by whether the resulting value is or isn't zero: In the case of a re-sync (called do_sync) the quota value is moved from the temporary value to a master value, so the amount is added to one and subtracted from the other. The problem is that since the values can be positive or negative we can occasionally run into situations where we are not doing a re-sync but the quota change just happens to cancel out the previous value. In the case of a re-sync extra references and locks are taken, and so do_qc needs to release them. In the case of a normal quota change, no extra references and locks are taken, so it must not try to release them. The problem is: if the quota change is not a re-sync but the value just happens to cancel out the original quota change, the resulting zero value fools do_qc into thinking this is a re-sync and therefore it must release the extra references. This results in problems, mainly having to do with slot reference numbers going smaller than zero. This patch introduces new constants, QC_SYNC and QC_CHANGE so do_qc can really tell the difference. For QC_SYNC calls it must release the extra references acquired by gfs2_quota_unlock's call to qd_check_sync. For QC_CHANGE calls it does not have extra references to put. Note that this allows quota changes back to a value of zero, and so I removed an assert warning related to that. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 20 ++++++++++++-------- fs/gfs2/util.c | 6 +++--- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 7765346e3617..704192b73605 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -75,6 +75,9 @@ #define GFS2_QD_HASH_SIZE BIT(GFS2_QD_HASH_SHIFT) #define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1) +#define QC_CHANGE 0 +#define QC_SYNC 1 + /* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */ /* -> sd_bitmap_lock */ static DEFINE_SPINLOCK(qd_lock); @@ -470,7 +473,6 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) spin_unlock(&qd_lock); if (qd) { - gfs2_assert_warn(sdp, qd->qd_change_sync); error = bh_get(qd); if (error) { clear_bit(QDF_LOCKED, &qd->qd_flags); @@ -662,7 +664,7 @@ static int sort_qd(const void *a, const void *b) return 0; } -static void do_qc(struct gfs2_quota_data *qd, s64 change) +static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type) { struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); @@ -687,16 +689,18 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) qd->qd_change = x; spin_unlock(&qd_lock); - if (!x) { + if (qc_type == QC_CHANGE) { + if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) { + qd_hold(qd); + slot_hold(qd); + } + } else { gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); clear_bit(QDF_CHANGE, &qd->qd_flags); qc->qc_flags = 0; qc->qc_id = 0; slot_put(qd); qd_put(qd); - } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) { - qd_hold(qd); - slot_hold(qd); } if (change < 0) /* Reset quiet flag if we freed some blocks */ @@ -953,7 +957,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) if (error) goto out_end_trans; - do_qc(qd, -qd->qd_change_sync); + do_qc(qd, -qd->qd_change_sync, QC_SYNC); set_bit(QDF_REFRESH, &qd->qd_flags); } @@ -1279,7 +1283,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (qid_eq(qd->qd_id, make_kqid_uid(uid)) || qid_eq(qd->qd_id, make_kqid_gid(gid))) { - do_qc(qd, change); + do_qc(qd, change, QC_CHANGE); } } } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index b9db034c7f58..dac22b1c1a2e 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -98,12 +98,12 @@ out_unlock: */ int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp) { - int flags = LM_FLAG_NOEXP | GL_EXACT; int error; - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT, &sdp->sd_freeze_gh); - if (error && error != GLR_TRYFAILED) + if (error) fs_err(sdp, "can't lock the freeze glock: %d\n", error); return error; }