From 99b8520c00711556d62a9261eec9acfac94ecc9b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 18 Mar 2024 22:22:32 +0100 Subject: [PATCH 01/29] gfs2: Remove unnecessary function prototype Function __gfs2_glock_dq() gets defined before it is used, so there is no need for a separate function declaration. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 4ea6c8bfb4e6..873d76670238 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -61,7 +61,6 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); -static void __gfs2_glock_dq(struct gfs2_holder *gh); static void handle_callback(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote); From 932a9052dc731d99865d478043e84b52937b8d54 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 11 Apr 2024 09:23:15 +0200 Subject: [PATCH 02/29] gfs2: Remove useless return statement in run_queue The return statement at the end of run_queue() is useless. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 873d76670238..d1fb0b2b5649 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -909,7 +909,6 @@ out_sched: out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); smp_mb__after_atomic(); - return; } /** From 121e730112788ab9aceedfb38d59dae2dee83301 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 18 Mar 2024 20:43:17 +0100 Subject: [PATCH 03/29] gfs2: Rename GLF_FREEING to GLF_UNLOCKED Rename the GLF_FREEING flag to GLF_UNLOCKED, and the ->go_free glock operation to ->go_unlocked. This mechanism is used to wait for the underlying DLM lock to be unlocked; being able to free the glock is a consequence of the DLM lock being unlocked. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 +- fs/gfs2/glops.c | 16 ++++++++-------- fs/gfs2/incore.h | 4 ++-- fs/gfs2/lock_dlm.c | 4 ++-- fs/gfs2/util.c | 6 +++--- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d1fb0b2b5649..959668e22c9d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2378,7 +2378,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'o'; if (test_bit(GLF_BLOCKING, gflags)) *p++ = 'b'; - if (test_bit(GLF_FREEING, gflags)) + if (test_bit(GLF_UNLOCKED, gflags)) *p++ = 'x'; if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) *p++ = 'n'; diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 68677fb69a73..39bb6758a2a0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -648,21 +648,21 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) } /** - * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it - * @gl: glock being freed + * inode_go_unlocked - wake up anyone waiting for dlm's unlock ast + * @gl: glock being unlocked * * For now, this is only used for the journal inode glock. In withdraw - * situations, we need to wait for the glock to be freed so that we know + * situations, we need to wait for the glock to be unlocked so that we know * other nodes may proceed with recovery / journal replay. */ -static void inode_go_free(struct gfs2_glock *gl) +static void inode_go_unlocked(struct gfs2_glock *gl) { /* Note that we cannot reference gl_object because it's already set * to NULL by this point in its lifecycle. */ - if (!test_bit(GLF_FREEING, &gl->gl_flags)) + if (!test_bit(GLF_UNLOCKED, &gl->gl_flags)) return; - clear_bit_unlock(GLF_FREEING, &gl->gl_flags); - wake_up_bit(&gl->gl_flags, GLF_FREEING); + clear_bit_unlock(GLF_UNLOCKED, &gl->gl_flags); + wake_up_bit(&gl->gl_flags, GLF_UNLOCKED); } /** @@ -728,7 +728,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB, - .go_free = inode_go_free, + .go_unlocked = inode_go_unlocked, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 60abd7050c99..aa6dbde9cd19 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -224,7 +224,7 @@ struct gfs2_glock_operations { void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); - void (*go_free)(struct gfs2_glock *gl); + void (*go_unlocked)(struct gfs2_glock *gl); const int go_subclass; const int go_type; const unsigned long go_flags; @@ -329,7 +329,7 @@ enum { GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, - GLF_FREEING = 16, /* Wait for glock to be freed */ + GLF_UNLOCKED = 16, /* Wait for glock to be unlocked */ GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ GLF_VERIFY_EVICT = 18, /* iopen glocks only */ }; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 49059274a528..cb2ad0031f9e 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -134,8 +134,8 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ - if (gl->gl_ops->go_free) - gl->gl_ops->go_free(gl); + if (gl->gl_ops->go_unlocked) + gl->gl_ops->go_unlocked(gl); gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index af4758d8d894..7e79b1a9e35f 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -206,9 +206,9 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) * on other nodes to be successful, otherwise we remain the owner of * the glock as far as dlm is concerned. */ - if (i_gl->gl_ops->go_free) { - set_bit(GLF_FREEING, &i_gl->gl_flags); - wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); + if (i_gl->gl_ops->go_unlocked) { + set_bit(GLF_UNLOCKED, &i_gl->gl_flags); + wait_on_bit(&i_gl->gl_flags, GLF_UNLOCKED, TASK_UNINTERRUPTIBLE); } /* From 0a0383a93e5d28f2873a72b8378c2b36404e431e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Mar 2024 01:01:28 +0100 Subject: [PATCH 04/29] gfs2: Rename GLF_REPLY_PENDING to GLF_HAVE_REPLY The GLF_REPLY_PENDING flag indicates to glock_work_func() that in response to a locking request, DLM has sent a reply that needs to be processed. A flag with that name could as well indicate that we are waiting on a reply from DLM, however. To disambiguate these two cases, rename the flag to GLF_HAVE_REPLY. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 14 +++++++------- fs/gfs2/incore.h | 2 +- fs/gfs2/trace_gfs2.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 959668e22c9d..34af0e7db98b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1109,8 +1109,8 @@ static void glock_work_func(struct work_struct *work) unsigned int drop_refs = 1; spin_lock(&gl->gl_lockref.lock); - if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { - clear_bit(GLF_REPLY_PENDING, &gl->gl_flags); + if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) { + clear_bit(GLF_HAVE_REPLY, &gl->gl_flags); finish_xmote(gl, gl->gl_reply); drop_refs++; } @@ -1642,7 +1642,7 @@ unlock: add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gl->gl_lockref.count++; gfs2_glock_queue_work(gl, 0); } @@ -1930,7 +1930,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) gl->gl_name.ln_type == LM_TYPE_INODE) { if (time_before(now, holdtime)) delay = holdtime - now; - if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) delay = gl->gl_hold_time; } handle_callback(gl, state, delay, true); @@ -1993,7 +1993,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } gl->gl_lockref.count++; - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } @@ -2186,7 +2186,7 @@ static void thaw_glock(struct gfs2_glock *gl) return; spin_lock(&gl->gl_lockref.lock); - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } @@ -2364,7 +2364,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'f'; if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) *p++ = 'i'; - if (test_bit(GLF_REPLY_PENDING, gflags)) + if (test_bit(GLF_HAVE_REPLY, gflags)) *p++ = 'r'; if (test_bit(GLF_INITIAL, gflags)) *p++ = 'I'; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aa6dbde9cd19..e4423075433b 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -322,7 +322,7 @@ enum { GLF_DIRTY = 6, GLF_LFLUSH = 7, GLF_INVALIDATE_IN_PROGRESS = 8, - GLF_REPLY_PENDING = 9, + GLF_HAVE_REPLY = 9, GLF_INITIAL = 10, GLF_FROZEN = 11, GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */ diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index a5deb9f86831..3721f0333dd7 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -53,7 +53,7 @@ {(1UL << GLF_DIRTY), "y" }, \ {(1UL << GLF_LFLUSH), "f" }, \ {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ - {(1UL << GLF_REPLY_PENDING), "r" }, \ + {(1UL << GLF_HAVE_REPLY), "r" }, \ {(1UL << GLF_INITIAL), "I" }, \ {(1UL << GLF_FROZEN), "F" }, \ {(1UL << GLF_LRU), "L" }, \ From 1fb5f67e21805333daca4b0c96416254de96400e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 28 Mar 2024 06:15:08 +0100 Subject: [PATCH 05/29] gfs2: Rename GLF_FROZEN to GLF_HAVE_FROZEN_REPLY The GLF_FROZEN flag indicates that a reply to a DLM locking request has been received, but should not be processed at this time. To clarify that meaning, rename the flag to GLF_HAVE_FROZEN_REPLY. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 8 ++++---- fs/gfs2/incore.h | 2 +- fs/gfs2/trace_gfs2.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 34af0e7db98b..bac7078e4870 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1641,7 +1641,7 @@ unlock: spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && - test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { + test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))) { set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gl->gl_lockref.count++; gfs2_glock_queue_work(gl, 0); @@ -1986,7 +1986,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { - set_bit(GLF_FROZEN, &gl->gl_flags); + set_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); return; } @@ -2180,7 +2180,7 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp) static void thaw_glock(struct gfs2_glock *gl) { - if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + if (!test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags)) return; if (!lockref_get_not_dead(&gl->gl_lockref)) return; @@ -2368,7 +2368,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'r'; if (test_bit(GLF_INITIAL, gflags)) *p++ = 'I'; - if (test_bit(GLF_FROZEN, gflags)) + if (test_bit(GLF_HAVE_FROZEN_REPLY, gflags)) *p++ = 'F'; if (!list_empty(&gl->gl_holders)) *p++ = 'q'; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e4423075433b..73f24b86338c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -324,7 +324,7 @@ enum { GLF_INVALIDATE_IN_PROGRESS = 8, GLF_HAVE_REPLY = 9, GLF_INITIAL = 10, - GLF_FROZEN = 11, + GLF_HAVE_FROZEN_REPLY = 11, GLF_INSTANTIATE_IN_PROG = 12, /* instantiate happening now */ GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 3721f0333dd7..65748bfaef69 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -55,7 +55,7 @@ {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ {(1UL << GLF_HAVE_REPLY), "r" }, \ {(1UL << GLF_INITIAL), "I" }, \ - {(1UL << GLF_FROZEN), "F" }, \ + {(1UL << GLF_HAVE_FROZEN_REPLY), "F" }, \ {(1UL << GLF_LRU), "L" }, \ {(1UL << GLF_OBJECT), "o" }, \ {(1UL << GLF_BLOCKING), "b" }) From edeb180f1c8cfe2e789109766b636430adb5a0a3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 30 Mar 2024 04:41:48 +0100 Subject: [PATCH 06/29] gfs2: Rename handle_callback to request_demote Function handle_callback() is used to request a glock demote. This often happens in response to a conflicting remote locking request and subsequent bast callback from DLM, but there are other reasons for triggering a demote request as well, such as when trying to release a glock in response to memory pressure. To clarify that, rename the function to request_demote(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index bac7078e4870..c583ba05e8f5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -61,8 +61,8 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); -static void handle_callback(struct gfs2_glock *gl, unsigned int state, - unsigned long delay, bool remote); +static void request_demote(struct gfs2_glock *gl, unsigned int state, + unsigned long delay, bool remote); static struct dentry *gfs2_root; static struct workqueue_struct *glock_workqueue; @@ -811,7 +811,7 @@ skip_inval: (target != LM_ST_UNLOCKED || test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { if (!is_system_glock(gl)) { - handle_callback(gl, LM_ST_UNLOCKED, 0, false); /* sets demote */ + request_demote(gl, LM_ST_UNLOCKED, 0, false); /* * Ordinarily, we would call dlm and its callback would call * finish_xmote, which would call state_change() to the new state. @@ -1459,7 +1459,7 @@ out: } /** - * handle_callback - process a demote request + * request_demote - process a demote request * @gl: the glock * @state: the state the caller wants us to change to * @delay: zero to demote immediately; otherwise pending demote @@ -1469,8 +1469,8 @@ out: * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ -static void handle_callback(struct gfs2_glock *gl, unsigned int state, - unsigned long delay, bool remote) +static void request_demote(struct gfs2_glock *gl, unsigned int state, + unsigned long delay, bool remote) { if (delay) set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); @@ -1686,7 +1686,7 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) * below. */ if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); + request_demote(gl, LM_ST_UNLOCKED, 0, false); list_del_init(&gh->gh_list); clear_bit(HIF_HOLDER, &gh->gh_iflags); @@ -1933,7 +1933,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) if (test_bit(GLF_HAVE_REPLY, &gl->gl_flags)) delay = gl->gl_hold_time; } - handle_callback(gl, state, delay, true); + request_demote(gl, state, delay, true); gfs2_glock_queue_work(gl, delay); spin_unlock(&gl->gl_lockref.lock); } @@ -2062,7 +2062,7 @@ add_back_to_lru: freed++; gl->gl_lockref.count++; if (demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); + request_demote(gl, LM_ST_UNLOCKED, 0, false); gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); @@ -2205,7 +2205,7 @@ static void clear_glock(struct gfs2_glock *gl) if (!__lockref_is_dead(&gl->gl_lockref)) { gl->gl_lockref.count++; if (gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); + request_demote(gl, LM_ST_UNLOCKED, 0, false); gfs2_glock_queue_work(gl, 0); } spin_unlock(&gl->gl_lockref.lock); From 97d6fdcd79752af8686ab58a0b9389ba80ae0fae Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 25 Mar 2024 16:59:40 +0100 Subject: [PATCH 07/29] gfs2: Update glocks documentation Rearrange the table of locking modes and associated caching capability to be in order of increasing caching capability. Update the description of the glock operations. Signed-off-by: Andreas Gruenbacher --- Documentation/filesystems/gfs2-glocks.rst | 52 +++++++++++------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/Documentation/filesystems/gfs2-glocks.rst b/Documentation/filesystems/gfs2-glocks.rst index 8a5842929b60..17ce3413608a 100644 --- a/Documentation/filesystems/gfs2-glocks.rst +++ b/Documentation/filesystems/gfs2-glocks.rst @@ -40,14 +40,14 @@ shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O operations. The glocks are basically a lock plus some routines which deal with cache management. The following rules apply for the cache: -========== ========== ============== ========== ============== -Glock mode Cache data Cache Metadata Dirty Data Dirty Metadata -========== ========== ============== ========== ============== - UN No No No No - SH Yes Yes No No - DF No Yes No No - EX Yes Yes Yes Yes -========== ========== ============== ========== ============== +========== ============== ========== ========== ============== +Glock mode Cache Metadata Cache data Dirty Data Dirty Metadata +========== ============== ========== ========== ============== + UN No No No No + DF Yes No No No + SH Yes Yes No No + EX Yes Yes Yes Yes +========== ============== ========== ========== ============== These rules are implemented using the various glock operations which are defined for each type of glock. Not all types of glocks use @@ -55,23 +55,24 @@ all the modes. Only inode glocks use the DF mode for example. Table of glock operations and per type constants: -============= ============================================================= +============== ============================================================= Field Purpose -============= ============================================================= -go_xmote_th Called before remote state change (e.g. to sync dirty data) +============== ============================================================= +go_sync Called before remote state change (e.g. to sync dirty data) go_xmote_bh Called after remote state change (e.g. to refill cache) go_inval Called if remote state change requires invalidating the cache go_demote_ok Returns boolean value of whether its ok to demote a glock (e.g. checks timeout, and that there is no cached data) -go_lock Called for the first local holder of a lock -go_unlock Called on the final local unlock of a lock +go_instantiate Called when a glock has been acquired +go_held Called every time a glock holder is acquired go_dump Called to print content of object for debugfs file, or on error to dump glock to the log. -go_type The type of the glock, ``LM_TYPE_*`` go_callback Called if the DLM sends a callback to drop this lock +go_unlocked Called when a glock is unlocked (dlm_unlock()) +go_type The type of the glock, ``LM_TYPE_*`` go_flags GLOF_ASPACE is set, if the glock has an address space associated with it -============= ============================================================= +============== ============================================================= The minimum hold time for each lock is the time after a remote lock grant for which we ignore remote demote requests. This is in order to @@ -82,26 +83,25 @@ to by multiple nodes. By delaying the demotion in response to a remote callback, that gives the userspace program time to make some progress before the pages are unmapped. -There is a plan to try and remove the go_lock and go_unlock callbacks -if possible, in order to try and speed up the fast path though the locking. -Also, eventually we hope to make the glock "EX" mode locally shared -such that any local locking will be done with the i_mutex as required -rather than via the glock. +Eventually, we hope to make the glock "EX" mode locally shared such that any +local locking will be done with the i_mutex as required rather than via the +glock. Locking rules for glock operations: -============= ====================== ============================= +============== ====================== ============================= Operation GLF_LOCK bit lock held gl_lockref.lock spinlock held -============= ====================== ============================= -go_xmote_th Yes No +============== ====================== ============================= +go_sync Yes No go_xmote_bh Yes No go_inval Yes No go_demote_ok Sometimes Yes -go_lock Yes No -go_unlock Yes No +go_instantiate No No +go_held No No go_dump Sometimes Yes go_callback Sometimes (N/A) Yes -============= ====================== ============================= +go_unlocked Yes No +============== ====================== ============================= .. Note:: From c8cf2d9f189bf53beefd53b90cc65c556d16dd5a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 30 Mar 2024 06:51:41 +0100 Subject: [PATCH 08/29] gfs2: Remove outdated comment in glock_work_func Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c583ba05e8f5..ef4fbb197c99 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1137,11 +1137,7 @@ static void glock_work_func(struct work_struct *work) gfs2_glock_queue_work(gl, delay); } - /* - * Drop the remaining glock references manually here. (Mind that - * gfs2_glock_queue_work depends on the lockref spinlock begin held - * here as well.) - */ + /* Drop the remaining glock references manually. */ gl->gl_lockref.count -= drop_refs; if (!gl->gl_lockref.count) { __gfs2_glock_put(gl); From c8758ad005c98b15cd8b7a559dc51f8ddbc56d0a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 3 Apr 2024 07:09:30 +0200 Subject: [PATCH 09/29] gfs2: Invert the GLF_INITIAL flag Invert the meaning of the GLF_INITIAL flag: right now, when GLF_INITIAL is set, a DLM lock exists and we have a valid identifier for it; when GLF_INITIAL is cleared, no DLM lock exists (yet). This is confusing. In addition, it makes more sense to highlight the exceptional case (i.e., no DLM lock exists yet) in glock dumps and trace points than to highlight the common case. To avoid confusion between the "old" and the "new" meaning of the flag, use 'a' instead of 'I' to represent the flag. For improved code consistency, check if the GLF_INITIAL flag is cleared to determine whether a DLM lock exists instead of checking if the lock identifier is non-zero. Document what the flag is used for. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 6 ++++-- fs/gfs2/lock_dlm.c | 24 +++++++++++++++++------- fs/gfs2/trace_gfs2.h | 2 +- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ef4fbb197c99..5fed5a22a8e7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1237,7 +1237,9 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, atomic_inc(&sdp->sd_glock_disposal); gl->gl_node.next = NULL; - gl->gl_flags = glops->go_instantiate ? BIT(GLF_INSTANTIATE_NEEDED) : 0; + gl->gl_flags = BIT(GLF_INITIAL); + if (glops->go_instantiate) + gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED); gl->gl_name = name; lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); gl->gl_lockref.count = 1; @@ -2363,7 +2365,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) if (test_bit(GLF_HAVE_REPLY, gflags)) *p++ = 'r'; if (test_bit(GLF_INITIAL, gflags)) - *p++ = 'I'; + *p++ = 'a'; if (test_bit(GLF_HAVE_FROZEN_REPLY, gflags)) *p++ = 'F'; if (!list_empty(&gl->gl_holders)) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index cb2ad0031f9e..fa5134df985f 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -163,11 +163,21 @@ static void gdlm_ast(void *arg) BUG(); } - set_bit(GLF_INITIAL, &gl->gl_flags); + /* + * The GLF_INITIAL flag is initially set for new glocks. Upon the + * first successful new (non-conversion) request, we clear this flag to + * indicate that a DLM lock exists and that gl->gl_lksb.sb_lkid is the + * identifier to use for identifying it. + * + * Any failed initial requests do not create a DLM lock, so we ignore + * the gl->gl_lksb.sb_lkid values that come with such requests. + */ + + clear_bit(GLF_INITIAL, &gl->gl_flags); gfs2_glock_complete(gl, ret); return; out: - if (!test_bit(GLF_INITIAL, &gl->gl_flags)) + if (test_bit(GLF_INITIAL, &gl->gl_flags)) gl->gl_lksb.sb_lkid = 0; gfs2_glock_complete(gl, ret); } @@ -239,7 +249,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, BUG(); } - if (gl->gl_lksb.sb_lkid != 0) { + if (!test_bit(GLF_INITIAL, &gl->gl_flags)) { lkf |= DLM_LKF_CONVERT; if (test_bit(GLF_BLOCKING, &gl->gl_flags)) lkf |= DLM_LKF_QUECVT; @@ -270,14 +280,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, lkf = make_flags(gl, flags, req); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); - if (gl->gl_lksb.sb_lkid) { - gfs2_update_request_times(gl); - } else { + if (test_bit(GLF_INITIAL, &gl->gl_flags)) { memset(strname, ' ', GDLM_STRNAME_BYTES - 1); strname[GDLM_STRNAME_BYTES - 1] = '\0'; gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type); gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number); gl->gl_dstamp = ktime_get_real(); + } else { + gfs2_update_request_times(gl); } /* * Submit the actual lock request. @@ -301,7 +311,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) BUG_ON(!__lockref_is_dead(&gl->gl_lockref)); - if (gl->gl_lksb.sb_lkid == 0) { + if (test_bit(GLF_INITIAL, &gl->gl_flags)) { gfs2_glock_free(gl); return; } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 65748bfaef69..8eae8d62a413 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -54,7 +54,7 @@ {(1UL << GLF_LFLUSH), "f" }, \ {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ {(1UL << GLF_HAVE_REPLY), "r" }, \ - {(1UL << GLF_INITIAL), "I" }, \ + {(1UL << GLF_INITIAL), "a" }, \ {(1UL << GLF_HAVE_FROZEN_REPLY), "F" }, \ {(1UL << GLF_LRU), "L" }, \ {(1UL << GLF_OBJECT), "o" }, \ From 8f6b8f142bdab2ed8c8fcd00def947c372382401 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 8 Apr 2024 09:36:48 +0200 Subject: [PATCH 10/29] gfs2: gfs2_glock_get cleanup Clean up the messy code in gfs2_glock_get(). No change in functionality. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 5fed5a22a8e7..2d4e927c4d2f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1203,13 +1203,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, .ln_sbd = sdp }; struct gfs2_glock *gl, *tmp; struct address_space *mapping; - int ret = 0; gl = find_insert_glock(&name, NULL); - if (gl) { - *glp = gl; - return 0; - } + if (gl) + goto found; if (!create) return -ENOENT; @@ -1271,23 +1268,19 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, } tmp = find_insert_glock(&name, gl); - if (!tmp) { - *glp = gl; - goto out; - } - if (IS_ERR(tmp)) { - ret = PTR_ERR(tmp); - goto out_free; - } - *glp = tmp; + if (tmp) { + gfs2_glock_dealloc(&gl->gl_rcu); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_kill_wait); -out_free: - gfs2_glock_dealloc(&gl->gl_rcu); - if (atomic_dec_and_test(&sdp->sd_glock_disposal)) - wake_up(&sdp->sd_kill_wait); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + gl = tmp; + } -out: - return ret; +found: + *glp = gl; + return 0; } /** From 51568ac2e9d49b66f456dacd376af308f8695497 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 9 Apr 2024 09:24:27 +0200 Subject: [PATCH 11/29] gfs2: Report when glocks cannot be freed for a long time When glocks cannot be freed for a long time, avoid the "task blocked for more than N seconds" messages and report how many glocks are still outstanding, instead. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2d4e927c4d2f..b819f8b8d98b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2248,13 +2248,25 @@ void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { + unsigned long start = jiffies; + bool timed_out = false; + set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); flush_workqueue(glock_workqueue); glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); - wait_event_timeout(sdp->sd_kill_wait, - atomic_read(&sdp->sd_glock_disposal) == 0, - HZ * 600); + while (!timed_out) { + wait_event_timeout(sdp->sd_kill_wait, + !atomic_read(&sdp->sd_glock_disposal), + HZ * 60); + if (!atomic_read(&sdp->sd_glock_disposal)) + break; + timed_out = time_after(jiffies, start + (HZ * 600)); + fs_warn(sdp, "%u glocks left after %u seconds%s\n", + atomic_read(&sdp->sd_glock_disposal), + jiffies_to_msecs(jiffies - start) / 1000, + timed_out ? ":" : "; still waiting"); + } gfs2_lm_unmount(sdp); gfs2_free_dead_glocks(sdp); glock_hash_walk(dump_glock_func, sdp); From 30e388d573673474cbd089dec83688331c117add Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 11 Apr 2024 14:13:30 +0200 Subject: [PATCH 12/29] gfs2: Switch to a per-filesystem glock workqueue Switch to a per-filesystem glock workqueue. Additional workqueues are cheap nowadays, and keeping separate workqueues allows to flush the work of each filesystem without affecting the others. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 20 +++++++------------- fs/gfs2/incore.h | 1 + fs/gfs2/ops_fstype.c | 12 ++++++++++-- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b819f8b8d98b..32991cb22023 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -65,7 +65,6 @@ static void request_demote(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote); static struct dentry *gfs2_root; -static struct workqueue_struct *glock_workqueue; static LIST_HEAD(lru_list); static atomic_t lru_count = ATOMIC_INIT(0); static DEFINE_SPINLOCK(lru_lock); @@ -274,7 +273,9 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) * work queue. */ static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) { - if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + if (!queue_delayed_work(sdp->sd_glock_wq, &gl->gl_work, delay)) { /* * We are holding the lockref spinlock, and the work was still * queued above. The queued work (glock_work_func) takes that @@ -2252,9 +2253,10 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) bool timed_out = false; set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); - flush_workqueue(glock_workqueue); + flush_workqueue(sdp->sd_glock_wq); glock_hash_walk(clear_glock, sdp); - flush_workqueue(glock_workqueue); + flush_workqueue(sdp->sd_glock_wq); + while (!timed_out) { wait_event_timeout(sdp->sd_kill_wait, !atomic_read(&sdp->sd_glock_disposal), @@ -2270,6 +2272,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) gfs2_lm_unmount(sdp); gfs2_free_dead_glocks(sdp); glock_hash_walk(dump_glock_func, sdp); + destroy_workqueue(sdp->sd_glock_wq); } static const char *state2str(unsigned state) @@ -2534,16 +2537,8 @@ int __init gfs2_glock_init(void) if (ret < 0) return ret; - glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZABLE, 0); - if (!glock_workqueue) { - rhashtable_destroy(&gl_hash_table); - return -ENOMEM; - } - glock_shrinker = shrinker_alloc(0, "gfs2-glock"); if (!glock_shrinker) { - destroy_workqueue(glock_workqueue); rhashtable_destroy(&gl_hash_table); return -ENOMEM; } @@ -2563,7 +2558,6 @@ void gfs2_glock_exit(void) { shrinker_free(glock_shrinker); rhashtable_destroy(&gl_hash_table); - destroy_workqueue(glock_workqueue); } static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 73f24b86338c..5ee46af1f4bd 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -772,6 +772,7 @@ struct gfs2_sbd { /* Workqueue stuff */ + struct workqueue_struct *sd_glock_wq; struct workqueue_struct *sd_delete_wq; /* Daemon stuff */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 05975ec76d35..0561edd6cc86 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1188,11 +1188,17 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name); + error = -ENOMEM; + sdp->sd_glock_wq = alloc_workqueue("gfs2-glock/%s", + WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0, + sdp->sd_fsname); + if (!sdp->sd_glock_wq) + goto fail_free; + sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname); - error = -ENOMEM; if (!sdp->sd_delete_wq) - goto fail_free; + goto fail_glock_wq; error = gfs2_sys_fs_add(sdp); if (error) @@ -1301,6 +1307,8 @@ fail_debug: gfs2_sys_fs_del(sdp); fail_delete_wq: destroy_workqueue(sdp->sd_delete_wq); +fail_glock_wq: + destroy_workqueue(sdp->sd_glock_wq); fail_free: free_sbd(sdp); sb->s_fs_info = NULL; From 767fd5a0160774178a597b7a7b6e07915fe00efa Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 10 Apr 2024 06:58:00 +0200 Subject: [PATCH 13/29] gfs2: Revise glock reference counting model In the current glock reference counting model, a bias of one is added to a glock's refcount when it is locked (gl->gl_state != LM_ST_UNLOCKED). A glock is removed from the lru_list when it is enqueued, and added back when it is dequeued. This isn't a very appropriate model because most glocks are held for long periods of time (for example, the inode "owns" references to its inode and iopen glocks as long as the inode is cached even when the glock state changes to LM_ST_UNLOCKED), and they can only be freed when they are no longer referenced, anyway. Fix this by getting rid of the refcount bias for locked glocks. That way, we can use lockref_put_or_lock() to efficiently drop all but the last glock reference, and put the glock onto the lru_list when the last reference is dropped. When find_insert_glock() returns a reference to a cached glock, it removes the glock from the lru_list. Dumping the "glocks" and "glstats" debugfs files also takes glock references, but instead of removing the glocks from the lru_list in that case as well, we leave them on the list. This ensures that dumping those files won't perturb the order of the glocks on the lru_list. In addition, when the last reference to an *unlocked* glock is dropped, we immediately free it; this preserves the preexisting behavior. If it later turns out that caching unlocked glocks is useful in some situations, we can change the caching strategy. It is currently unclear if a glock that has no active references can have the GLF_LFLUSH flag set. To make sure that such a glock won't accidentally be evicted due to memory pressure, we add a GLF_LFLUSH check to gfs2_dispose_glock_lru(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 56 ++++++++++++++++++++++++++----------------------- fs/gfs2/glock.h | 1 - fs/gfs2/super.c | 1 - 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 32991cb22023..e2a72c21194a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -237,7 +237,7 @@ static int demote_ok(const struct gfs2_glock *gl) } -void gfs2_glock_add_to_lru(struct gfs2_glock *gl) +static void gfs2_glock_add_to_lru(struct gfs2_glock *gl) { if (!(gl->gl_ops->go_flags & GLOF_LRU)) return; @@ -305,6 +305,20 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } +static bool __gfs2_glock_put_or_lock(struct gfs2_glock *gl) +{ + if (lockref_put_or_lock(&gl->gl_lockref)) + return true; + GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1); + if (gl->gl_state != LM_ST_UNLOCKED) { + gl->gl_lockref.count--; + gfs2_glock_add_to_lru(gl); + spin_unlock(&gl->gl_lockref.lock); + return true; + } + return false; +} + /** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put @@ -313,7 +327,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { - if (lockref_put_or_lock(&gl->gl_lockref)) + if (__gfs2_glock_put_or_lock(gl)) return; __gfs2_glock_put(gl); @@ -328,10 +342,9 @@ void gfs2_glock_put(struct gfs2_glock *gl) */ void gfs2_glock_put_async(struct gfs2_glock *gl) { - if (lockref_put_or_lock(&gl->gl_lockref)) + if (__gfs2_glock_put_or_lock(gl)) return; - GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1); gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); } @@ -570,18 +583,6 @@ static inline struct gfs2_holder *find_last_waiter(const struct gfs2_glock *gl) static void state_change(struct gfs2_glock *gl, unsigned int new_state) { - int held1, held2; - - held1 = (gl->gl_state != LM_ST_UNLOCKED); - held2 = (new_state != LM_ST_UNLOCKED); - - if (held1 != held2) { - GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); - if (held2) - gl->gl_lockref.count++; - else - gl->gl_lockref.count--; - } if (new_state != gl->gl_target) /* shorten our minimum hold time */ gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, @@ -1139,10 +1140,14 @@ static void glock_work_func(struct work_struct *work) } /* Drop the remaining glock references manually. */ + GLOCK_BUG_ON(gl, gl->gl_lockref.count < drop_refs); gl->gl_lockref.count -= drop_refs; if (!gl->gl_lockref.count) { - __gfs2_glock_put(gl); - return; + if (gl->gl_state == LM_ST_UNLOCKED) { + __gfs2_glock_put(gl); + return; + } + gfs2_glock_add_to_lru(gl); } spin_unlock(&gl->gl_lockref.lock); } @@ -1178,6 +1183,8 @@ again: out: rcu_read_unlock(); finish_wait(wq, &wait.wait); + if (gl) + gfs2_glock_remove_from_lru(gl); return gl; } @@ -1626,9 +1633,6 @@ unlock: return error; } - if (test_bit(GLF_LRU, &gl->gl_flags)) - gfs2_glock_remove_from_lru(gl); - gh->gh_error = 0; spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); @@ -1693,9 +1697,6 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) - gfs2_glock_add_to_lru(gl); - if (unlikely(!fast_path)) { gl->gl_lockref.count++; if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && @@ -2008,10 +2009,12 @@ static int glock_cmp(void *priv, const struct list_head *a, static bool can_free_glock(struct gfs2_glock *gl) { - bool held = gl->gl_state != LM_ST_UNLOCKED; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; return !test_bit(GLF_LOCK, &gl->gl_flags) && - gl->gl_lockref.count == held; + !gl->gl_lockref.count && + (!test_bit(GLF_LFLUSH, &gl->gl_flags) || + test_bit(SDF_KILL, &sdp->sd_flags)); } /** @@ -2177,6 +2180,7 @@ static void thaw_glock(struct gfs2_glock *gl) if (!lockref_get_not_dead(&gl->gl_lockref)) return; + gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_lockref.lock); set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 19aef6d53267..adf0091cc98f 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -250,7 +250,6 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); void gfs2_glock_thaw(struct gfs2_sbd *sdp); -void gfs2_glock_add_to_lru(struct gfs2_glock *gl); void gfs2_glock_free(struct gfs2_glock *gl); void gfs2_glock_free_later(struct gfs2_glock *gl); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 7a5aedfcd52a..6678060ed4d2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1524,7 +1524,6 @@ out: if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); - gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put_eventually(ip->i_gl); rcu_assign_pointer(ip->i_gl, NULL); } From 3f4475bf24de31cce4a0c7d1372d4ab02b1f1407 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 30 Mar 2024 05:08:32 +0100 Subject: [PATCH 14/29] Revert "GFS2: Don't add all glocks to the lru" This reverts commit e7ccaf5fe1590667b3fa2f8df5c5ec9ba0dc5b85. Before commit e7ccaf5fe159, every time a resource group glock was dequeued by gfs2_glock_dq(), it was added to the glock LRU list even though the glock was still referenced by the resource group and could never be evicted, anyway. Commit e7ccaf5fe159 added a GLOF_LRU hack to avoid that overhead for resource group glocks, and that hack was since adopted for some other types of glocks as well. We now no longer add glocks to the glock LRU list while they are still referenced. This solves the underlying problem, and obsoletes the GLOF_LRU hack. Signed-off-by: Andreas Gruenbacher (cherry picked from commit 3e5257c810cba91e274d07f3db5cf013c7c830be) --- fs/gfs2/glock.c | 7 ------- fs/gfs2/glops.c | 8 ++++---- fs/gfs2/incore.h | 1 - 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e2a72c21194a..19f8df91b72e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -239,11 +239,7 @@ static int demote_ok(const struct gfs2_glock *gl) static void gfs2_glock_add_to_lru(struct gfs2_glock *gl) { - if (!(gl->gl_ops->go_flags & GLOF_LRU)) - return; - spin_lock(&lru_lock); - list_move_tail(&gl->gl_lru, &lru_list); if (!test_bit(GLF_LRU, &gl->gl_flags)) { @@ -256,9 +252,6 @@ static void gfs2_glock_add_to_lru(struct gfs2_glock *gl) static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) { - if (!(gl->gl_ops->go_flags & GLOF_LRU)) - return; - spin_lock(&lru_lock); if (test_bit(GLF_LRU, &gl->gl_flags)) { list_del_init(&gl->gl_lru); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 39bb6758a2a0..7bc7f6785abd 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -727,7 +727,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_held = inode_go_held, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, - .go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB, + .go_flags = GLOF_ASPACE | GLOF_LVB, .go_unlocked = inode_go_unlocked, }; @@ -751,13 +751,13 @@ const struct gfs2_glock_operations gfs2_iopen_glops = { .go_type = LM_TYPE_IOPEN, .go_callback = iopen_go_callback, .go_dump = inode_go_dump, - .go_flags = GLOF_LRU | GLOF_NONDISK, + .go_flags = GLOF_NONDISK, .go_subclass = 1, }; const struct gfs2_glock_operations gfs2_flock_glops = { .go_type = LM_TYPE_FLOCK, - .go_flags = GLOF_LRU | GLOF_NONDISK, + .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_nondisk_glops = { @@ -768,7 +768,7 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = { const struct gfs2_glock_operations gfs2_quota_glops = { .go_type = LM_TYPE_QUOTA, - .go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK, + .go_flags = GLOF_LVB | GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_journal_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 5ee46af1f4bd..f75982d45635 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -230,7 +230,6 @@ struct gfs2_glock_operations { const unsigned long go_flags; #define GLOF_ASPACE 1 /* address space attached */ #define GLOF_LVB 2 /* Lock Value Block attached */ -#define GLOF_LRU 4 /* LRU managed */ #define GLOF_NONDISK 8 /* not I/O related */ }; From 713f8834389f4b34bc8b449412202543c8b32214 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 28 Mar 2024 20:46:25 +0100 Subject: [PATCH 15/29] gfs2: Get rid of demote_ok checks The demote_ok glock operation is only still used to prevent the inode glocks of the "jindex" and "rindex" directories from getting recycled while they are still referenced by sdp->sd_jindex and sdp->sd_rindex. However, the LRU walking code will no longer recycle glocks which are referenced, so the demote_ok glock operation is obsolete and can be removed. Each of a glock's holders in the gl_holders list is holding a reference on the glock, so when the list of holders isn't empty in demote_ok(), the existing reference count check will already prevent the glock from getting released. This means that demote_ok() is obsolete as well. Signed-off-by: Andreas Gruenbacher --- Documentation/filesystems/gfs2-glocks.rst | 3 --- fs/gfs2/glock.c | 23 +---------------------- fs/gfs2/glops.c | 18 ------------------ fs/gfs2/incore.h | 1 - 4 files changed, 1 insertion(+), 44 deletions(-) diff --git a/Documentation/filesystems/gfs2-glocks.rst b/Documentation/filesystems/gfs2-glocks.rst index 17ce3413608a..adc0d4c4d979 100644 --- a/Documentation/filesystems/gfs2-glocks.rst +++ b/Documentation/filesystems/gfs2-glocks.rst @@ -61,8 +61,6 @@ Field Purpose go_sync Called before remote state change (e.g. to sync dirty data) go_xmote_bh Called after remote state change (e.g. to refill cache) go_inval Called if remote state change requires invalidating the cache -go_demote_ok Returns boolean value of whether its ok to demote a glock - (e.g. checks timeout, and that there is no cached data) go_instantiate Called when a glock has been acquired go_held Called every time a glock holder is acquired go_dump Called to print content of object for debugfs file, or on @@ -95,7 +93,6 @@ Operation GLF_LOCK bit lock held gl_lockref.lock spinlock held go_sync Yes No go_xmote_bh Yes No go_inval Yes No -go_demote_ok Sometimes Yes go_instantiate No No go_held No No go_dump Sometimes Yes diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 19f8df91b72e..7f68e3d217e6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -216,27 +216,6 @@ struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl) return gl; } -/** - * demote_ok - Check to see if it's ok to unlock a glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int demote_ok(const struct gfs2_glock *gl) -{ - const struct gfs2_glock_operations *glops = gl->gl_ops; - - if (gl->gl_state == LM_ST_UNLOCKED) - return 0; - if (!list_empty(&gl->gl_holders)) - return 0; - if (glops->go_demote_ok) - return glops->go_demote_ok(gl); - return 1; -} - - static void gfs2_glock_add_to_lru(struct gfs2_glock *gl) { spin_lock(&lru_lock); @@ -2049,7 +2028,7 @@ add_back_to_lru: clear_bit(GLF_LRU, &gl->gl_flags); freed++; gl->gl_lockref.count++; - if (demote_ok(gl)) + if (gl->gl_state != LM_ST_UNLOCKED) request_demote(gl, LM_ST_UNLOCKED, 0, false); gfs2_glock_queue_work(gl, 0); spin_unlock(&gl->gl_lockref.lock); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 7bc7f6785abd..95d8081681dc 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -385,23 +385,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) gfs2_clear_glop_pending(ip); } -/** - * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock - * @gl: the glock - * - * Returns: 1 if it's ok - */ - -static int inode_go_demote_ok(const struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - - if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) - return 0; - - return 1; -} - static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -722,7 +705,6 @@ const struct gfs2_glock_operations gfs2_meta_glops = { const struct gfs2_glock_operations gfs2_inode_glops = { .go_sync = inode_go_sync, .go_inval = inode_go_inval, - .go_demote_ok = inode_go_demote_ok, .go_instantiate = inode_go_instantiate, .go_held = inode_go_held, .go_dump = inode_go_dump, diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index f75982d45635..90fd2584357a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -218,7 +218,6 @@ struct gfs2_glock_operations { int (*go_sync) (struct gfs2_glock *gl); int (*go_xmote_bh)(struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); - int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_instantiate) (struct gfs2_glock *gl); int (*go_held)(struct gfs2_holder *gh); void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl, From 51316523d1f233c1cea182d4cccbc3b22ef75d87 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 1 Jun 2024 21:49:54 +0200 Subject: [PATCH 16/29] gfs2: Minor gfs2_quota_init error path cleanup Add a fail_brelse label and use it where useful. Move variable bh out of the loop to extend its visibility to the new label. No functional change. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index aa9cf0102848..d75eeb327060 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1407,6 +1407,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) unsigned int found = 0; unsigned int hash; unsigned int bm_size; + struct buffer_head *bh; u64 dblock; u32 extlen = 0; int error; @@ -1426,7 +1427,6 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return error; for (x = 0; x < blocks; x++) { - struct buffer_head *bh; const struct gfs2_quota_change *qc; unsigned int y; @@ -1440,10 +1440,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) bh = gfs2_meta_ra(ip->i_gl, dblock, extlen); if (!bh) goto fail; - if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) { - brelse(bh); - goto fail; - } + if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) + goto fail_brelse; qc = (const struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; @@ -1461,10 +1459,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) hash = gfs2_qd_hash(sdp, qc_id); qd = qd_alloc(hash, sdp, qc_id); - if (qd == NULL) { - brelse(bh); - goto fail; - } + if (qd == NULL) + goto fail_brelse; set_bit(QDF_CHANGE, &qd->qd_flags); qd->qd_change = qc_change; @@ -1494,6 +1490,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return 0; +fail_brelse: + brelse(bh); fail: gfs2_quota_cleanup(sdp); return error; From de0d95c26c41c1776e4d23b1a8b3f2bcfd1dae5f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 3 Jun 2024 18:45:00 +0200 Subject: [PATCH 17/29] gfs2: Check quota consistency on mount In gfs2_quota_init(), make sure that the per-node "quota_change%u" file doesn't contain duplicate uids/gids. Those duplicates would cause us to acquire the glock corresponding to those ids repeatedly, which the glock code doesn't allow. When finding inconsistencies, we wipe them out and ignore them. The resulting quotas will likely be inconsistent, and running quotacheck(1) is advised. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index d75eeb327060..2984eaafdf6f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1427,7 +1427,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return error; for (x = 0; x < blocks; x++) { - const struct gfs2_quota_change *qc; + struct gfs2_quota_change *qc; unsigned int y; if (!extlen) { @@ -1443,10 +1443,10 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) goto fail_brelse; - qc = (const struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); + qc = (struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; y++, slot++) { - struct gfs2_quota_data *qd; + struct gfs2_quota_data *old_qd, *qd; s64 qc_change = be64_to_cpu(qc->qc_change); u32 qc_flags = be32_to_cpu(qc->qc_flags); enum quota_type qtype = (qc_flags & GFS2_QCF_USER) ? @@ -1468,18 +1468,41 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) qd->qd_slot_ref = 1; spin_lock(&qd_lock); + spin_lock_bucket(hash); + old_qd = gfs2_qd_search_bucket(hash, sdp, qc_id); + if (old_qd) { + fs_err(sdp, "Corruption found in quota_change%u" + "file: duplicate identifier in " + "slot %u\n", + sdp->sd_jdesc->jd_jid, slot); + + spin_unlock_bucket(hash); + spin_unlock(&qd_lock); + qd_put(old_qd); + + gfs2_glock_put(qd->qd_gl); + kmem_cache_free(gfs2_quotad_cachep, qd); + + /* zero out the duplicate slot */ + lock_buffer(bh); + memset(qc, 0, sizeof(*qc)); + mark_buffer_dirty(bh); + unlock_buffer(bh); + + continue; + } BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap)); list_add(&qd->qd_list, &sdp->sd_quota_list); atomic_inc(&sdp->sd_quota_count); - spin_unlock(&qd_lock); - - spin_lock_bucket(hash); hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]); spin_unlock_bucket(hash); + spin_unlock(&qd_lock); found++; } + if (buffer_dirty(bh)) + sync_dirty_buffer(bh); brelse(bh); dblock++; extlen--; @@ -1491,6 +1514,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return 0; fail_brelse: + if (buffer_dirty(bh)) + sync_dirty_buffer(bh); brelse(bh); fail: gfs2_quota_cleanup(sdp); From 2aedfe847b4d91eabee11a44c27244055cef4eb3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 7 Jun 2024 02:11:12 +0200 Subject: [PATCH 18/29] gfs2: Revert "introduce qd_bh_get_or_undo" The qd_bh_get_or_undo() helper introduced by that commit doesn't improve the code much, so revert it and clean things up in a more useful way in the next commit. This reverts commit 7dbc6ae60dd7089d8ed42892b6a66c138f0aa7a0. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 2984eaafdf6f..e0840b412d28 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -491,20 +491,6 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, return 1; } -static int qd_bh_get_or_undo(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) -{ - int error; - - error = bh_get(qd); - if (!error) - return 0; - - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); - return error; -} - static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL, *iter; @@ -527,12 +513,17 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) spin_unlock(&qd_lock); if (qd) { - error = qd_bh_get_or_undo(sdp, qd); - if (error) + error = bh_get(qd); + if (error) { + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); return error; - *qdp = qd; + } } + *qdp = qd; + return 0; } @@ -1189,8 +1180,15 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) if (!found) continue; - if (!qd_bh_get_or_undo(sdp, qd)) - qda[count++] = qd; + gfs2_assert_warn(sdp, qd->qd_change_sync); + if (bh_get(qd)) { + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); + continue; + } + + qda[count++] = qd; } if (count) { From 59ebc33201237bf38e5adca3794716100660c5b4 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 7 Jun 2024 02:23:54 +0200 Subject: [PATCH 19/29] gfs2: qd_check_sync cleanups Rename qd_check_sync() to qd_grab_sync() and make it return a bool. Turn the sync_gen pointer into a regular u64 and pass in U64_MAX instead of a NULL pointer when sync generation checking isn't needed. Introduce a new qd_ungrab_sync() helper for undoing the effects of qd_grab_sync() if the subsequent bh_get() on the qd object fails. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e0840b412d28..bbd66adaec3d 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -462,13 +462,13 @@ static void bh_put(struct gfs2_quota_data *qd) mutex_unlock(&sdp->sd_quota_mutex); } -static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, - u64 *sync_gen) +static bool qd_grab_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, + u64 sync_gen) { if (test_bit(QDF_LOCKED, &qd->qd_flags) || !test_bit(QDF_CHANGE, &qd->qd_flags) || - (sync_gen && (qd->qd_sync_gen >= *sync_gen))) - return 0; + qd->qd_sync_gen >= sync_gen) + return false; /* * If qd_change is 0 it means a pending quota change was negated. @@ -478,17 +478,24 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, if (!qd->qd_change && test_and_clear_bit(QDF_CHANGE, &qd->qd_flags)) { slot_put(qd); qd_put(qd); - return 0; + return false; } if (!lockref_get_not_dead(&qd->qd_lockref)) - return 0; + return false; list_move_tail(&qd->qd_list, &sdp->sd_quota_list); set_bit(QDF_LOCKED, &qd->qd_flags); qd->qd_change_sync = qd->qd_change; slot_hold(qd); - return 1; + return true; +} + +static void qd_ungrab_sync(struct gfs2_quota_data *qd) +{ + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); } static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) @@ -504,7 +511,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) spin_lock(&qd_lock); list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) { - if (qd_check_sync(sdp, iter, &sdp->sd_quota_sync_gen)) { + if (qd_grab_sync(sdp, iter, sdp->sd_quota_sync_gen)) { qd = iter; break; } @@ -515,9 +522,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) if (qd) { error = bh_get(qd); if (error) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); + qd_ungrab_sync(qd); return error; } } @@ -1157,7 +1162,6 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) struct gfs2_quota_data *qda[2 * GFS2_MAXQUOTAS]; unsigned int count = 0; u32 x; - int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) return; @@ -1165,6 +1169,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { struct gfs2_quota_data *qd; bool sync; + int error; qd = ip->i_qadata->qa_qd[x]; sync = need_sync(qd); @@ -1174,17 +1179,16 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) continue; spin_lock(&qd_lock); - found = qd_check_sync(sdp, qd, NULL); + sync = qd_grab_sync(sdp, qd, U64_MAX); spin_unlock(&qd_lock); - if (!found) + if (!sync) continue; gfs2_assert_warn(sdp, qd->qd_change_sync); - if (bh_get(qd)) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); + error = bh_get(qd); + if (error) { + qd_ungrab_sync(qd); continue; } From 4b4b6374dc6134849f2bdca81fa2945b6ed6d9fc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 3 Jun 2024 19:04:09 +0200 Subject: [PATCH 20/29] gfs2: Revert "ignore negated quota changes" Commit 4c6a08125f22 ("gfs2: ignore negated quota changes") skips quota changes with qd_change == 0 instead of writing them back, which leaves behind non-zero qd_change values in the affected slots. The kernel then assumes that those slots are unused, while the qd_change values on disk indicate that they are indeed still in use. The next time the filesystem is mounted, those invalid slots are read in from disk, which will cause inconsistencies. Revert that commit to avoid filesystem corruption. This reverts commit 4c6a08125f2249531ec01783a5f4317d7342add5. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index bbd66adaec3d..f0aaae07a598 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -470,17 +470,6 @@ static bool qd_grab_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, qd->qd_sync_gen >= sync_gen) return false; - /* - * If qd_change is 0 it means a pending quota change was negated. - * We should not sync it, but we still have a qd reference and slot - * reference taken by gfs2_quota_change -> do_qc that need to be put. - */ - if (!qd->qd_change && test_and_clear_bit(QDF_CHANGE, &qd->qd_flags)) { - slot_put(qd); - qd_put(qd); - return false; - } - if (!lockref_get_not_dead(&qd->qd_lockref)) return false; From ec4b5200c8af9ce021399d3192b3379c089396c3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 5 Jun 2024 22:13:15 +0200 Subject: [PATCH 21/29] gfs2: Revert "Add quota_change type" Commit 432928c93779 ("gfs2: Add quota_change type") makes the incorrect assertion that function do_qc() should behave differently in the two contexts it is used in, but that isn't actually true. In all cases, do_qc() grabs a "reference" when it starts using a slot in the per-node quota changes file, and it releases that "reference" when no more residual changes remain. Revert that broken commit. There are some remaining issues with function do_qc() which are addressed in the next commit. This reverts commit 432928c9377959684c748a9bc6553ed2d3c2ea4f. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 19 +++++++------------ fs/gfs2/util.c | 6 +++--- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index f0aaae07a598..371fad7d64e7 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -75,9 +75,6 @@ #define GFS2_QD_HASH_SIZE BIT(GFS2_QD_HASH_SHIFT) #define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1) -#define QC_CHANGE 0 -#define QC_SYNC 1 - /* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */ /* -> sd_bitmap_lock */ static DEFINE_SPINLOCK(qd_lock); @@ -695,7 +692,7 @@ static int sort_qd(const void *a, const void *b) return 0; } -static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type) +static void do_qc(struct gfs2_quota_data *qd, s64 change) { struct gfs2_sbd *sdp = qd->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); @@ -720,18 +717,16 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type) qd->qd_change = x; spin_unlock(&qd_lock); - if (qc_type == QC_CHANGE) { - if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) { - qd_hold(qd); - slot_hold(qd); - } - } else { + if (!x) { gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); clear_bit(QDF_CHANGE, &qd->qd_flags); qc->qc_flags = 0; qc->qc_id = 0; slot_put(qd); qd_put(qd); + } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) { + qd_hold(qd); + slot_hold(qd); } if (change < 0) /* Reset quiet flag if we freed some blocks */ @@ -977,7 +972,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) if (error) goto out_end_trans; - do_qc(qd, -qd->qd_change_sync, QC_SYNC); + do_qc(qd, -qd->qd_change_sync); set_bit(QDF_REFRESH, &qd->qd_flags); } @@ -1303,7 +1298,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (qid_eq(qd->qd_id, make_kqid_uid(uid)) || qid_eq(qd->qd_id, make_kqid_gid(gid))) { - do_qc(qd, change, QC_CHANGE); + do_qc(qd, change); } } } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 7e79b1a9e35f..13be8d1d228b 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -99,12 +99,12 @@ out_unlock: */ int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp) { + int flags = LM_FLAG_NOEXP | GL_EXACT; int error; - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT, + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, &sdp->sd_freeze_gh); - if (error) + if (error && error != GLR_TRYFAILED) fs_err(sdp, "can't lock the freeze glock: %d\n", error); return error; } From 7da4d6e178f405d7abdfc42ea7ac0074e9a6aa45 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 6 Jun 2024 00:24:40 +0200 Subject: [PATCH 22/29] gfs2: Fix and clean up function do_qc Function do_qc() is supposed to be conceptually simple: it alters the current in-memory and on-disk quota change values for a given uid/gid by a given delta. If the on-disk record isn't defined yet, a new record is created. If the on-disk record exists and the resulting change value is zero, there no longer is a need for that record and so the record is deleted. On top of that, some reference counting is involved when creating and deleting records. Currently, instead of doing the above, do_qc() alters the on-disk value and then it sets the in-memory value to the on-disk value. This is incorrect when the on-disk value differs from the in-memory value. The two values are allowed to differ when quota changes are synced to the global quota file. Fix by changing both values by the same amount. In addition, do_qc() currently gets confused when the delta value is 0. It isn't supposed to be called that way, but that assumption isn't mentioned and it makes the code harder to read. Make the code more explicit. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 371fad7d64e7..29a69756cc32 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -702,32 +702,40 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) mutex_lock(&sdp->sd_quota_mutex); gfs2_trans_add_meta(ip->i_gl, qd->qd_bh); - if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { - qc->qc_change = 0; - qc->qc_flags = 0; - if (qd->qd_id.type == USRQUOTA) - qc->qc_flags = cpu_to_be32(GFS2_QCF_USER); - qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id)); - } + /* + * The QDF_CHANGE flag indicates that the slot in the quota change file + * is used. Here, we use the value of qc->qc_change when the slot is + * used, and we assume a value of 0 otherwise. + */ - x = be64_to_cpu(qc->qc_change) + change; - qc->qc_change = cpu_to_be64(x); + x = 0; + if (test_bit(QDF_CHANGE, &qd->qd_flags)) + x = be64_to_cpu(qc->qc_change); + x += change; spin_lock(&qd_lock); - qd->qd_change = x; + qd->qd_change += change; spin_unlock(&qd_lock); - if (!x) { - gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); + if (!x && test_bit(QDF_CHANGE, &qd->qd_flags)) { + /* The slot in the quota change file becomes unused. */ clear_bit(QDF_CHANGE, &qd->qd_flags); qc->qc_flags = 0; qc->qc_id = 0; slot_put(qd); qd_put(qd); - } else if (!test_and_set_bit(QDF_CHANGE, &qd->qd_flags)) { + } else if (x && !test_bit(QDF_CHANGE, &qd->qd_flags)) { + /* The slot in the quota change file becomes used. */ + set_bit(QDF_CHANGE, &qd->qd_flags); qd_hold(qd); slot_hold(qd); + + qc->qc_flags = 0; + if (qd->qd_id.type == USRQUOTA) + qc->qc_flags = cpu_to_be32(GFS2_QCF_USER); + qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id)); } + qc->qc_change = cpu_to_be64(x); if (change < 0) /* Reset quiet flag if we freed some blocks */ clear_bit(QDF_QMSG_QUIET, &qd->qd_flags); From b510af07aaa4d8a7095bc0368020d8bdba5af942 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 6 Jun 2024 05:37:02 +0200 Subject: [PATCH 23/29] gfs2: quota need_sync cleanup Rename variable 'value' to 'change' as it stores a change in value. Add new 'value' and 'limit' variables for the current value and limit. Only fetch the tuning parameters when we need them. Get rid of unnecessary nesting. No change in functionality. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 29a69756cc32..11bd16d8f81c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1117,34 +1117,32 @@ static bool need_sync(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_sbd; struct gfs2_tune *gt = &sdp->sd_tune; - s64 value; + s64 value, change, limit; unsigned int num, den; if (!qd->qd_qb.qb_limit) return false; spin_lock(&qd_lock); - value = qd->qd_change; + change = qd->qd_change; spin_unlock(&qd_lock); + if (change <= 0) + return false; + value = (s64)be64_to_cpu(qd->qd_qb.qb_value); + limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); + if (value >= limit) + return false; + spin_lock(>->gt_spin); num = gt->gt_quota_scale_num; den = gt->gt_quota_scale_den; spin_unlock(>->gt_spin); - if (value <= 0) + change *= gfs2_jindex_size(sdp) * num; + change = div_s64(change, den); + if (value + change < limit) return false; - else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >= - (s64)be64_to_cpu(qd->qd_qb.qb_limit)) - return false; - else { - value *= gfs2_jindex_size(sdp) * num; - value = div_s64(value, den); - value += (s64)be64_to_cpu(qd->qd_qb.qb_value); - if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit)) - return false; - } - return true; } From 614abc11870ee7ec5a32c81b7ecf4232ede48ecb Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 7 Jun 2024 11:47:47 +0200 Subject: [PATCH 24/29] gfs2: Fold qd_fish into gfs2_quota_sync The split between qd_fish() and gfs2_quota_sync() is rather unfortunate as qd_fish() is repeatedly called to scan sdp->sd_quota_list only to find the next object to that needs syncing; if there are multiple objects on the list that need syncing, it makes more sense to grab them all in one go. This is relatively easy to do when qd_fish() is folded into gfs2_quota_sync(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 77 +++++++++++++++++++------------------------------ 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 11bd16d8f81c..ae02403407aa 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -484,40 +484,6 @@ static void qd_ungrab_sync(struct gfs2_quota_data *qd) qd_put(qd); } -static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) -{ - struct gfs2_quota_data *qd = NULL, *iter; - int error; - - *qdp = NULL; - - if (sb_rdonly(sdp->sd_vfs)) - return 0; - - spin_lock(&qd_lock); - - list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) { - if (qd_grab_sync(sdp, iter, sdp->sd_quota_sync_gen)) { - qd = iter; - break; - } - } - - spin_unlock(&qd_lock); - - if (qd) { - error = bh_get(qd); - if (error) { - qd_ungrab_sync(qd); - return error; - } - } - - *qdp = qd; - - return 0; -} - static void qdsb_put(struct gfs2_quota_data *qd) { bh_put(qd); @@ -1332,10 +1298,10 @@ int gfs2_quota_sync(struct super_block *sb, int type) struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; unsigned int max_qd = PAGE_SIZE / sizeof(struct gfs2_holder); - unsigned int num_qd; - unsigned int x; int error = 0; + if (sb_rdonly(sdp->sd_vfs)) + return 0; if (!qd_changed(sdp)) return 0; @@ -1347,24 +1313,39 @@ int gfs2_quota_sync(struct super_block *sb, int type) sdp->sd_quota_sync_gen++; do { - num_qd = 0; + struct gfs2_quota_data *iter; + unsigned int num_qd = 0; + unsigned int x; - for (;;) { - error = qd_fish(sdp, qda + num_qd); - if (error || !qda[num_qd]) - break; - if (++num_qd == max_qd) - break; + spin_lock(&qd_lock); + list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) { + if (qd_grab_sync(sdp, iter, sdp->sd_quota_sync_gen)) { + qda[num_qd++] = iter; + if (num_qd == max_qd) + break; + } } + spin_unlock(&qd_lock); - if (num_qd) { + if (!num_qd) + break; + + for (x = 0; x < num_qd; x++) { + error = bh_get(qda[x]); if (!error) - error = do_sync(num_qd, qda); + continue; - for (x = 0; x < num_qd; x++) - qd_unlock(qda[x]); + while (x < num_qd) + qd_ungrab_sync(qda[--num_qd]); + break; } - } while (!error && num_qd == max_qd); + + if (!error) + error = do_sync(num_qd, qda); + + for (x = 0; x < num_qd; x++) + qd_unlock(qda[x]); + } while (!error); mutex_unlock(&sdp->sd_quota_sync_mutex); kfree(qda); From d5563f42f59ed2cddf1021a34c9cdd8f4a89021c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 6 Jun 2024 01:15:36 +0200 Subject: [PATCH 25/29] gfs2: Add some missing quota locking The quota code is missing some locking between local quota changes and syncing those quota changes to the global quota file (gfs2_quota_sync); in particular, qd->qd_change needs to be kept in sync with the QDF_CHANGE change flag and the number of references held. Use the qd->qd_lockref.lock spinlock for that. With the qd->qd_lockref.lock spinlock held, we can no longer call lockref_get(), so turn qd_hold() into a variant that assumes that the lock is held. This function is really supposed to take an additional reference when one or more references are already held, so check for that instead of checking if the lockref is dead. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 82 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ae02403407aa..283c6ff21911 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -316,11 +316,11 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, } -static void qd_hold(struct gfs2_quota_data *qd) +static void __qd_hold(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_sbd; - gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); - lockref_get(&qd->qd_lockref); + gfs2_assert(sdp, qd->qd_lockref.count > 0); + qd->qd_lockref.count++; } static void qd_put(struct gfs2_quota_data *qd) @@ -462,19 +462,27 @@ static void bh_put(struct gfs2_quota_data *qd) static bool qd_grab_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, u64 sync_gen) { + bool ret = false; + + spin_lock(&qd->qd_lockref.lock); if (test_bit(QDF_LOCKED, &qd->qd_flags) || !test_bit(QDF_CHANGE, &qd->qd_flags) || qd->qd_sync_gen >= sync_gen) - return false; + goto out; - if (!lockref_get_not_dead(&qd->qd_lockref)) - return false; + if (__lockref_is_dead(&qd->qd_lockref)) + goto out; + qd->qd_lockref.count++; list_move_tail(&qd->qd_list, &sdp->sd_quota_list); set_bit(QDF_LOCKED, &qd->qd_flags); qd->qd_change_sync = qd->qd_change; slot_hold(qd); - return true; + ret = true; + +out: + spin_unlock(&qd->qd_lockref.lock); + return ret; } static void qd_ungrab_sync(struct gfs2_quota_data *qd) @@ -493,8 +501,10 @@ static void qdsb_put(struct gfs2_quota_data *qd) static void qd_unlock(struct gfs2_quota_data *qd) { + spin_lock(&qd->qd_lockref.lock); gfs2_assert_warn(qd->qd_sbd, test_bit(QDF_LOCKED, &qd->qd_flags)); clear_bit(QDF_LOCKED, &qd->qd_flags); + spin_unlock(&qd->qd_lockref.lock); qdsb_put(qd); } @@ -663,6 +673,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) struct gfs2_sbd *sdp = qd->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); struct gfs2_quota_change *qc = qd->qd_bh_qc; + bool needs_put = false; s64 x; mutex_lock(&sdp->sd_quota_mutex); @@ -674,26 +685,24 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) * used, and we assume a value of 0 otherwise. */ + spin_lock(&qd->qd_lockref.lock); + x = 0; if (test_bit(QDF_CHANGE, &qd->qd_flags)) x = be64_to_cpu(qc->qc_change); x += change; - - spin_lock(&qd_lock); qd->qd_change += change; - spin_unlock(&qd_lock); if (!x && test_bit(QDF_CHANGE, &qd->qd_flags)) { /* The slot in the quota change file becomes unused. */ clear_bit(QDF_CHANGE, &qd->qd_flags); qc->qc_flags = 0; qc->qc_id = 0; - slot_put(qd); - qd_put(qd); + needs_put = true; } else if (x && !test_bit(QDF_CHANGE, &qd->qd_flags)) { /* The slot in the quota change file becomes used. */ set_bit(QDF_CHANGE, &qd->qd_flags); - qd_hold(qd); + __qd_hold(qd); slot_hold(qd); qc->qc_flags = 0; @@ -703,6 +712,12 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) } qc->qc_change = cpu_to_be64(x); + spin_unlock(&qd->qd_lockref.lock); + + if (needs_put) { + slot_put(qd); + qd_put(qd); + } if (change < 0) /* Reset quiet flag if we freed some blocks */ clear_bit(QDF_QMSG_QUIET, &qd->qd_flags); mutex_unlock(&sdp->sd_quota_mutex); @@ -844,6 +859,7 @@ static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc, be64_add_cpu(&q.qu_value, change); if (((s64)be64_to_cpu(q.qu_value)) < 0) q.qu_value = 0; /* Never go negative on quota usage */ + spin_lock(&qd->qd_lockref.lock); qd->qd_qb.qb_value = q.qu_value; if (fdq) { if (fdq->d_fieldmask & QC_SPC_SOFT) { @@ -859,6 +875,7 @@ static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc, qd->qd_qb.qb_value = q.qu_value; } } + spin_unlock(&qd->qd_lockref.lock); err = gfs2_write_disk_quota(sdp, &q, loc); if (!err) { @@ -990,7 +1007,9 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) qlvb->qb_limit = q.qu_limit; qlvb->qb_warn = q.qu_warn; qlvb->qb_value = q.qu_value; + spin_lock(&qd->qd_lockref.lock); qd->qd_qb = *qlvb; + spin_unlock(&qd->qd_lockref.lock); return 0; } @@ -1012,7 +1031,9 @@ restart: if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) force_refresh = FORCE; + spin_lock(&qd->qd_lockref.lock); qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; + spin_unlock(&qd->qd_lockref.lock); if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { gfs2_glock_dq_uninit(q_gh); @@ -1085,20 +1106,19 @@ static bool need_sync(struct gfs2_quota_data *qd) struct gfs2_tune *gt = &sdp->sd_tune; s64 value, change, limit; unsigned int num, den; + int ret = false; + spin_lock(&qd->qd_lockref.lock); if (!qd->qd_qb.qb_limit) - return false; + goto out; - spin_lock(&qd_lock); change = qd->qd_change; - spin_unlock(&qd_lock); - if (change <= 0) - return false; + goto out; value = (s64)be64_to_cpu(qd->qd_qb.qb_value); limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); if (value >= limit) - return false; + goto out; spin_lock(>->gt_spin); num = gt->gt_quota_scale_num; @@ -1108,8 +1128,12 @@ static bool need_sync(struct gfs2_quota_data *qd) change *= gfs2_jindex_size(sdp) * num; change = div_s64(change, den); if (value + change < limit) - return false; - return true; + goto out; + + ret = true; +out: + spin_unlock(&qd->qd_lockref.lock); + return ret; } void gfs2_quota_unlock(struct gfs2_inode *ip) @@ -1211,12 +1235,12 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, qid_eq(qd->qd_id, make_kqid_gid(gid)))) continue; + spin_lock(&qd->qd_lockref.lock); warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn); limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit); value = (s64)be64_to_cpu(qd->qd_qb.qb_value); - spin_lock(&qd_lock); value += qd->qd_change; - spin_unlock(&qd_lock); + spin_unlock(&qd->qd_lockref.lock); if (limit > 0 && (limit - value) < ap->allowed) ap->allowed = limit - value; @@ -1282,12 +1306,12 @@ static bool qd_changed(struct gfs2_sbd *sdp) spin_lock(&qd_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags)) - continue; - - changed = true; - break; + spin_lock(&qd->qd_lockref.lock); + changed = !test_bit(QDF_LOCKED, &qd->qd_flags) && + test_bit(QDF_CHANGE, &qd->qd_flags); + spin_unlock(&qd->qd_lockref.lock); + if (changed) + break; } spin_unlock(&qd_lock); return changed; From 8d89e068deccb4f34d412df4042f37a75e126259 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 10 Jun 2024 16:31:22 +0200 Subject: [PATCH 26/29] gfs2: Get rid of some unnecessary quota locking With the locking the previous patch has introduced for each struct gfs2_quota_data object, sd_quota_mutex has become largely irrelevant. By waiting on the buffer head instead of waiting on the mutex in get_bh(), it becomes completely irrelevant and can be removed. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 1 - fs/gfs2/ops_fstype.c | 1 - fs/gfs2/quota.c | 53 ++++++++++++++++++++++---------------------- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 90fd2584357a..aa4ef67a34e0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -782,7 +782,6 @@ struct gfs2_sbd { struct list_head sd_quota_list; atomic_t sd_quota_count; - struct mutex sd_quota_mutex; struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 0561edd6cc86..ff1f3e3dc65c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -103,7 +103,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_completion(&sdp->sd_journal_ready); INIT_LIST_HEAD(&sdp->sd_quota_list); - mutex_init(&sdp->sd_quota_mutex); mutex_init(&sdp->sd_quota_sync_mutex); init_waitqueue_head(&sdp->sd_quota_wait); spin_lock_init(&sdp->sd_bitmap_lock); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 283c6ff21911..931a133a5f96 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -397,16 +397,17 @@ static int bh_get(struct gfs2_quota_data *qd) struct inode *inode = sdp->sd_qc_inode; struct gfs2_inode *ip = GFS2_I(inode); unsigned int block, offset; - struct buffer_head *bh; + struct buffer_head *bh = NULL; struct iomap iomap = { }; int error; - mutex_lock(&sdp->sd_quota_mutex); - - if (qd->qd_bh_count++) { - mutex_unlock(&sdp->sd_quota_mutex); + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_bh_count) { + qd->qd_bh_count++; + spin_unlock(&qd->qd_lockref.lock); return 0; } + spin_unlock(&qd->qd_lockref.lock); block = qd->qd_slot / sdp->sd_qc_per_block; offset = qd->qd_slot % sdp->sd_qc_per_block; @@ -415,48 +416,50 @@ static int bh_get(struct gfs2_quota_data *qd) (loff_t)block << inode->i_blkbits, i_blocksize(inode), &iomap); if (error) - goto fail; + return error; error = -ENOENT; if (iomap.type != IOMAP_MAPPED) - goto fail; + return error; error = gfs2_meta_read(ip->i_gl, iomap.addr >> inode->i_blkbits, DIO_WAIT, 0, &bh); if (error) - goto fail; + return error; error = -EIO; if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_QC)) - goto fail_brelse; + goto out; - qd->qd_bh = bh; - qd->qd_bh_qc = (struct gfs2_quota_change *) - (bh->b_data + sizeof(struct gfs2_meta_header) + - offset * sizeof(struct gfs2_quota_change)); + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_bh == NULL) { + qd->qd_bh = bh; + qd->qd_bh_qc = (struct gfs2_quota_change *) + (bh->b_data + sizeof(struct gfs2_meta_header) + + offset * sizeof(struct gfs2_quota_change)); + bh = NULL; + } + qd->qd_bh_count++; + spin_unlock(&qd->qd_lockref.lock); + error = 0; - mutex_unlock(&sdp->sd_quota_mutex); - - return 0; - -fail_brelse: +out: brelse(bh); -fail: - qd->qd_bh_count--; - mutex_unlock(&sdp->sd_quota_mutex); return error; } static void bh_put(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_sbd; + struct buffer_head *bh = NULL; - mutex_lock(&sdp->sd_quota_mutex); + spin_lock(&qd->qd_lockref.lock); gfs2_assert(sdp, qd->qd_bh_count); if (!--qd->qd_bh_count) { - brelse(qd->qd_bh); + bh = qd->qd_bh; qd->qd_bh = NULL; qd->qd_bh_qc = NULL; } - mutex_unlock(&sdp->sd_quota_mutex); + spin_unlock(&qd->qd_lockref.lock); + brelse(bh); } static bool qd_grab_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, @@ -676,7 +679,6 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) bool needs_put = false; s64 x; - mutex_lock(&sdp->sd_quota_mutex); gfs2_trans_add_meta(ip->i_gl, qd->qd_bh); /* @@ -720,7 +722,6 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) } if (change < 0) /* Reset quiet flag if we freed some blocks */ clear_bit(QDF_QMSG_QUIET, &qd->qd_flags); - mutex_unlock(&sdp->sd_quota_mutex); } static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index, From d9a75a60699dedaac17d2b5170bb2e3cdc03481e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 7 Jun 2024 15:54:21 +0200 Subject: [PATCH 27/29] gfs2: Be more careful with the quota sync generation The quota sync generation is only ever updated under sd_quota_sync_mutex by gfs2_quota_sync(), but its current value is fetched ouside of that mutex, so use WRITE_ONCE() and READ_ONCE() when accessing it without holding that mutex. Pass the current sync generation to do_sync() from its callers to ensure that we're not recording the wrong generation when the syncing is done. Also, make sure that qd->qd_sync_gen only ever moves forward. In gfs2_quota_sync(), only write the new sync generation when we know that there are changes. This eliminates the need for function sd_changed(), which we will remove in the next commit. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 931a133a5f96..4f2caa06ca93 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -891,7 +891,8 @@ static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc, return err; } -static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) +static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda, + u64 sync_gen) { struct gfs2_sbd *sdp = (*qda)->qd_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); @@ -982,8 +983,13 @@ out_dq: gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC); if (!error) { - for (x = 0; x < num_qd; x++) - qda[x]->qd_sync_gen = sdp->sd_quota_sync_gen; + for (x = 0; x < num_qd; x++) { + qd = qda[x]; + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_sync_gen < sync_gen) + qd->qd_sync_gen = sync_gen; + spin_unlock(&qd->qd_lockref.lock); + } } return error; } @@ -1177,7 +1183,9 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) } if (count) { - do_sync(count, qda); + u64 sync_gen = READ_ONCE(sdp->sd_quota_sync_gen); + + do_sync(count, qda, sync_gen); for (x = 0; x < count; x++) qd_unlock(qda[x]); } @@ -1323,6 +1331,7 @@ int gfs2_quota_sync(struct super_block *sb, int type) struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; unsigned int max_qd = PAGE_SIZE / sizeof(struct gfs2_holder); + u64 sync_gen; int error = 0; if (sb_rdonly(sdp->sd_vfs)) @@ -1335,7 +1344,7 @@ int gfs2_quota_sync(struct super_block *sb, int type) return -ENOMEM; mutex_lock(&sdp->sd_quota_sync_mutex); - sdp->sd_quota_sync_gen++; + sync_gen = sdp->sd_quota_sync_gen + 1; do { struct gfs2_quota_data *iter; @@ -1344,7 +1353,7 @@ int gfs2_quota_sync(struct super_block *sb, int type) spin_lock(&qd_lock); list_for_each_entry(iter, &sdp->sd_quota_list, qd_list) { - if (qd_grab_sync(sdp, iter, sdp->sd_quota_sync_gen)) { + if (qd_grab_sync(sdp, iter, sync_gen)) { qda[num_qd++] = iter; if (num_qd == max_qd) break; @@ -1365,8 +1374,10 @@ int gfs2_quota_sync(struct super_block *sb, int type) break; } - if (!error) - error = do_sync(num_qd, qda); + if (!error) { + WRITE_ONCE(sdp->sd_quota_sync_gen, sync_gen); + error = do_sync(num_qd, qda, sync_gen); + } for (x = 0; x < num_qd; x++) qd_unlock(qda[x]); From 5a1906a476bc84145f20cd1941aa1250d38db4aa Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 10 Jun 2024 23:26:36 +0200 Subject: [PATCH 28/29] gfs2: Revert "check for no eligible quota changes" Since the previous commit, function gfs2_quota_sync() will not cause the sync generation to creep forward by one every time the function is called; this helps keep things a but more tidy. We also don't care that this function allocates a page of memory every time it is called, so no good reason for keeping qd_changed() anymore, which just duplicates qd_grab_sync(). This reverts commit 06aa6fd31a5f402b055e12ea53bb7b086359d3c8. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 4f2caa06ca93..2e6bc77f4f81 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1308,24 +1308,6 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -static bool qd_changed(struct gfs2_sbd *sdp) -{ - struct gfs2_quota_data *qd; - bool changed = false; - - spin_lock(&qd_lock); - list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - spin_lock(&qd->qd_lockref.lock); - changed = !test_bit(QDF_LOCKED, &qd->qd_flags) && - test_bit(QDF_CHANGE, &qd->qd_flags); - spin_unlock(&qd->qd_lockref.lock); - if (changed) - break; - } - spin_unlock(&qd_lock); - return changed; -} - int gfs2_quota_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; @@ -1336,8 +1318,6 @@ int gfs2_quota_sync(struct super_block *sb, int type) if (sb_rdonly(sdp->sd_vfs)) return 0; - if (!qd_changed(sdp)) - return 0; qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL); if (!qda) From f75efefb6db305b5b5c56a9b9ae2d72b54f20780 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 8 Jul 2024 21:46:16 +0200 Subject: [PATCH 29/29] gfs2: Clean up glock demote logic The logic for determining when to demote a glock in glock_work_func(), introduced in commit 7cf8dcd3b68a ("GFS2: Automatically adjust glock min hold time"), doesn't make sense: inode glocks have a minimum hold time that delays demotion, while all other glocks are expected to be demoted immediately. Instead of demoting non-inode glocks immediately, glock_work_func() schedules glock work for them to be demoted, however. Get rid of that unnecessary indirection. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7f68e3d217e6..12a769077ea0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1091,11 +1091,13 @@ static void glock_work_func(struct work_struct *work) if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { - unsigned long holdtime, now = jiffies; + if (gl->gl_name.ln_type == LM_TYPE_INODE) { + unsigned long holdtime, now = jiffies; - holdtime = gl->gl_tchange + gl->gl_hold_time; - if (time_before(now, holdtime)) - delay = holdtime - now; + holdtime = gl->gl_tchange + gl->gl_hold_time; + if (time_before(now, holdtime)) + delay = holdtime - now; + } if (!delay) { clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); @@ -1106,8 +1108,6 @@ static void glock_work_func(struct work_struct *work) if (delay) { /* Keep one glock reference for the work we requeue. */ drop_refs--; - if (gl->gl_name.ln_type != LM_TYPE_INODE) - delay = 0; gfs2_glock_queue_work(gl, delay); }