2025-01-26 10:04:02 +03:00 · 2013-06-13 10:17:20 +10:00 · 2013-06-13 10:17:20 +10:00 · f408caea2a
commit f408caea2a
parent 7513f0ba61
2 changed files with 38 additions and 0 deletions
--- a/ctdb/config/ctdb.sysconfig
+++ b/ctdb/config/ctdb.sysconfig
@ -129,6 +129,19 @@ CTDB_RECOVERY_LOCK="/some/place/on/shared/storage"
 # CTDB_MONITOR_FREE_MEMORY_WARN=100
 # CTDB_MONITOR_FREE_MEMORY=10

+# Should the 60.nfs monitor event try to correct the number of nfsd
+# threads?  This works around a limitation in some NFS initscripts
+# where some threads can be stuck in host filesystem calls (perhaps
+# due to slow storage), a restart occurs, some threads don't exit, the
+# start only adds the missing number of threads, the stuck threads
+# exit, and the result is a lower than expected thread count.  Note
+# that if you must also set $RPCNFSDCOUNT (RedHat/Debian) or
+# $USE_KERNEL_NFSD_NUMBER (SUSE) in your NFS configuration so the
+# monitoring code knows how many threads there should be - if neither
+# of these are set then this option will be ignored.  The default is
+# to not do this check.
+# CTDB_MONITOR_NFS_THREAD_COUNT="yes"
+
 # When set to yes, the CTDB node will start in DISABLED mode and not host
 # any public ip addresses. The administrator needs to explicitely enable
 # the node with "ctdb enable"
--- a/ctdb/config/events.d/60.nfs
+++ b/ctdb/config/events.d/60.nfs
@ -26,6 +26,29 @@ service_reconfigure ()
    } >/dev/null 2>&1
 }

+nfs_check_thread_count ()
+{
+    [ "$CTDB_MONITOR_NFS_THREAD_COUNT" = "yes" ] || return 0
+
+    # If $RPCNFSDCOUNT/$USE_KERNEL_NFSD_NUMBER isn't set then we could
+    # guess the default from the initscript.  However, let's just
+    # assume that those using the default don't care about the number
+    # of threads and that they have switched on this feature in error.
+    _configured_threads="${RPCNFSDCOUNT:-${USE_KERNEL_NFSD_NUMBER}}"
+    [ -n "$_configured_threads" ] || return 0
+
+    # nfsd should be running the configured number of threads.  If
+    # there are a different number of threads then tell nfsd the
+    # correct number.  
+    _running_threads=$(get_proc "fs/nfsd/threads")
+    # Intentionally not arithmetic comparison - avoids extra errors
+    # when get_proc() fails...
+    if [ "$_running_threads" != "$_configured_threads" ] ; then
+	echo "Attempting to correct number of nfsd threads from ${_running_threads} to ${_configured_threads}"
+	set_proc "fs/nfsd/threads" "$_configured_threads"
+    fi
+}
+
 loadconfig

 [ "$NFS_SERVER_MODE" != "ganesha" ] || exit 0
@ -71,6 +94,8 @@ case "$1" in

 	nfs_check_rpc_services

+	nfs_check_thread_count
+
 	# Every 10 minutes, update the statd state database for which
 	# clients need notifications
 	nfs_statd_update 600