glusterd: Fix for shd not coming up

Problem: After creating and starting n (where n is large) distribute-replicate volumes using a script, if an (n+1)th distribute-replicate volume is then created and started manually, the self-heal daemon is down. Solution: In glusterd_proc_stop, if the process is still running after SIGTERM has been sent, we send it SIGKILL. Because a process terminated by SIGKILL does not get to perform any cleanup, we need to remove its pidfile ourselves. Fixes: bz#1589253 Change-Id: I7c114334eec74c8d0f21b3e45cf7db6b8ef28af1 Signed-off-by: Sanju Rakonde <srakonde@redhat.com>
2018-06-08 19:39:58 +05:30 · 2018-06-08 19:39:58 +05:30 · 87f392e7fa
commit 87f392e7fa
parent 8e63ca9bba
3 changed files with 6 additions and 4 deletions
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@ -4001,9 +4001,6 @@ gf_is_service_running (char *pidfile, int *pid)
        ret = lockf (fno, F_TEST, 0);
        if (ret == -1)
                running = _gf_true;
-        if (!pid) {
-                goto out;
-        }

        ret = fscanf (file, "%d", pid);
        if (ret <= 0) {
--- a/xlators/mgmt/glusterd/src/glusterd-messages.h
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@ -636,7 +636,8 @@ GLFS_MSGID(GLUSTERD,
        GD_MSG_LOCALTIME_LOGGING_DISABLE,
        GD_MSG_PORTS_EXHAUSTED,
        GD_MSG_CHANGELOG_GET_FAIL,
-        GD_MSG_MANAGER_FUNCTION_FAILED
+        GD_MSG_MANAGER_FUNCTION_FAILED,
+        GD_MSG_REMOVE_FILE_FAILED
 );

 #endif /* !_GLUSTERD_MESSAGES_H_ */
--- a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@ -13,6 +13,7 @@
 #include <signal.h>

 #include "glusterd.h"
+#include "glusterd-utils.h"
 #include "common-utils.h"
 #include "xlator.h"
 #include "logging.h"
@ -120,6 +121,9 @@ glusterd_proc_stop (glusterd_proc_t *proc, int sig, int flags)
                                "reason:%s", pid, strerror(errno));
                        goto out;
                }
+                ret = glusterd_unlink_file (proc->pidfile);
+                if (ret)
+                        goto out;
        }

        ret = 0;