glusterd: daemon restart logic should adhere to server-side quorum

Just like brick processes, the other daemon services should follow the same
quorum-check logic to decide whether a particular service needs to come up
when glusterd is restarted or an incoming friend add/update request is
received (in the glusterd_restart_bricks () function).

Change-Id: I54a1fbdaa1571cc45eed627181b81463fead47a3
BUG: 1383893
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://review.gluster.org/15626
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Prashanth Pai <ppai@redhat.com>
Author: Atin Mukherjee <amukherj@redhat.com>  2016-10-05 14:59:51 +05:30
parent a3b4c70afe
commit a9f660bc9d
2 changed files with 64 additions and 6 deletions


@@ -0,0 +1,57 @@
#!/bin/bash
# This test checks that shd or any other daemon (apart from brick processes)
# that was brought down is not brought up automatically when glusterd on the
# other node is (re)started
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../cluster.rc

function shd_up_status_1 {
        $CLI_1 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
}

function shd_up_status_2 {
        $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
}

function get_shd_pid_2 {
        $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $8}'
}

cleanup;
TEST launch_cluster 3
TEST $CLI_1 peer probe $H2;
EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
TEST $CLI_1 peer probe $H3;
EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
# Let's create the volume
TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/${V0}1 $H2:$B2/${V0}2
# Start the volume
TEST $CLI_1 volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}2
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
# Bring down shd on 2nd node
kill -15 $(get_shd_pid_2)
# Bring down glusterd on 1st node
TEST kill_glusterd 1
# Bring back glusterd on the 1st node
TEST $glusterd_1
# We need to wait for PROCESS_UP_TIMEOUT and then check that the shd service
# does not come up on node 2
sleep $PROCESS_UP_TIMEOUT
EXPECT "N" shd_up_status_2
cleanup;


@@ -4923,10 +4923,6 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
                 if (volinfo->status != GLUSTERD_STATUS_STARTED)
                         continue;
-                if (start_svcs == _gf_false) {
-                        start_svcs = _gf_true;
-                        glusterd_svcs_manager (NULL);
-                }
                 gf_msg_debug (this->name, 0, "starting the volume %s",
                               volinfo->volname);
@@ -4949,6 +4945,11 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                          */
                         continue;
                 } else {
+                        if (start_svcs == _gf_false) {
+                                start_svcs = _gf_true;
+                                glusterd_svcs_manager (NULL);
+                        }
                         cds_list_for_each_entry (brickinfo, &volinfo->bricks,
                                                  brick_list) {
                                 glusterd_brick_start (volinfo, brickinfo,
@@ -4961,8 +4962,8 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
                 if (volinfo->status != GLUSTERD_STATUS_STARTED)
                         continue;
-                /* Check the quorum, if quorum is not met, don't start the
-                   bricks
+                /* Check the quorum, if quorum is not met, don't start
+                 * the bricks
                  */
                 ret = check_quorum_for_brick_start (volinfo,
                                                     node_quorum);
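
Taken together, the hunks above defer the glusterd_svcs_manager() call until
at least one started volume has passed the server-side quorum check, instead
of invoking it unconditionally for the first started volume. A minimal sketch
of the resulting control flow follows; the stub types and helpers are
assumptions made for illustration, and only glusterd_svcs_manager(),
check_quorum_for_brick_start() and glusterd_brick_start() appear in the
actual diff.

#include <stdbool.h>
#include <stddef.h>

/* Stand-in for glusterd_volinfo_t; the real struct lives in glusterd. */
typedef struct volinfo volinfo_t;

/* Hypothetical stubs standing in for the real glusterd internals. */
extern volinfo_t *next_started_volume (volinfo_t *cur);
extern bool check_quorum_for_brick_start (volinfo_t *vol, bool node_quorum);
extern void glusterd_svcs_manager (volinfo_t *vol);
extern void start_all_bricks (volinfo_t *vol);

void
restart_bricks_sketch (bool node_quorum)
{
        bool start_svcs = false;

        for (volinfo_t *vol = next_started_volume (NULL); vol != NULL;
             vol = next_started_volume (vol)) {
                /* Quorum gate: if server-side quorum is not met for this
                 * volume, neither its bricks nor the shared daemons
                 * (shd, nfs, quotad, ...) may be brought up. */
                if (!check_quorum_for_brick_start (vol, node_quorum))
                        continue;

                /* Daemons are started at most once, and only after the
                 * first volume passes the quorum check -- this is the
                 * block the patch moves into the quorum-met branch. */
                if (!start_svcs) {
                        start_svcs = true;
                        glusterd_svcs_manager (NULL);
                }

                start_all_bricks (vol);
        }
}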