glusterd: More checks before starting rebalance/remove-brick
Check if a previous remove-brick operation has been committed before starting a new rebalance/remove-brick task. Change-Id: I553e5ba64a6a352ca91032ab1a17997051a4494e BUG: 963541 Signed-off-by: Kaushal M <kaushal@redhat.com> Reviewed-on: http://review.gluster.org/5019 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
parent
979a17d49a
commit
7fd3898127
33
tests/bugs/bug-963541.t
Executable file
33
tests/bugs/bug-963541.t
Executable file
@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
|
||||
. $(dirname $0)/../include.rc
|
||||
|
||||
cleanup;
|
||||
|
||||
TEST glusterd
|
||||
TEST pidof glusterd
|
||||
|
||||
TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
|
||||
TEST $CLI volume start $V0;
|
||||
|
||||
# Start a remove-brick and try to start a rebalance/remove-brick without committing
|
||||
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
|
||||
|
||||
TEST ! $CLI volume rebalance $V0 start
|
||||
TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
|
||||
|
||||
#Try to start rebalance/remove-brick again after commit
|
||||
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit
|
||||
|
||||
gluster volume status
|
||||
|
||||
TEST $CLI volume rebalance $V0 start
|
||||
TEST $CLI volume rebalance $V0 stop
|
||||
|
||||
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
|
||||
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
|
||||
|
||||
TEST $CLI volume stop $V0
|
||||
|
||||
cleanup;
|
||||
|
@ -1264,6 +1264,17 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
|
||||
gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
|
||||
goto out;
|
||||
}
|
||||
if (!gd_is_remove_brick_committed (volinfo)) {
|
||||
snprintf (msg, sizeof (msg), "An earlier remove-brick "
|
||||
"task exists for volume %s. Either commit it"
|
||||
" or stop it before starting a new task.",
|
||||
volinfo->volname);
|
||||
errstr = gf_strdup (msg);
|
||||
gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick"
|
||||
" task exists for volume %s.",
|
||||
volinfo->volname);
|
||||
goto out;
|
||||
}
|
||||
if (glusterd_is_defrag_on(volinfo)) {
|
||||
errstr = gf_strdup("Rebalance is in progress. Please "
|
||||
"retry after completion");
|
||||
@ -1538,9 +1549,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear task-id on commmitting/stopping of remove-brick operation */
|
||||
if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS))
|
||||
/* Clear task-id & rebal.op on commmitting/stopping remove-brick */
|
||||
if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) {
|
||||
uuid_clear (volinfo->rebal.rebalance_id);
|
||||
volinfo->rebal.op = GD_OP_NONE;
|
||||
}
|
||||
|
||||
ret = -1;
|
||||
switch (cmd) {
|
||||
|
@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
|
||||
int count, void *myframe);
|
||||
int
|
||||
glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
|
||||
size_t len)
|
||||
size_t len, glusterd_op_t op)
|
||||
{
|
||||
int ret = -1;
|
||||
int ret = -1;
|
||||
xlator_t *this = NULL;
|
||||
|
||||
this = THIS;
|
||||
GF_ASSERT (this);
|
||||
|
||||
/* Check only if operation is not remove-brick */
|
||||
if ((GD_OP_REMOVE_BRICK != op) &&
|
||||
!gd_is_remove_brick_committed (volinfo)) {
|
||||
gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on "
|
||||
"volume %s is not yet committed", volinfo->volname);
|
||||
snprintf (op_errstr, len, "A remove-brick task on volume %s is"
|
||||
" not yet committed. Either commit or stop the "
|
||||
"remove-brick task.", volinfo->volname);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (glusterd_is_defrag_on (volinfo)) {
|
||||
gf_log ("glusterd", GF_LOG_DEBUG,
|
||||
gf_log (this->name, GF_LOG_DEBUG,
|
||||
"rebalance on volume %s already started",
|
||||
volinfo->volname);
|
||||
snprintf (op_errstr, len, "Rebalance on %s is already started",
|
||||
@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
|
||||
|
||||
if (glusterd_is_rb_started (volinfo) ||
|
||||
glusterd_is_rb_paused (volinfo)) {
|
||||
gf_log ("glusterd", GF_LOG_DEBUG,
|
||||
gf_log (this->name, GF_LOG_DEBUG,
|
||||
"Rebalance failed as replace brick is in progress on volume %s",
|
||||
volinfo->volname);
|
||||
snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
|
||||
@ -66,7 +81,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -190,7 +205,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
|
||||
GF_ASSERT (volinfo);
|
||||
GF_ASSERT (op_errstr);
|
||||
|
||||
ret = glusterd_defrag_start_validate (volinfo, op_errstr, len);
|
||||
ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);
|
||||
if (ret)
|
||||
goto out;
|
||||
if (!volinfo->rebal.defrag)
|
||||
@ -544,8 +559,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
ret = glusterd_defrag_start_validate (volinfo,
|
||||
msg, sizeof (msg));
|
||||
ret = glusterd_defrag_start_validate (volinfo, msg,
|
||||
sizeof (msg),
|
||||
GD_OP_REBALANCE);
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_DEBUG,
|
||||
"start validate failed");
|
||||
@ -654,10 +670,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
|
||||
cmd, NULL, GD_OP_REBALANCE);
|
||||
break;
|
||||
case GF_DEFRAG_CMD_STOP:
|
||||
/* Clear task-id only on explicitly stopping the
|
||||
* rebalance process.
|
||||
/* Clear task-id only on explicitly stopping rebalance.
|
||||
* Also clear the stored operation, so it doesn't cause trouble
|
||||
* with future rebalance/remove-brick starts
|
||||
*/
|
||||
uuid_clear (volinfo->rebal.rebalance_id);
|
||||
volinfo->rebal.op = GD_OP_NONE;
|
||||
|
||||
/* Fall back to the old volume file in case of decommission*/
|
||||
list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
|
||||
|
@ -7630,3 +7630,16 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* A task is committed/completed once the task-id for it is cleared */
|
||||
gf_boolean_t
|
||||
gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo)
|
||||
{
|
||||
GF_ASSERT (volinfo);
|
||||
|
||||
if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) &&
|
||||
!uuid_is_null (volinfo->rebal.rebalance_id))
|
||||
return _gf_false;
|
||||
|
||||
return _gf_true;
|
||||
}
|
||||
|
@ -528,6 +528,10 @@ glusterd_is_same_address (char *name1, char *name2);
|
||||
|
||||
void
|
||||
gd_update_volume_op_versions (glusterd_volinfo_t *volinfo);
|
||||
|
||||
char*
|
||||
gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo);
|
||||
|
||||
gf_boolean_t
|
||||
gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo);
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user