glusterd: More checks before starting rebalance/remove-brick

Check if a previous remove-brick operation has been committed before
starting a new rebalance/remove-brick task.

Change-Id: I553e5ba64a6a352ca91032ab1a17997051a4494e
BUG: 963541
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/5019
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
Kaushal M 2013-05-16 16:03:52 +05:30 committed by Vijay Bellur
parent 979a17d49a
commit 7fd3898127
5 changed files with 93 additions and 12 deletions

33
tests/bugs/bug-963541.t Executable file
View File

@ -0,0 +1,33 @@
#!/bin/bash
. $(dirname $0)/../include.rc
cleanup;
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
TEST $CLI volume start $V0;
# Start a remove-brick and try to start a rebalance/remove-brick without committing
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
TEST ! $CLI volume rebalance $V0 start
TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
#Try to start rebalance/remove-brick again after commit
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit
gluster volume status
TEST $CLI volume rebalance $V0 start
TEST $CLI volume rebalance $V0 stop
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
TEST $CLI volume stop $V0
cleanup;

View File

@ -1264,6 +1264,17 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
if (!gd_is_remove_brick_committed (volinfo)) {
snprintf (msg, sizeof (msg), "An earlier remove-brick "
"task exists for volume %s. Either commit it"
" or stop it before starting a new task.",
volinfo->volname);
errstr = gf_strdup (msg);
gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick"
" task exists for volume %s.",
volinfo->volname);
goto out;
}
if (glusterd_is_defrag_on(volinfo)) {
errstr = gf_strdup("Rebalance is in progress. Please "
"retry after completion");
@ -1538,9 +1549,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
}
}
/* Clear task-id on commmitting/stopping of remove-brick operation */
if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS))
/* Clear task-id & rebal.op on commmitting/stopping remove-brick */
if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) {
uuid_clear (volinfo->rebal.rebalance_id);
volinfo->rebal.op = GD_OP_NONE;
}
ret = -1;
switch (cmd) {

View File

@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe);
int
glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
size_t len)
size_t len, glusterd_op_t op)
{
int ret = -1;
int ret = -1;
xlator_t *this = NULL;
this = THIS;
GF_ASSERT (this);
/* Check only if operation is not remove-brick */
if ((GD_OP_REMOVE_BRICK != op) &&
!gd_is_remove_brick_committed (volinfo)) {
gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on "
"volume %s is not yet committed", volinfo->volname);
snprintf (op_errstr, len, "A remove-brick task on volume %s is"
" not yet committed. Either commit or stop the "
"remove-brick task.", volinfo->volname);
goto out;
}
if (glusterd_is_defrag_on (volinfo)) {
gf_log ("glusterd", GF_LOG_DEBUG,
gf_log (this->name, GF_LOG_DEBUG,
"rebalance on volume %s already started",
volinfo->volname);
snprintf (op_errstr, len, "Rebalance on %s is already started",
@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
if (glusterd_is_rb_started (volinfo) ||
glusterd_is_rb_paused (volinfo)) {
gf_log ("glusterd", GF_LOG_DEBUG,
gf_log (this->name, GF_LOG_DEBUG,
"Rebalance failed as replace brick is in progress on volume %s",
volinfo->volname);
snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
@ -66,7 +81,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
}
ret = 0;
out:
gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
@ -190,7 +205,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
GF_ASSERT (volinfo);
GF_ASSERT (op_errstr);
ret = glusterd_defrag_start_validate (volinfo, op_errstr, len);
ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);
if (ret)
goto out;
if (!volinfo->rebal.defrag)
@ -544,8 +559,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
ret = 0;
}
}
ret = glusterd_defrag_start_validate (volinfo,
msg, sizeof (msg));
ret = glusterd_defrag_start_validate (volinfo, msg,
sizeof (msg),
GD_OP_REBALANCE);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"start validate failed");
@ -654,10 +670,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
cmd, NULL, GD_OP_REBALANCE);
break;
case GF_DEFRAG_CMD_STOP:
/* Clear task-id only on explicitly stopping the
* rebalance process.
/* Clear task-id only on explicitly stopping rebalance.
* Also clear the stored operation, so it doesn't cause trouble
* with future rebalance/remove-brick starts
*/
uuid_clear (volinfo->rebal.rebalance_id);
volinfo->rebal.op = GD_OP_NONE;
/* Fall back to the old volume file in case of decommission*/
list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,

View File

@ -7630,3 +7630,16 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
return;
}
/* A task is committed/completed once the task-id for it is cleared */
gf_boolean_t
gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo)
{
GF_ASSERT (volinfo);
if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) &&
!uuid_is_null (volinfo->rebal.rebalance_id))
return _gf_false;
return _gf_true;
}

View File

@ -528,6 +528,10 @@ glusterd_is_same_address (char *name1, char *name2);
void
gd_update_volume_op_versions (glusterd_volinfo_t *volinfo);
char*
gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo);
gf_boolean_t
gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo);
#endif