glusterd: More checks before starting rebalance/remove-brick
Check if a previous remove-brick operation has been committed before starting a new rebalance/remove-brick task. Change-Id: I553e5ba64a6a352ca91032ab1a17997051a4494e BUG: 963541 Signed-off-by: Kaushal M <kaushal@redhat.com> Reviewed-on: http://review.gluster.org/5019 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
parent
979a17d49a
commit
7fd3898127
33
tests/bugs/bug-963541.t
Executable file
33
tests/bugs/bug-963541.t
Executable file
@ -0,0 +1,33 @@
|
|||||||
|
#!/bin/bash

# Regression test for BUG 963541.
#
# While a remove-brick task has been started but not yet committed (or
# stopped), starting a rebalance or a second remove-brick on the same volume
# must be rejected. Once the first remove-brick is committed, both operations
# must be accepted again.

. "$(dirname "$0")/../include.rc"

cleanup;

TEST glusterd
TEST pidof glusterd

TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
TEST $CLI volume start $V0;

# Start a remove-brick and try to start a rebalance/remove-brick without
# committing; both follow-up starts are expected to fail.
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start

TEST ! $CLI volume rebalance $V0 start
TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start

# Try to start rebalance/remove-brick again after commit
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit

# Diagnostic output only (deliberately not a TEST). Uses $CLI like every other
# command in this script instead of a bare `gluster` looked up via PATH.
# NOTE(review): $CLI is presumably defined by include.rc - confirm.
$CLI volume status

TEST $CLI volume rebalance $V0 start
TEST $CLI volume rebalance $V0 stop

TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop

TEST $CLI volume stop $V0

cleanup;
|
||||||
|
|
@ -1264,6 +1264,17 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
|
|||||||
gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
|
gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
if (!gd_is_remove_brick_committed (volinfo)) {
|
||||||
|
snprintf (msg, sizeof (msg), "An earlier remove-brick "
|
||||||
|
"task exists for volume %s. Either commit it"
|
||||||
|
" or stop it before starting a new task.",
|
||||||
|
volinfo->volname);
|
||||||
|
errstr = gf_strdup (msg);
|
||||||
|
gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick"
|
||||||
|
" task exists for volume %s.",
|
||||||
|
volinfo->volname);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
if (glusterd_is_defrag_on(volinfo)) {
|
if (glusterd_is_defrag_on(volinfo)) {
|
||||||
errstr = gf_strdup("Rebalance is in progress. Please "
|
errstr = gf_strdup("Rebalance is in progress. Please "
|
||||||
"retry after completion");
|
"retry after completion");
|
||||||
@ -1538,9 +1549,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Clear task-id on commmitting/stopping of remove-brick operation */
|
/* Clear task-id & rebal.op on committing/stopping remove-brick */
|
||||||
if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS))
|
if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) {
|
||||||
uuid_clear (volinfo->rebal.rebalance_id);
|
uuid_clear (volinfo->rebal.rebalance_id);
|
||||||
|
volinfo->rebal.op = GD_OP_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
ret = -1;
|
ret = -1;
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
|
@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
|
|||||||
int count, void *myframe);
|
int count, void *myframe);
|
||||||
int
|
int
|
||||||
glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
|
glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
|
||||||
size_t len)
|
size_t len, glusterd_op_t op)
|
||||||
{
|
{
|
||||||
int ret = -1;
|
int ret = -1;
|
||||||
|
xlator_t *this = NULL;
|
||||||
|
|
||||||
|
this = THIS;
|
||||||
|
GF_ASSERT (this);
|
||||||
|
|
||||||
|
/* Check only if operation is not remove-brick */
|
||||||
|
if ((GD_OP_REMOVE_BRICK != op) &&
|
||||||
|
!gd_is_remove_brick_committed (volinfo)) {
|
||||||
|
gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on "
|
||||||
|
"volume %s is not yet committed", volinfo->volname);
|
||||||
|
snprintf (op_errstr, len, "A remove-brick task on volume %s is"
|
||||||
|
" not yet committed. Either commit or stop the "
|
||||||
|
"remove-brick task.", volinfo->volname);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
if (glusterd_is_defrag_on (volinfo)) {
|
if (glusterd_is_defrag_on (volinfo)) {
|
||||||
gf_log ("glusterd", GF_LOG_DEBUG,
|
gf_log (this->name, GF_LOG_DEBUG,
|
||||||
"rebalance on volume %s already started",
|
"rebalance on volume %s already started",
|
||||||
volinfo->volname);
|
volinfo->volname);
|
||||||
snprintf (op_errstr, len, "Rebalance on %s is already started",
|
snprintf (op_errstr, len, "Rebalance on %s is already started",
|
||||||
@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
|
|||||||
|
|
||||||
if (glusterd_is_rb_started (volinfo) ||
|
if (glusterd_is_rb_started (volinfo) ||
|
||||||
glusterd_is_rb_paused (volinfo)) {
|
glusterd_is_rb_paused (volinfo)) {
|
||||||
gf_log ("glusterd", GF_LOG_DEBUG,
|
gf_log (this->name, GF_LOG_DEBUG,
|
||||||
"Rebalance failed as replace brick is in progress on volume %s",
|
"Rebalance failed as replace brick is in progress on volume %s",
|
||||||
volinfo->volname);
|
volinfo->volname);
|
||||||
snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
|
snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
|
||||||
@ -66,7 +81,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
|
|||||||
}
|
}
|
||||||
ret = 0;
|
ret = 0;
|
||||||
out:
|
out:
|
||||||
gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
|
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,7 +205,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
|
|||||||
GF_ASSERT (volinfo);
|
GF_ASSERT (volinfo);
|
||||||
GF_ASSERT (op_errstr);
|
GF_ASSERT (op_errstr);
|
||||||
|
|
||||||
ret = glusterd_defrag_start_validate (volinfo, op_errstr, len);
|
ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
if (!volinfo->rebal.defrag)
|
if (!volinfo->rebal.defrag)
|
||||||
@ -544,8 +559,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
|
|||||||
ret = 0;
|
ret = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ret = glusterd_defrag_start_validate (volinfo,
|
ret = glusterd_defrag_start_validate (volinfo, msg,
|
||||||
msg, sizeof (msg));
|
sizeof (msg),
|
||||||
|
GD_OP_REBALANCE);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
gf_log (this->name, GF_LOG_DEBUG,
|
gf_log (this->name, GF_LOG_DEBUG,
|
||||||
"start validate failed");
|
"start validate failed");
|
||||||
@ -654,10 +670,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
|
|||||||
cmd, NULL, GD_OP_REBALANCE);
|
cmd, NULL, GD_OP_REBALANCE);
|
||||||
break;
|
break;
|
||||||
case GF_DEFRAG_CMD_STOP:
|
case GF_DEFRAG_CMD_STOP:
|
||||||
/* Clear task-id only on explicitly stopping the
|
/* Clear task-id only on explicitly stopping rebalance.
|
||||||
* rebalance process.
|
* Also clear the stored operation, so it doesn't cause trouble
|
||||||
|
* with future rebalance/remove-brick starts
|
||||||
*/
|
*/
|
||||||
uuid_clear (volinfo->rebal.rebalance_id);
|
uuid_clear (volinfo->rebal.rebalance_id);
|
||||||
|
volinfo->rebal.op = GD_OP_NONE;
|
||||||
|
|
||||||
/* Fall back to the old volume file in case of decommission*/
|
/* Fall back to the old volume file in case of decommission*/
|
||||||
list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
|
list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
|
||||||
|
@ -7630,3 +7630,16 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* A task is committed/completed once the task-id for it is cleared */
|
||||||
|
gf_boolean_t
|
||||||
|
gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo)
|
||||||
|
{
|
||||||
|
GF_ASSERT (volinfo);
|
||||||
|
|
||||||
|
if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) &&
|
||||||
|
!uuid_is_null (volinfo->rebal.rebalance_id))
|
||||||
|
return _gf_false;
|
||||||
|
|
||||||
|
return _gf_true;
|
||||||
|
}
|
||||||
|
@ -528,6 +528,10 @@ glusterd_is_same_address (char *name1, char *name2);
|
|||||||
|
|
||||||
void
|
void
|
||||||
gd_update_volume_op_versions (glusterd_volinfo_t *volinfo);
|
gd_update_volume_op_versions (glusterd_volinfo_t *volinfo);
|
||||||
|
|
||||||
char*
|
char*
|
||||||
gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo);
|
gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo);
|
||||||
|
|
||||||
|
gf_boolean_t
|
||||||
|
gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo);
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user