glusterd: Mark vol as deleted by renaming voldir before cleaning up the store
PROBLEM:
During 'volume delete', when glusterd fails to erase all information about a volume from the backend store (for instance, because rmdir() failed on non-empty directories), not only does the volume delete fail on that node, but subsequent attempts to restart glusterd also fail because the volume store is left in an inconsistent state.

FIX:
Rename the volume directory path to a new location, <working-dir>/trash/<volume-id>.deleted, and then go on to clean up its contents. The volume is considered deleted once rename() succeeds, irrespective of whether the cleanup succeeds or not.

Change-Id: Iaf18e1684f0b101808bd5e1cd53a5d55790541a8
BUG: 889630
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/4639
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Reviewed-by: Kaushal M <kaushal@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
parent
bc4350423a
commit
e125e2ae61
56
tests/bugs/bug-889630.t
Executable file
56
tests/bugs/bug-889630.t
Executable file
@ -0,0 +1,56 @@
|
||||
#!/bin/bash
|
||||
|
||||
. $(dirname $0)/../include.rc
|
||||
. $(dirname $0)/../cluster.rc
|
||||
|
||||
function check_peers {
|
||||
$CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
|
||||
}
|
||||
|
||||
function volume_count {
|
||||
local cli=$1;
|
||||
if [ $cli -eq '1' ] ; then
|
||||
$CLI_1 volume info | grep 'Volume Name' | wc -l;
|
||||
else
|
||||
$CLI_2 volume info | grep 'Volume Name' | wc -l;
|
||||
fi
|
||||
}
|
||||
|
||||
cleanup;
|
||||
|
||||
TEST launch_cluster 2;
|
||||
TEST $CLI_1 peer probe $H2;
|
||||
|
||||
EXPECT_WITHIN 20 1 check_peers
|
||||
|
||||
TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
|
||||
TEST $CLI_1 volume start $V0
|
||||
|
||||
b="B1";
|
||||
|
||||
#Create an extra file in the originator's volume store
|
||||
touch ${!b}/glusterd/vols/$V0/run/file
|
||||
|
||||
TEST $CLI_1 volume stop $V0
|
||||
#Test for self-commit failure
|
||||
TEST $CLI_1 volume delete $V0
|
||||
|
||||
#Check whether delete succeeded on both the nodes
|
||||
EXPECT "0" volume_count '1'
|
||||
EXPECT "0" volume_count '2'
|
||||
|
||||
#Check whether the volume name can be reused after deletion
|
||||
TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
|
||||
TEST $CLI_1 volume start $V0
|
||||
|
||||
#Create an extra file in the peer's volume store
|
||||
touch ${!b}/glusterd/vols/$V0/run/file
|
||||
|
||||
TEST $CLI_1 volume stop $V0
|
||||
#Test for commit failure on the other node
|
||||
TEST $CLI_2 volume delete $V0
|
||||
|
||||
EXPECT "0" volume_count '1';
|
||||
EXPECT "0" volume_count '2';
|
||||
|
||||
cleanup;
|
@ -446,12 +446,10 @@ out:
|
||||
}
|
||||
|
||||
int32_t
|
||||
glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
|
||||
glusterd_brickinfo_t *brickinfo)
|
||||
glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path)
|
||||
{
|
||||
int32_t ret = -1;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
char path[PATH_MAX] = {0,};
|
||||
char brickpath[PATH_MAX] = {0,};
|
||||
char *ptr = NULL;
|
||||
char *tmppath = NULL;
|
||||
@ -459,15 +457,11 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
|
||||
|
||||
this = THIS;
|
||||
GF_ASSERT (this);
|
||||
GF_ASSERT (volinfo);
|
||||
GF_ASSERT (brickinfo);
|
||||
|
||||
priv = this->private;
|
||||
|
||||
GF_ASSERT (priv);
|
||||
|
||||
GLUSTERD_GET_BRICK_DIR (path, volinfo, priv);
|
||||
|
||||
tmppath = gf_strdup (brickinfo->path);
|
||||
|
||||
ptr = strchr (tmppath, '/');
|
||||
@ -477,15 +471,16 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
|
||||
ptr = strchr (tmppath, '/');
|
||||
}
|
||||
|
||||
snprintf (brickpath, sizeof (brickpath), "%s/%s:%s",
|
||||
path, brickinfo->hostname, tmppath);
|
||||
snprintf (brickpath, sizeof (brickpath),
|
||||
"%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path,
|
||||
brickinfo->hostname, tmppath);
|
||||
|
||||
GF_FREE (tmppath);
|
||||
|
||||
ret = unlink (brickpath);
|
||||
|
||||
if ((ret < 0) && (errno != ENOENT)) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Unlink failed on %s, "
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Unlink failed on %s, "
|
||||
"reason: %s", brickpath, strerror(errno));
|
||||
ret = -1;
|
||||
goto out;
|
||||
@ -503,7 +498,7 @@ out:
|
||||
}
|
||||
|
||||
int32_t
|
||||
glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
|
||||
glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path)
|
||||
{
|
||||
int32_t ret = 0;
|
||||
glusterd_brickinfo_t *tmp = NULL;
|
||||
@ -520,7 +515,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
|
||||
GF_ASSERT (volinfo);
|
||||
|
||||
list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
|
||||
ret = glusterd_store_delete_brick (volinfo, tmp);
|
||||
ret = glusterd_store_delete_brick (tmp, delete_path);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@ -528,7 +523,8 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
|
||||
priv = this->private;
|
||||
GF_ASSERT (priv);
|
||||
|
||||
GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv);
|
||||
snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path,
|
||||
GLUSTERD_BRICK_INFO_DIR);
|
||||
|
||||
dir = opendir (brickdir);
|
||||
|
||||
@ -539,7 +535,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
|
||||
brickdir, entry->d_name);
|
||||
ret = unlink (path);
|
||||
if (ret && errno != ENOENT) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Unable to unlink %s, "
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Unable to unlink %s, "
|
||||
"reason: %s", path, strerror(errno));
|
||||
}
|
||||
glusterd_for_each_entry (entry, dir);
|
||||
@ -1245,14 +1241,17 @@ out:
|
||||
int32_t
|
||||
glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
|
||||
{
|
||||
char pathname[PATH_MAX] = {0,};
|
||||
int32_t ret = 0;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
DIR *dir = NULL;
|
||||
struct dirent *entry = NULL;
|
||||
char path[PATH_MAX] = {0,};
|
||||
struct stat st = {0, };
|
||||
xlator_t *this = NULL;
|
||||
char pathname[PATH_MAX] = {0,};
|
||||
int32_t ret = 0;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
DIR *dir = NULL;
|
||||
struct dirent *entry = NULL;
|
||||
char path[PATH_MAX] = {0,};
|
||||
char delete_path[PATH_MAX] = {0,};
|
||||
char trashdir[PATH_MAX] = {0,};
|
||||
struct stat st = {0, };
|
||||
xlator_t *this = NULL;
|
||||
gf_boolean_t rename_fail = _gf_false;
|
||||
|
||||
this = THIS;
|
||||
GF_ASSERT (this);
|
||||
@ -1261,29 +1260,53 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
|
||||
priv = this->private;
|
||||
|
||||
GF_ASSERT (priv);
|
||||
snprintf (pathname, sizeof (pathname), "%s/vols/%s", priv->workdir,
|
||||
volinfo->volname);
|
||||
|
||||
dir = opendir (pathname);
|
||||
if (!dir) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Failed to open directory %s."
|
||||
" Reason : %s", pathname, strerror (errno));
|
||||
GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv);
|
||||
|
||||
snprintf (delete_path, sizeof (delete_path),
|
||||
"%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir,
|
||||
uuid_utoa (volinfo->volume_id));
|
||||
|
||||
snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH,
|
||||
priv->workdir);
|
||||
|
||||
ret = mkdir (trashdir, 0777);
|
||||
if (ret && errno != EEXIST) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Failed to create trash "
|
||||
"directory, reason : %s", strerror (errno));
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
ret = glusterd_store_remove_bricks (volinfo);
|
||||
|
||||
ret = rename (pathname, delete_path);
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Failed to rename volume "
|
||||
"directory for volume %s", volinfo->volname);
|
||||
rename_fail = _gf_true;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dir = opendir (delete_path);
|
||||
if (!dir) {
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s."
|
||||
" Reason : %s", delete_path, strerror (errno));
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
ret = glusterd_store_remove_bricks (volinfo, delete_path);
|
||||
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Remove bricks failed for %s",
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Remove bricks failed for %s",
|
||||
volinfo->volname);
|
||||
}
|
||||
|
||||
glusterd_for_each_entry (entry, dir);
|
||||
while (entry) {
|
||||
|
||||
snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name);
|
||||
snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
|
||||
ret = stat (path, &st);
|
||||
if (ret == -1) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Failed to stat "
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Failed to stat "
|
||||
"entry %s : %s", path, strerror (errno));
|
||||
goto stat_failed;
|
||||
}
|
||||
@ -1293,11 +1316,12 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
|
||||
else
|
||||
ret = unlink (path);
|
||||
|
||||
if (ret)
|
||||
gf_log (this->name, GF_LOG_ERROR, " Failed to remove "
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_DEBUG, " Failed to remove "
|
||||
"%s. Reason : %s", path, strerror (errno));
|
||||
}
|
||||
|
||||
gf_log (this->name, ret ? GF_LOG_ERROR : GF_LOG_DEBUG, "%s %s",
|
||||
gf_log (this->name, GF_LOG_DEBUG, "%s %s",
|
||||
ret ? "Failed to remove":"Removed",
|
||||
entry->d_name);
|
||||
stat_failed:
|
||||
@ -1307,24 +1331,29 @@ stat_failed:
|
||||
|
||||
ret = closedir (dir);
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Failed to close dir %s. "
|
||||
"Reason : %s",pathname, strerror (errno));
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. "
|
||||
"Reason : %s",delete_path, strerror (errno));
|
||||
}
|
||||
|
||||
ret = rmdir (pathname);
|
||||
ret = rmdir (delete_path);
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Failed to rmdir: %s, err: %s",
|
||||
pathname, strerror (errno));
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s",
|
||||
delete_path, strerror (errno));
|
||||
}
|
||||
ret = rmdir (trashdir);
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:"
|
||||
" %s", trashdir, strerror (errno));
|
||||
}
|
||||
|
||||
|
||||
out:
|
||||
if (volinfo->shandle) {
|
||||
glusterd_store_handle_destroy (volinfo->shandle);
|
||||
volinfo->shandle = NULL;
|
||||
}
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
|
||||
ret = (rename_fail == _gf_true) ? -1: 0;
|
||||
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -117,8 +117,8 @@ int32_t
|
||||
glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo);
|
||||
|
||||
int32_t
|
||||
glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
|
||||
glusterd_brickinfo_t *brickinfo);
|
||||
glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo,
|
||||
char *delete_path);
|
||||
|
||||
int32_t
|
||||
glusterd_store_handle_destroy (glusterd_store_handle_t *handle);
|
||||
|
@ -5495,11 +5495,15 @@ glusterd_delete_brick (glusterd_volinfo_t* volinfo,
|
||||
glusterd_brickinfo_t *brickinfo)
|
||||
{
|
||||
int ret = 0;
|
||||
char voldir[PATH_MAX] = {0,};
|
||||
glusterd_conf_t *priv = THIS->private;
|
||||
GF_ASSERT (volinfo);
|
||||
GF_ASSERT (brickinfo);
|
||||
|
||||
GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv);
|
||||
|
||||
glusterd_delete_volfile (volinfo, brickinfo);
|
||||
glusterd_store_delete_brick (volinfo, brickinfo);
|
||||
glusterd_store_delete_brick (brickinfo, voldir);
|
||||
glusterd_brickinfo_delete (brickinfo);
|
||||
volinfo->brick_count--;
|
||||
return ret;
|
||||
|
@ -338,6 +338,7 @@ enum glusterd_vol_comp_status_ {
|
||||
#define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate"
|
||||
#define GLUSTERD_BRICK_INFO_DIR "bricks"
|
||||
#define GLUSTERD_CKSUM_FILE "cksum"
|
||||
#define GLUSTERD_TRASH "trash"
|
||||
#define GLUSTERD_NODE_STATE_FILE "node_state.info"
|
||||
|
||||
/* definitions related to replace brick */
|
||||
|
Loading…
x
Reference in New Issue
Block a user