storage/posix: Janitor should guard against dir renames.

Problem:
Directory rename while a brick is down can cause gfid handle of that directory
to be deleted until next lookup happens on that directory.

*) Self-heal does not have intelligence to detect renames at the moment. So it
has to delete the directory 'd' using special flags, because it has to perform
'rm -rf' of that directory as it is not empty. Posix xlator implements this by
renaming the directory deleted to 'landfill' directory in '.glusterfs' where
janitor thread will perform actual rm -rf by traversing the directory. Janitor
thread wakes up every 10 minutes to check if there are any directories to be
deleted and deletes them. As part of deleting it also deletes the gfid-handles.

Steps to hit the problem:
1) On a replicate volume create a directory 'd', file in 'd' called 'f' so the
   directory 'd' is not empty.

2) Bring one of the bricks down (let's call it brick-a; the other one is brick-b)

3) Rename d to d1

4) When brick-a comes online again, self-heal deletes directory 'd' and creates
   directory 'd1' on brick-a for performing self-heal. So on brick-a,
   gfid-handle of 'd' pointing to 'd' is deleted and recreated to point to 'd1'.

5) This directory 'd' with all its directory hierarchy (for now just the file
   'f') will be under 'landfill' directory.

6) When the janitor thread wakes up, it deletes directory 'd' and the
   gfid-handle of 'd' without realizing that it now points to 'd1'. Thus 'd1'
   loses its gfid-handle.

Fix:
Delete gfid-handle for a directory only when the gfid-handle is stale.

Change-Id: I21265b3bd3852f0967d916aaa21108ae5c9e7373
BUG: 1101143
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/7879
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
This commit is contained in:
Pranith Kumar K 2014-05-23 12:51:28 +05:30 committed by Anand Avati
parent afeaab53f6
commit d240958fb3
4 changed files with 151 additions and 1 deletions

83
tests/bugs/bug-1101143.t Normal file
View File

@ -0,0 +1,83 @@
#!/bin/bash
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc
#This file checks if softlinks are removed or not when rename is done while
#a brick is down in a replica pair.
cleanup;
TEST glusterd
TEST pidof glusterd
#2-brick replica volume so one leg can be killed while the other keeps serving.
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
TEST $CLI volume set $V0 brick-log-level DEBUG
#attribute/entry timeouts are zeroed so every access goes to the bricks and is
#not answered from the fuse caches.
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
cd $M0
TEST mkdir -p d1/d2/d3
TEST mkdir -p dir1/dir1
#Record the gfid symlink paths and gfid xattrs on brick-0 BEFORE it goes down,
#so that after heal + janitor run we can check which handles survived.
d1_gfid_path=$(gf_get_gfid_backend_file_path $B0/${V0}0 d1)
d1_gfid=$(gf_get_gfid_xattr $B0/${V0}0/d1)
dir1_gfid_path=$(gf_get_gfid_backend_file_path $B0/${V0}0 dir1)
dir1_dir1_gfid_path=$(gf_get_gfid_backend_file_path $B0/${V0}0 dir1/dir1)
d1_d2_d3_old_gfid_path=$(gf_get_gfid_backend_file_path $B0/${V0}0 d1/d2/d3)
d1_d2_d3_gfid=$(gf_get_gfid_xattr $B0/${V0}0/d1/d2/d3)
TEST kill_brick $V0 $H0 $B0/${V0}0
#Rename should not delete gfid-link by janitor
TEST mv d1 d2
#Rmdir should delete gfid-link by janitor
TEST rm -rf dir1
#Stale-link will be created if we do this after self-heal where old-gfid dir will be pointing to dir with different gfid on brick-0
TEST rmdir d2/d2/d3
TEST mkdir d2/d2/d3
#Keep shd off while restarting the brick so heal starts only when we say so.
TEST $CLI volume set $V0 self-heal-daemon off
TEST $CLI volume start $V0 force
#Wait for the brick to come up
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
#Now enable self-heal-daemon so that heal will populate landfill
TEST $CLI volume set $V0 self-heal-daemon on
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
TEST $CLI volume heal $V0 full
EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0
EXPECT_NOT "0" landfill_entry_count $B0/${V0}0
#Janitor thread walks once every 10 minutes, or at the time of brick start
#So lets stop the volume and make some checks.
TEST $CLI volume stop $V0
#Test that it is pointing to gfid which is not the old one
TEST stat $d1_d2_d3_old_gfid_path #Check there is stale link file
new_gfid=$(getfattr -d -m. -e hex $d1_d2_d3_old_gfid_path | grep gfid| cut -f2 -d'=')
#Check the gfids are valid
#34 = length of "0x" + 32 hex digits of a gfid, i.e. a well-formed xattr value.
EXPECT 34 expr length $new_gfid
EXPECT 34 expr length $d1_d2_d3_gfid
EXPECT_NOT $new_gfid echo $d1_d2_d3_gfid
#restart it to make sure janitor wakes up.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $JANITOR_TIMEOUT "0" landfill_entry_count $B0/${V0}0
#After janitor cleans up, check that the directories have their softlinks
d2_gfid_path=$(gf_get_gfid_backend_file_path $B0/${V0}0 d2)
d2_gfid=$(gf_get_gfid_xattr $B0/${V0}0/d2)
#Rename keeps the gfid, so d2's handle must be the same path/gfid d1 had.
EXPECT "$d1_gfid_path" echo $d2_gfid_path
EXPECT "$d1_gfid" echo $d2_gfid
TEST stat $d1_gfid_path
#TODO afr-v2 is not healing sub-directories because heal is not marking
#new-entry changelog?. Will need to fix that. After the fix, the following line
#needs to be un-commented.
#TEST stat $(gf_get_gfid_backend_file_path $B0/${V0}0 d2/d2)
#Check that janitor removed stale links
TEST ! stat $dir1_gfid_path
TEST ! stat $dir1_dir1_gfid_path
TEST ! stat $d1_d2_d3_old_gfid_path #Check stale link file is deleted
cleanup

View File

@ -16,6 +16,7 @@ REBALANCE_TIMEOUT=120
REOPEN_TIMEOUT=20
HEAL_TIMEOUT=60
MARKER_UPDATE_TIMEOUT=20
JANITOR_TIMEOUT=60
statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump

View File

@ -364,3 +364,8 @@ function afr_get_index_count {
local brick=$1
ls $1/.glusterfs/indices/xattrop | grep -v xattrop | wc -l
}
#Count the entries queued for deletion in a brick's landfill directory.
#$1: path to the brick's backend directory.
function landfill_entry_count {
        local brick="$1"
        #Quote the expansion so brick paths containing spaces or glob
        #characters do not get word-split before reaching ls.
        ls "$brick/.glusterfs/landfill" | wc -l
}

View File

@ -972,6 +972,67 @@ out:
return ret;
}
/*
 * del_stale_dir_handle: unlink the gfid handle of a directory, but ONLY
 * when that handle is stale -- i.e. it no longer resolves to an existing
 * directory, or the directory it resolves to now carries a different
 * gfid (the directory was renamed while this brick was down and the
 * handle has since been re-pointed to the new name).
 *
 * @this: posix xlator instance (handle-path construction, logging).
 * @gfid: gfid of the directory whose handle should be examined.
 *
 * Called from janitor_walker while purging entries under the landfill
 * directory, so a missing handle/path is a normal condition here and is
 * only logged at DEBUG level.  No return value; failures are logged.
 */
static void
del_stale_dir_handle (xlator_t *this, uuid_t gfid)
{
char newpath[PATH_MAX] = {0, };
uuid_t gfid_curr = {0, };
ssize_t size = -1;
gf_boolean_t stale = _gf_false;
char *hpath = NULL;
struct stat stbuf = {0, };
struct iatt iabuf = {0, };
/* hpath = on-disk path of the gfid handle for 'gfid' under .glusterfs */
MAKE_HANDLE_GFID_PATH (hpath, this, gfid, NULL);
/* check that it is valid directory handle */
size = sys_lstat (hpath, &stbuf);
if (size < 0) {
/* handle already gone -- nothing to clean up */
gf_log (this->name, GF_LOG_DEBUG, "%s: Handle stat failed: "
"%s", hpath, strerror (errno));
goto out;
}
iatt_from_stat (&iabuf, &stbuf);
/* a directory's gfid handle is a symlink with exactly one link;
 * anything else is not a directory handle, so leave it alone */
if (iabuf.ia_nlink != 1 || !IA_ISLNK (iabuf.ia_type)) {
gf_log (this->name, GF_LOG_DEBUG, "%s: Handle nlink %d %d",
hpath, iabuf.ia_nlink, IA_ISLNK (iabuf.ia_type));
goto out;
}
/* resolve the handle to the real directory path it points at;
 * ENOENT means the symlink dangles -> the handle is stale */
size = posix_handle_path (this, gfid, NULL, newpath, sizeof (newpath));
if (size <= 0 && errno == ENOENT) {
gf_log (this->name, GF_LOG_DEBUG, "%s: %s", newpath,
strerror (ENOENT));
stale = _gf_true;
goto out;
}
/* read the gfid xattr of the resolved directory (16 bytes -- the raw
 * uuid).  ENOENT -> directory vanished meanwhile -> stale.  A gfid
 * that differs from ours -> the handle now belongs to a renamed
 * directory (the bug this function guards against) -> stale. */
size = sys_lgetxattr (newpath, GFID_XATTR_KEY, gfid_curr, 16);
if (size < 0 && errno == ENOENT) {
gf_log (this->name, GF_LOG_DEBUG, "%s: %s", newpath,
strerror (ENOENT));
stale = _gf_true;
} else if (size == 16 && uuid_compare (gfid, gfid_curr)) {
gf_log (this->name, GF_LOG_DEBUG, "%s: mismatching gfid: %s, "
"at %s", hpath, uuid_utoa (gfid_curr), newpath);
stale = _gf_true;
}
out:
if (stale) {
/* handle proven stale: remove it; a racing removal (ENOENT)
 * is fine, anything else is worth an ERROR log */
size = sys_unlink (hpath);
if (size < 0 && errno != ENOENT)
gf_log (this->name, GF_LOG_ERROR, "%s: Failed to "
"remove handle to %s (%s)", hpath, newpath,
strerror (errno));
} else if (size == 16) {
/* size == 16 is only possible after a successful xattr read
 * above with matching gfids: the handle is fresh, keep it */
gf_log (this->name, GF_LOG_DEBUG, "%s: Fresh handle for "
"%s with gfid %s", hpath, newpath,
uuid_utoa (gfid_curr));
}
return;
}
static int
janitor_walker (const char *fpath, const struct stat *sb,
@ -1002,7 +1063,7 @@ janitor_walker (const char *fpath, const struct stat *sb,
"removing directory %s", fpath);
rmdir (fpath);
posix_handle_unset (this, stbuf.ia_gfid, NULL);
del_stale_dir_handle (this, stbuf.ia_gfid);
}
break;
}