cluster/distribute: If cached_subvol is down, return ENOTCONN in lookup

When we follow a linkfile and the lookup returns an ENOTCONN error, return
that error: the cached subvol is down, so lookup_everywhere won't succeed,
and it actually ends up removing the linkfile and thereby clearing the namespace.

Change-Id: I772bf71531bc646e8fb62d3e8549a5fe0f3896da
BUG: 893378
Signed-off-by: shishir gowda <sgowda@redhat.com>
Reviewed-on: http://review.gluster.org/4383
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
This commit is contained in:
shishir gowda 2013-01-15 00:35:21 +05:30 committed by Anand Avati
parent e95e8f8715
commit 9f7ff7df94
2 changed files with 81 additions and 1 deletions

71
tests/bugs/bug-893378.t Executable file
View File

@ -0,0 +1,71 @@
#!/bin/bash
# Regression test for BUG 893378: a lookup that follows a DHT linkfile while
# the subvolume holding the data file is down.

# Source the shared include file; cleanup, TEST, EXPECT and the $CLI/$V0/$B0/
# $H0/$M0 variables used below are presumably defined there -- confirm.
. $(dirname $0)/../include.rc
cleanup;
# Number of bricks scanned by file_has_linkfile; matches the three bricks
# passed to "volume create" further down.
BRICK_COUNT=3
# Count the bricks on which file $1 exists and remember which ones they are.
#
# Arguments:
#   $1 - file name, relative to each brick root
# Globals read:
#   B0, V0      - brick roots are "$B0/${V0}<index>"
#   BRICK_COUNT - number of brick indices to scan, starting at 0
# Globals written:
#   BRICK1, BRICK2, ... - index of the 1st, 2nd, ... brick holding the file
# Returns:
#   the number of bricks on which the file was found (as the exit status)
function file_has_linkfile()
{
        local idx=0
        local found=0

        while [ "$idx" -lt "$BRICK_COUNT" ]
        do
                # Quote the path so brick roots containing whitespace are
                # still probed as a single argument.
                if stat "$B0/${V0}${idx}/$1" >/dev/null 2>&1
                then
                        found=$(( found + 1 ))
                        # No local named BRICK<n> exists, so printf -v
                        # assigns the global that get_cached_brick reads.
                        printf -v "BRICK${found}" '%s' "$idx"
                fi
                idx=$(( idx + 1 ))
        done

        return "$found"
}
# Work out which of the two bricks recorded by file_has_linkfile holds the
# real data file, i.e. the copy that does NOT carry the
# trusted.glusterfs.dht.linkto xattr.
#
# Arguments:
#   $1 - (optional) file name relative to the brick root; defaults to "2",
#        the only file this script inspects, so the existing no-argument
#        call keeps working.
# Globals read:
#   B0, V0         - brick roots are "$B0/${V0}<index>"
#   BRICK1, BRICK2 - brick indices set by file_has_linkfile
# Returns:
#   the index of the brick holding the data file (as the exit status);
#   0 when neither candidate qualifies, which is indistinguishable from
#   brick 0.
function get_cached_brick()
{
        local file="${1:-2}"
        local slot brick_var brick rc
        local cached=0

        for slot in 1 2
        do
                # Indirect expansion: read the value of BRICK1 / BRICK2.
                # Writing $BRICK$i would expand the unset variable BRICK
                # followed by the loop counter instead.
                brick_var="BRICK${slot}"
                brick="${!brick_var}"
                [ -n "$brick" ] || continue

                rc=0
                getfattr -n trusted.glusterfs.dht.linkto -e text \
                        "$B0/${V0}${brick}/${file}" >/dev/null 2>&1 || rc=$?
                # Exit status 1 is treated as "xattr not present", so this
                # copy is taken to be the data file rather than the linkfile.
                if [ "$rc" -eq 1 ]
                then
                        cached="$brick"
                        break
                fi
        done

        return "$cached"
}
# Start the management daemon and make sure it is running.
TEST glusterd
TEST pidof glusterd
# Create and start a volume made of three bricks on this host.
TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
TEST $CLI volume start $V0
## Mount FUSE
# Attribute and entry timeouts are set to 0 on the mount.
TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
## create a linkfile on subvolume 0
# NOTE(review): the rename is what is expected to leave a linkfile behind;
# which subvolume it lands on is not verified here.
TEST touch $M0/1
TEST mv $M0/1 $M0/2
# Count the bricks on which "2" exists; the function reports the count
# through its exit status.
file_has_linkfile 2
has_link=$?
# Exactly two copies means a linkfile plus the data file were found.
if [ $has_link -eq 2 ]
then
# Find the brick holding the data file; its index comes back as the exit status.
get_cached_brick
CACHED=$?
# Kill a brick process
# The pid is read from that brick's pid file under /var/lib/glusterd.
kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}$CACHED.pid`;
fi
## trigger a lookup
ls -l $M0/2 2>/dev/null
## fail dd if file exists.
# conv=excl makes dd refuse to write to an output file that already exists.
dd if=/dev/zero of=$M0/2 bs=1 count=1 conv=excl 2>/dev/null
# The dd above is expected to have exited with status 1.
EXPECT "1" echo $?

View File

@ -1095,7 +1095,16 @@ dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) failed (%s)",
local->loc.path, subvol->name, strerror (op_errno));
goto err;
/* If cached subvol returned ENOTCONN, do not do
lookup_everywhere. We need to make sure linkfile does not get
removed, which can take away the namespace, and subvol is
anyways down. */
if (op_errno != ENOTCONN)
goto err;
else
goto unwind;
}
if (check_is_dir (inode, stbuf, xattr)) {