cluster/dht: Fix min-free-disk calculations when quota-deem-statfs is on
PROBLEM: As part of file creation, DHT sends a statfs call to all of its sub-volumes and expects in return the local space consumption and availability on each one of them. This information is used by DHT to ensure that atleast min-free-disk amount of space is left on each sub-volume in the event that there ARE other sub-volumes with more space available. But when quota-deem-statfs is enabled, quota xlator on every brick unwinds the statfs call with volume-wide consumption of disk space. This leads to miscalculation in min-free-disk algo, thereby misleading DHT at some point, into thinking all sub-volumes have equal available space, in which case DHT keeps sending new file creates to subvol-0, causing it to become 100% full at some point although there ARE other subvols with ample space available. FIX: The fix is to make quota_statfs() behave as if quota xlator weren't enabled, thereby making every brick return only its local consumption and disk space availability. Change-Id: I211371a1eddb220037bd36a128973938ea8124c2 BUG: 1099890 Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Reviewed-on: http://review.gluster.org/7845 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
parent
0d26de1b0a
commit
db022ef7ec
@ -89,6 +89,7 @@
|
||||
#define GF_XATTR_USER_PATHINFO_KEY "glusterfs.pathinfo"
|
||||
#define QUOTA_LIMIT_KEY "trusted.glusterfs.quota.limit-set"
|
||||
#define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup"
|
||||
#define GF_INTERNAL_IGNORE_DEEM_STATFS "ignore-deem-statfs"
|
||||
|
||||
#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
|
||||
|
||||
|
120
tests/bugs/bug-1099890.t
Normal file
120
tests/bugs/bug-1099890.t
Normal file
@ -0,0 +1,120 @@
|
||||
#!/bin/bash
|
||||
|
||||
. $(dirname $0)/../include.rc
|
||||
. $(dirname $0)/../volume.rc
|
||||
. $(dirname $0)/../dht.rc
|
||||
|
||||
## TO-DO: Fix the following once the dht du refresh interval issue is fixed:
|
||||
## 1. Do away with sleep(1).
|
||||
## 2. Do away with creation of empty files.
|
||||
|
||||
cleanup;
|
||||
|
||||
TEST glusterd;
|
||||
TEST pidof glusterd;
|
||||
|
||||
# Create 2 loop devices, one per brick.
|
||||
TEST truncate -s 100M $B0/brick1
|
||||
TEST truncate -s 100M $B0/brick2
|
||||
|
||||
TEST L1=`losetup --find --show $B0/brick1`
|
||||
TEST mkfs.xfs $L1
|
||||
|
||||
TEST L2=`losetup --find --show $B0/brick2`
|
||||
TEST mkfs.xfs $L2
|
||||
|
||||
TEST mkdir -p $B0/${V0}{1,2}
|
||||
|
||||
TEST mount -t xfs $L1 $B0/${V0}1
|
||||
TEST mount -t xfs $L2 $B0/${V0}2
|
||||
|
||||
# Create a plain distribute volume with 2 subvols.
|
||||
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
|
||||
|
||||
TEST $CLI volume start $V0;
|
||||
EXPECT "Started" volinfo_field $V0 'Status';
|
||||
|
||||
TEST $CLI volume quota $V0 enable;
|
||||
|
||||
TEST $CLI volume set $V0 features.quota-deem-statfs on
|
||||
|
||||
TEST $CLI volume quota $V0 limit-usage / 150MB;
|
||||
|
||||
TEST $CLI volume set $V0 cluster.min-free-disk 50%
|
||||
|
||||
TEST glusterfs -s $H0 --volfile-id=$V0 $M0
|
||||
|
||||
# Make sure quota-deem-statfs is working as expected
|
||||
EXPECT "150M" echo `df -h $M0 -P | tail -1 | awk {'print $2'}`
|
||||
|
||||
# Create a new file 'foo' under the root of the volume, which hashes to subvol-0
|
||||
# of DHT, that consumes 40M
|
||||
TEST dd if=/dev/zero of=$M0/foo bs=5M count=8
|
||||
|
||||
TEST stat $B0/${V0}1/foo
|
||||
TEST ! stat $B0/${V0}2/foo
|
||||
|
||||
# Create a new file 'bar' under the root of the volume, which hashes to subvol-1
|
||||
# of DHT, that consumes 40M
|
||||
TEST dd if=/dev/zero of=$M0/bar bs=5M count=8
|
||||
|
||||
TEST ! stat $B0/${V0}1/bar
|
||||
TEST stat $B0/${V0}2/bar
|
||||
|
||||
# Touch a zero-byte file on the root of the volume to make sure the statfs data
|
||||
# on DHT is refreshed
|
||||
sleep 1;
|
||||
TEST touch $M0/empty1;
|
||||
|
||||
# At this point, the available space on each subvol {60M,60M} is greater than
|
||||
# their min-free-disk {50M,50M}, but if this bug still exists, then
|
||||
# the total available space on the volume as perceived by DHT should be less
|
||||
# than min-free-disk, i.e.,
|
||||
#
|
||||
# consumed space returned per subvol by quota = (40M + 40M) = 80M
|
||||
#
|
||||
# Therefore, consumed space per subvol computed by DHT WITHOUT the fix would be:
|
||||
# (80M/150M)*100 = 53%
|
||||
#
|
||||
# Available space per subvol as perceived by DHT with the bug = 47%
|
||||
# which is less than min-free-disk
|
||||
|
||||
# Now I create a file that hashes to subvol-1 (counting from 0) of DHT.
|
||||
# If this bug still exists,then DHT should be routing this creation to subvol-0.
|
||||
# If this bug is fixed, then DHT should be routing the creation to subvol-1 only
|
||||
# as it has more than min-free-disk space available.
|
||||
|
||||
TEST dd if=/dev/zero of=$M0/file bs=1K count=1
|
||||
sleep 1;
|
||||
TEST ! stat $B0/${V0}1/file
|
||||
TEST stat $B0/${V0}2/file
|
||||
|
||||
# Touch another zero-byte file on the root of the volume to refresh statfs
|
||||
# values stored by DHT.
|
||||
|
||||
TEST touch $M0/empty2;
|
||||
|
||||
# Now I create a new file that hashes to subvol-0, at the end of which, there
|
||||
# will be less than min-free-disk space available on it.
|
||||
TEST dd if=/dev/zero of=$M0/fil bs=5M count=4
|
||||
sleep 1;
|
||||
TEST stat $B0/${V0}1/fil
|
||||
TEST ! stat $B0/${V0}2/fil
|
||||
|
||||
# Touch to refresh statfs info cached by DHT
|
||||
|
||||
TEST touch $M0/empty3;
|
||||
|
||||
# Now I create a file that hashes to subvol-0 but since it has less than
|
||||
# min-free-disk space available, its data will be cached on subvol-1.
|
||||
|
||||
TEST dd if=/dev/zero of=$M0/zz bs=5M count=1
|
||||
|
||||
TEST stat $B0/${V0}1/zz
|
||||
TEST stat $B0/${V0}2/zz
|
||||
|
||||
EXPECT "$V0-client-1" dht_get_linkto_target "$B0/${V0}1/zz"
|
||||
|
||||
EXPECT "1" is_dht_linkfile "$B0/${V0}1/zz"
|
||||
|
||||
cleanup;
|
19
tests/dht.rc
19
tests/dht.rc
@ -91,3 +91,22 @@ function remove_brick_completed()
|
||||
echo $val
|
||||
return $val
|
||||
}
|
||||
|
||||
function dht_get_linkto_target()
|
||||
{
|
||||
local path=$1;
|
||||
echo `getfattr -d -m . -e text --only-values --absolute-names --name=trusted.glusterfs.dht.linkto $path`
|
||||
}
|
||||
|
||||
function is_dht_linkfile()
|
||||
{
|
||||
local path=$1
|
||||
retval=0
|
||||
local output=`stat --format=%a $path`
|
||||
if [ $output -eq 1000 ]; then
|
||||
retval=1
|
||||
fi
|
||||
|
||||
echo $retval
|
||||
return $retval
|
||||
}
|
||||
|
@ -135,6 +135,7 @@ int
|
||||
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
|
||||
{
|
||||
int i = 0;
|
||||
int ret = -1;
|
||||
dht_conf_t *conf = NULL;
|
||||
call_frame_t *statfs_frame = NULL;
|
||||
dht_local_t *statfs_local = NULL;
|
||||
@ -164,12 +165,25 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
|
||||
goto err;
|
||||
}
|
||||
|
||||
statfs_local->params = dict_new ();
|
||||
if (!statfs_local->params)
|
||||
goto err;
|
||||
|
||||
ret = dict_set_int8 (statfs_local->params,
|
||||
GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
|
||||
if (ret) {
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"Failed to set "
|
||||
GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
|
||||
goto err;
|
||||
}
|
||||
|
||||
statfs_local->call_cnt = conf->subvolume_cnt;
|
||||
for (i = 0; i < conf->subvolume_cnt; i++) {
|
||||
STACK_WIND (statfs_frame, dht_du_info_cbk,
|
||||
conf->subvolumes[i],
|
||||
conf->subvolumes[i]->fops->statfs,
|
||||
&tmp_loc, NULL);
|
||||
&tmp_loc, statfs_local->params);
|
||||
}
|
||||
|
||||
conf->last_stat_fetch.tv_sec = tv.tv_sec;
|
||||
|
@ -3808,16 +3808,24 @@ out:
|
||||
int32_t
|
||||
quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
|
||||
{
|
||||
quota_local_t *local = NULL;
|
||||
int op_errno = 0;
|
||||
int ret = -1;
|
||||
quota_priv_t *priv = NULL;
|
||||
int op_errno = 0;
|
||||
int ret = -1;
|
||||
int8_t ignore_deem_statfs = 0;
|
||||
quota_priv_t *priv = NULL;
|
||||
quota_local_t *local = NULL;
|
||||
|
||||
priv = this->private;
|
||||
GF_ASSERT (loc);
|
||||
|
||||
WIND_IF_QUOTAOFF (priv->is_quota_on, off);
|
||||
|
||||
ret = dict_get_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS,
|
||||
&ignore_deem_statfs);
|
||||
ret = 0;
|
||||
|
||||
if (ignore_deem_statfs)
|
||||
goto off;
|
||||
|
||||
if (priv->consider_statfs && loc->inode) {
|
||||
local = quota_local_new ();
|
||||
if (!local) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user