cluster/dht: Correct min_free_disk behaviour

Problem:
Files were being created in a subvol which had less than
min-free-disk space available, even when other subvols
with more free space were available.

Solution:
Changed the logic to pick the subvol with the most free
space available. When every subvol has less than
min-free-disk available, the one with the maximum space
and at least one free inode is chosen.
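
In condensed form (a sketch of the new flow, not the exact
committed code; the helper names match the new functions in
the diff below), the selection order is:

    avail_subvol = dht_subvol_with_free_space_inodes (this, subvol);
    if (!avail_subvol)
            avail_subvol = dht_subvol_maxspace_nonzeroinode (this, subvol);
    if (!avail_subvol)
            avail_subvol = subvol;  /* last resort: the hashed subvol */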

Known Issue: We cannot guarantee that the first file created
right after min-free-disk is crossed on a brick will land on
another brick, because the disk-usage stats take some time to
be refreshed in the gluster process.
Will fix that as part of another bug.

Change-Id: If3ae0bf5a44f8739ce35b3ee3f191009ddd44455
BUG: 858488
Signed-off-by: Raghavendra Talur <rtalur@redhat.com>
Reviewed-on: http://review.gluster.org/4420
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Author: Raghavendra Talur, 2013-01-24 11:26:37 +05:30 (committed by Anand Avati)
parent 50f0882051
commit 2a46c8769b
4 changed files with 218 additions and 27 deletions

View File

@@ -0,0 +1,114 @@
#!/bin/bash
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc
cleanup;
function pidgrep()
{
ps ax | grep "$1" | awk '{print $1}' | head -1
}
## Start glusterd
TEST glusterd;
TEST pidof glusterd;
TEST $CLI volume info;
## Let's create partitions for the bricks
TEST truncate -s 100M $B0/brick1
TEST truncate -s 200M $B0/brick2
TEST LO1=`losetup --find --show $B0/brick1`
TEST mkfs.xfs $LO1
TEST LO2=`losetup --find --show $B0/brick2`
TEST mkfs.xfs $LO2
TEST mkdir -p $B0/${V0}1 $B0/${V0}2
TEST mount -t xfs $LO1 $B0/${V0}1
TEST mount -t xfs $LO2 $B0/${V0}2
## Let's create the volume
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
## Verify volume is created
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
## Start volume and verify
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
MOUNT_PID=`pidgrep "glusterfs -s $H0 --volfile-id=$V0 --acl $M0"`
## Real test starts here
## ----------------------------------------------------------------------------
MINFREEDISKVALUE=90
## Set min free disk to MINFREEDISKVALUE percent
TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE
## We need a file-name-to-brick mapping based on hash.
## We will use this info in test case 0.
i=1
CONTINUE=2
BRICK1FILE=0
BRICK2FILE=0
while [[ $CONTINUE -ne 0 ]]
do
dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1
if [[ -e $B0/${V0}1/file$i.data && $BRICK1FILE = "0" ]]
then
BRICK1FILE=file$i.data
CONTINUE=$((CONTINUE-1))
fi
if [[ -e $B0/${V0}2/file$i.data && $BRICK2FILE = "0" ]]
then
BRICK2FILE=file$i.data
CONTINUE=$((CONTINUE-1))
fi
rm $M0/file$i.data
let i++
done
## Bring free space on one of the bricks to less than minfree value by
## creating one big file.
dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1
## Let's find out where it was created
if [ -f $B0/${V0}1/fillonebrick.data ]
then
FILETOCREATE=$BRICK1FILE
OTHERBRICK=$B0/${V0}2
else
FILETOCREATE=$BRICK2FILE
OTHERBRICK=$B0/${V0}1
fi
##--------------------------------TEST CASE 0-----------------------------------
## If we try to create a file which should go into full brick as per hash, it
## should go into the other brick instead.
## Before that, let us create a few files just to make gluster refresh the stats.
## Using touch, so the disk usage stats should not change.
for k in {1..20};
do
touch $M0/dummyfile$k
done
dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1
TEST [ -e $OTHERBRICK/$FILETOCREATE ]
## Done testing, let's clean up
EXPECT "$MOUNT_PID" pidgrep $MOUNT_PID
TEST rm -rf $M0/*
## Finish up
TEST $CLI volume stop $V0;
EXPECT 'Stopped' volinfo_field $V0 'Status';
TEST $CLI volume delete $V0;
cleanup;

View File

@@ -168,6 +168,21 @@ function cleanup()
{
        killall -15 glusterfs glusterfsd glusterd 2>/dev/null || true;
        killall -9 glusterfs glusterfsd glusterd 2>/dev/null || true;

        MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'`
        for m in $MOUNTPOINTS;
        do
                umount $m
        done

        LOOPDEVICES=`losetup -a | grep "$B0/" | awk '{print $1}' | tr -d :`
        for l in $LOOPDEVICES;
        do
                losetup -d $l
        done

        rm -rf /var/lib/glusterd/* $B0/* /etc/glusterd/*;
        umount -l $M0 2>/dev/null || true;

View File

@@ -724,4 +724,8 @@ int
dht_dir_has_layout (dict_t *xattr);

gf_boolean_t
dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);

xlator_t *
dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol);

xlator_t *
dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol);

#endif /* _DHT_H */

View File

@@ -248,12 +248,11 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
        return is_subvol_filled;
}

/* Get the best subvolume to create the file in */
xlator_t *
dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
{
        xlator_t   *avail_subvol = NULL;
        dht_conf_t *conf = NULL;

        conf = this->private;

@@ -261,37 +260,96 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
        LOCK (&conf->subvolume_lock);
        {
                /* First pass: a subvol that satisfies both the
                 * min-free-disk and min-free-inodes criteria. */
                avail_subvol = dht_subvol_with_free_space_inodes (this,
                                                                  subvol);
                if (!avail_subvol) {
                        /* Second pass: the subvol with the maximum
                         * space and at least one free inode. */
                        avail_subvol = dht_subvol_maxspace_nonzeroinode (this,
                                                                         subvol);
                }
        }
        UNLOCK (&conf->subvolume_lock);

        if (!avail_subvol) {
                gf_log (this->name, GF_LOG_DEBUG,
                        "no subvolume has enough free space and/or inodes"
                        " to create");
                avail_subvol = subvol;
        }

        return avail_subvol;
}
/* Get a subvolume which has both space and inodes above the min criteria */
xlator_t *
dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol)
{
        int         i = 0;
        double      max = 0;
        double      max_inodes = 0;
        xlator_t   *avail_subvol = NULL;
        dht_conf_t *conf = NULL;

        conf = this->private;

        for (i = 0; i < conf->subvolume_cnt; i++) {
                /* min-free-disk is configured either as a percentage
                 * ('p') or as an absolute number of bytes. */
                if ((conf->disk_unit == 'p') &&
                    (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
                    (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
                        if ((conf->du_stats[i].avail_inodes > max_inodes) ||
                            (conf->du_stats[i].avail_percent > max)) {
                                max = conf->du_stats[i].avail_percent;
                                max_inodes = conf->du_stats[i].avail_inodes;
                                avail_subvol = conf->subvolumes[i];
                        }
                }

                if ((conf->disk_unit != 'p') &&
                    (conf->du_stats[i].avail_space > conf->min_free_disk) &&
                    (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
                        if ((conf->du_stats[i].avail_inodes > max_inodes) ||
                            (conf->du_stats[i].avail_space > max)) {
                                max = conf->du_stats[i].avail_space;
                                max_inodes = conf->du_stats[i].avail_inodes;
                                avail_subvol = conf->subvolumes[i];
                        }
                }
        }

        return avail_subvol;
}
/* Get the subvol with at least one free inode and the maximum space */
xlator_t *
dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol)
{
        int         i = 0;
        double      max = 0;
        xlator_t   *avail_subvol = NULL;
        dht_conf_t *conf = NULL;

        conf = this->private;

        for (i = 0; i < conf->subvolume_cnt; i++) {
                if (conf->disk_unit == 'p') {
                        if ((conf->du_stats[i].avail_percent > max) &&
                            (conf->du_stats[i].avail_inodes > 0)) {
                                max = conf->du_stats[i].avail_percent;
                                avail_subvol = conf->subvolumes[i];
                        }
                } else {
                        if ((conf->du_stats[i].avail_space > max) &&
                            (conf->du_stats[i].avail_inodes > 0)) {
                                max = conf->du_stats[i].avail_space;
                                avail_subvol = conf->subvolumes[i];
                        }
                }
        }

        return avail_subvol;
}
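
Purely as an illustration, the following self-contained sketch models the
two-pass selection for the percentage-based case. The struct and helper
names here are made up for the example (they are not gluster code); only
the selection logic mirrors the functions above.

    #include <stdio.h>

    /* Simplified stand-in for gluster's per-subvol disk-usage stats. */
    struct du_stat {
            const char *name;
            double      avail_percent;
            double      avail_inodes;
    };

    /* Pass 1: prefer a subvol above both minima, tracking the best
     * candidate seen so far. */
    static const char *
    pick_with_free_space_inodes (struct du_stat *s, int n,
                                 double min_free_disk,
                                 double min_free_inodes)
    {
            int         i;
            double      max = 0, max_inodes = 0;
            const char *picked = NULL;

            for (i = 0; i < n; i++) {
                    if (s[i].avail_percent > min_free_disk &&
                        s[i].avail_inodes > min_free_inodes &&
                        (s[i].avail_inodes > max_inodes ||
                         s[i].avail_percent > max)) {
                            max = s[i].avail_percent;
                            max_inodes = s[i].avail_inodes;
                            picked = s[i].name;
                    }
            }
            return picked;
    }

    /* Pass 2: fallback, maximum space with at least one free inode. */
    static const char *
    pick_maxspace_nonzeroinode (struct du_stat *s, int n)
    {
            int         i;
            double      max = 0;
            const char *picked = NULL;

            for (i = 0; i < n; i++) {
                    if (s[i].avail_percent > max && s[i].avail_inodes > 0) {
                            max = s[i].avail_percent;
                            picked = s[i].name;
                    }
            }
            return picked;
    }

    int
    main (void)
    {
            /* Both subvols are below min-free-disk (10% free required),
             * so pass 1 finds nothing and pass 2 picks brick2, which
             * has more space left. */
            struct du_stat stats[] = {
                    { "brick1", 2.0, 1000 },
                    { "brick2", 8.0, 1000 },
            };
            const char *picked;

            picked = pick_with_free_space_inodes (stats, 2, 10.0, 0.0);
            if (!picked)
                    picked = pick_maxspace_nonzeroinode (stats, 2);

            printf ("picked: %s\n", picked ? picked : "hashed subvol");
            return 0;
    }

Compiled standalone, this prints "picked: brick2": once every brick has
crossed min-free-disk, the fallback pass still spreads new files toward
whichever brick has the most space and at least one free inode.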