cluster/dht: Correct min_free_disk behaviour
Problem: Files were being created in a subvol which had less than min-free-disk space available, even in cases where other subvols with more space were available. Solution: Changed the logic to look for the subvol which has the most space available. In cases where all the subvols have less than min-free-disk available, the one with maximum space and at least one free inode is chosen. Known Issue: Cannot ensure that the first file created right after the min-free-disk value is crossed on a brick will be created on another brick, because the disk usage stat takes some time to update in the gluster process. Will fix that as part of another bug. Change-Id: If3ae0bf5a44f8739ce35b3ee3f191009ddd44455 BUG: 858488 Signed-off-by: Raghavendra Talur <rtalur@redhat.com> Reviewed-on: http://review.gluster.org/4420 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
This commit is contained in:
parent
50f0882051
commit
2a46c8769b
114
tests/bugs/bug-858488-min-free-disk.t
Normal file
114
tests/bugs/bug-858488-min-free-disk.t
Normal file
@ -0,0 +1,114 @@
|
||||
#!/bin/bash

. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc

cleanup;

# Print the PID of the first process whose "ps ax" listing matches $1.
function pidgrep()
{
        ps ax | grep "$1" | awk '{print $1}' | head -1
}

## Start glusterd
TEST glusterd;
TEST pidof glusterd;
TEST $CLI volume info;

## Create loopback-backed XFS partitions for the bricks, deliberately of
## different sizes (100M and 200M) so one brick fills up first.
TEST truncate -s 100M $B0/brick1
TEST truncate -s 200M $B0/brick2
TEST LO1=`losetup --find --show $B0/brick1`
TEST mkfs.xfs $LO1
TEST LO2=`losetup --find --show $B0/brick2`
TEST mkfs.xfs $LO2
TEST mkdir -p $B0/${V0}1 $B0/${V0}2
TEST mount -t xfs $LO1 $B0/${V0}1
TEST mount -t xfs $LO2 $B0/${V0}2

## Lets create volume
TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};

## Verify volume is created
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';

## Start volume and verify
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
# Reuse pidgrep instead of duplicating its ps|grep|awk|head pipeline.
MOUNT_PID=$(pidgrep "glusterfs -s $H0 --volfile-id=$V0 --acl $M0")

## Real test starts here
## ----------------------------------------------------------------------------

MINFREEDISKVALUE=90

## Set min free disk to MINFREEDISKVALUE percent
TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE

## We need to have a file name to brick map based on hash: find one file
## name that hashes to each brick.  This info is used in test case 0.
i=1
CONTINUE=2
BRICK1FILE=0
BRICK2FILE=0
while [[ $CONTINUE -ne 0 ]]
do
        dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1

        if [[ -e $B0/${V0}1/file$i.data && $BRICK1FILE = "0" ]]
        then
                BRICK1FILE=file$i.data
                # Explicit arithmetic: the original "CONTINUE=$CONTINUE-1"
                # assigned the literal string "2-1" and only worked because
                # [[ -ne ]] happens to evaluate it arithmetically.
                CONTINUE=$((CONTINUE - 1))
        fi

        if [[ -e $B0/${V0}2/file$i.data && $BRICK2FILE = "0" ]]
        then
                BRICK2FILE=file$i.data
                CONTINUE=$((CONTINUE - 1))
        fi

        rm $M0/file$i.data
        let i++
done

## Bring free space on one of the bricks to less than the min-free value by
## creating one big file.
dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1

# Lets find out where it was created; the file that hashes to that brick
# should now be diverted to the other one.
if [ -f $B0/${V0}1/fillonebrick.data ]
then
        FILETOCREATE=$BRICK1FILE
        OTHERBRICK=$B0/${V0}2
else
        FILETOCREATE=$BRICK2FILE
        OTHERBRICK=$B0/${V0}1
fi

##--------------------------------TEST CASE 0-----------------------------------
## If we try to create a file which should go into the full brick as per hash,
## it should go into the other brick instead.

## Before that let us create files just to make gluster refresh the stat.
## Using touch so it should not change the disk usage stats.
for k in {1..20};
do
        touch $M0/dummyfile$k
done

dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1
TEST [ -e $OTHERBRICK/$FILETOCREATE ]

## Done testing, lets clean up
EXPECT "$MOUNT_PID" pidgrep $MOUNT_PID
TEST rm -rf $M0/*

## Finish up
TEST $CLI volume stop $V0;
EXPECT 'Stopped' volinfo_field $V0 'Status';
$CLI volume delete $V0;

cleanup;
|
@ -168,6 +168,21 @@ function cleanup()
|
||||
{
|
||||
killall -15 glusterfs glusterfsd glusterd 2>/dev/null || true;
|
||||
killall -9 glusterfs glusterfsd glusterd 2>/dev/null || true;
|
||||
|
||||
MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'`
|
||||
for m in $MOUNTPOINTS;
|
||||
do
|
||||
umount $m
|
||||
done
|
||||
|
||||
|
||||
LOOPDEVICES=`losetup -a | grep "$B0/" | awk '{print $1}' | tr -d :`
|
||||
for l in $LOOPDEVICES;
|
||||
do
|
||||
losetup -d $l
|
||||
done
|
||||
|
||||
|
||||
rm -rf /var/lib/glusterd/* $B0/* /etc/glusterd/*;
|
||||
|
||||
umount -l $M0 2>/dev/null || true;
|
||||
|
@ -724,4 +724,8 @@ int
|
||||
dht_dir_has_layout (dict_t *xattr);
|
||||
gf_boolean_t
|
||||
dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
|
||||
xlator_t *
|
||||
dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol);
|
||||
xlator_t *
|
||||
dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol);
|
||||
#endif/* _DHT_H */
|
||||
|
@ -248,12 +248,11 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
|
||||
return is_subvol_filled;
|
||||
}
|
||||
|
||||
|
||||
/*Get the best subvolume to create the file in*/
|
||||
xlator_t *
|
||||
dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
|
||||
{
|
||||
int i = 0;
|
||||
double max = 0;
|
||||
double max_inodes = 0;
|
||||
xlator_t *avail_subvol = NULL;
|
||||
dht_conf_t *conf = NULL;
|
||||
|
||||
@ -261,37 +260,96 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
|
||||
|
||||
LOCK (&conf->subvolume_lock);
|
||||
{
|
||||
for (i = 0; i < conf->subvolume_cnt; i++) {
|
||||
if (conf->disk_unit == 'p') {
|
||||
if ((conf->du_stats[i].avail_percent > max)
|
||||
&& (conf->du_stats[i].avail_inodes > max_inodes)) {
|
||||
max = conf->du_stats[i].avail_percent;
|
||||
max_inodes = conf->du_stats[i].avail_inodes;
|
||||
avail_subvol = conf->subvolumes[i];
|
||||
}
|
||||
} else {
|
||||
if ((conf->du_stats[i].avail_space > max)
|
||||
&& (conf->du_stats[i].avail_inodes > max_inodes)) {
|
||||
max = conf->du_stats[i].avail_space;
|
||||
max_inodes = conf->du_stats[i].avail_inodes;
|
||||
avail_subvol = conf->subvolumes[i];
|
||||
}
|
||||
avail_subvol = dht_subvol_with_free_space_inodes(this, subvol);
|
||||
if(!avail_subvol)
|
||||
{
|
||||
avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
|
||||
subvol);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
UNLOCK (&conf->subvolume_lock);
|
||||
|
||||
if (!avail_subvol) {
|
||||
gf_log (this->name, GF_LOG_DEBUG,
|
||||
"no subvolume has enough free space and inodes to create");
|
||||
gf_log (this->name,
|
||||
GF_LOG_DEBUG,
|
||||
"no subvolume has enough free space and/or inodes\
|
||||
to create");
|
||||
avail_subvol = subvol;
|
||||
}
|
||||
|
||||
if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes))
|
||||
avail_subvol = subvol;
|
||||
|
||||
if (!avail_subvol)
|
||||
avail_subvol = subvol;
|
||||
|
||||
return avail_subvol;
|
||||
}
|
||||
|
||||
/*Get subvolume which has both space and inodes more than the min criteria*/
|
||||
xlator_t *
|
||||
dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol)
|
||||
{
|
||||
int i = 0;
|
||||
double max = 0;
|
||||
double max_inodes = 0;
|
||||
|
||||
xlator_t *avail_subvol = NULL;
|
||||
dht_conf_t *conf = NULL;
|
||||
|
||||
conf = this->private;
|
||||
|
||||
for(i=0; i < conf->subvolume_cnt; i++) {
|
||||
if ((conf->disk_unit == 'p') &&
|
||||
(conf->du_stats[i].avail_percent > conf->min_free_disk) &&
|
||||
(conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
|
||||
if ((conf->du_stats[i].avail_inodes > max_inodes) ||
|
||||
(conf->du_stats[i].avail_percent > max)) {
|
||||
max = conf->du_stats[i].avail_percent;
|
||||
max_inodes = conf->du_stats[i].avail_inodes;
|
||||
avail_subvol = conf->subvolumes[i];
|
||||
}
|
||||
}
|
||||
|
||||
if ((conf->disk_unit != 'p') &&
|
||||
(conf->du_stats[i].avail_space > conf->min_free_disk) &&
|
||||
(conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
|
||||
if ((conf->du_stats[i].avail_inodes > max_inodes) ||
|
||||
(conf->du_stats[i].avail_space > max)) {
|
||||
max = conf->du_stats[i].avail_space;
|
||||
max_inodes = conf->du_stats[i].avail_inodes;
|
||||
avail_subvol = conf->subvolumes[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return avail_subvol;
|
||||
}
|
||||
|
||||
|
||||
/* Get subvol which has atleast one inode and maximum space */
|
||||
xlator_t *
|
||||
dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol)
|
||||
{
|
||||
int i = 0;
|
||||
double max = 0;
|
||||
|
||||
xlator_t *avail_subvol = NULL;
|
||||
dht_conf_t *conf = NULL;
|
||||
|
||||
conf = this->private;
|
||||
|
||||
for (i = 0; i < conf->subvolume_cnt; i++) {
|
||||
if (conf->disk_unit == 'p') {
|
||||
if ((conf->du_stats[i].avail_percent > max)
|
||||
&& (conf->du_stats[i].avail_inodes > 0 )) {
|
||||
max = conf->du_stats[i].avail_percent;
|
||||
avail_subvol = conf->subvolumes[i];
|
||||
}
|
||||
} else {
|
||||
if ((conf->du_stats[i].avail_space > max)
|
||||
&& (conf->du_stats[i].avail_inodes > 0)) {
|
||||
max = conf->du_stats[i].avail_space;
|
||||
avail_subvol = conf->subvolumes[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return avail_subvol;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user