dht: support auto-NUFA option

Many people have asked for behavior like the old NUFA translator, which
still builds and appears to work but could not previously be enabled or
configured in any standard way.  This change allows NUFA to be enabled in
place of DHT from the command line, with automatic selection of the local
subvolume on each host.

Change-Id: I0065938db3922361fd450a6c1919a4cbbf6f202e
BUG: 882278
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/4234
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
This commit is contained in:
Jeff Darcy 2012-11-26 16:36:42 -05:00 committed by Anand Avati
parent 48a1738467
commit 86b01a278b
4 changed files with 156 additions and 12 deletions

72
tests/bugs/bug-882278.t Executable file
View File

@ -0,0 +1,72 @@
#!/bin/bash
. $(dirname $0)/../include.rc
cleanup
# Is there a good reason to require --fqdn elsewhere? It's worse than useless
# here.
H0=$(hostname -s)
# Wipe and recreate a brick directory so that leftovers from previous
# test runs cannot influence the file counts checked below.
# $1 - directory to recreate
function recreate {
	# Quote the argument so paths containing spaces or glob
	# characters are handled safely (unquoted expansion would be
	# word-split and glob-expanded).
	rm -rf "$1" && mkdir -p "$1"
}
# Count the number of lines matching a pattern across all files
# directly under a directory.
# $1 - grep pattern
# $2 - directory whose files are searched
function count_lines {
	# Quote "$2" (the glob itself must stay unquoted to expand) and
	# use -- so a pattern beginning with '-' is not taken as an
	# option.  Redirect stderr so an empty directory yields 0 rather
	# than a grep error leaking into the TAP output.
	grep -- "$1" "$2"/* 2>/dev/null | wc -l
}
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info;
## Start and create a volume
TEST recreate ${B0}/${V0}-0
TEST recreate ${B0}/${V0}-1
TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1}
TEST $CLI volume set $V0 cluster.nufa on
# Print the value of one "Field: value" line from `gluster volume info`.
# $1 - volume name
# $2 - field label (e.g. 'Status', 'Volume Name')
function volinfo_field()
{
    local volname=$1;
    local label=$2;

    # Select lines beginning with the label and strip everything up to
    # the last ': ' (same greedy semantics as grep | sed 's/.*: //').
    $CLI volume info $volname | sed -n "/^$label: /s/.*: //p";
}
## Verify volume is created
EXPECT "$V0" volinfo_field $V0 'Volume Name';
EXPECT 'Created' volinfo_field $V0 'Status';
## Start volume and verify
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
## Mount native
special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1"
TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0
## Create a bunch of test files.
for i in $(seq 0 99); do
echo hello > $(printf $M0/file%02d $i)
done
## Make sure the files went to the right place. There might be link files in
## the other brick, but they won't have any contents.
EXPECT "0" count_lines hello ${B0}/${V0}-0
EXPECT "100" count_lines hello ${B0}/${V0}-1
if [ "$EXIT_EARLY" = "1" ]; then
exit 0;
fi
## Finish up
TEST umount $M0;
TEST $CLI volume stop $V0;
EXPECT 'Stopped' volinfo_field $V0 'Status';
TEST $CLI volume delete $V0;
TEST ! $CLI volume info $V0;
cleanup;

View File

@ -39,6 +39,9 @@ function test_footer()
echo "ok $t";
else
echo "not ok $t";
if [ "$EXIT_EARLY" = "1" ]; then
exit $RET
fi
fi
dbg "RESULT $t: $RET";

View File

@ -491,6 +491,26 @@ fini (xlator_t *this)
return;
}
/*
 * Compare the leading portions of two strings, where each string is
 * considered to end at either NUL or its own terminator character
 * (e.g. pass '.' for both to compare only the host part of FQDNs).
 *
 * Returns _gf_true iff both leading portions are identical.
 */
gf_boolean_t
same_first_part (char *str1, char term1, char *str2, char term2)
{
        /* Walk forward while neither string has reached its end and the
         * current characters agree. */
        while ((*str1 != '\0') && (*str1 != term1) &&
               (*str2 != '\0') && (*str2 != term2) &&
               (*str1 == *str2)) {
                ++str1;
                ++str2;
        }

        /* A match requires BOTH strings to have ended here; if only one
         * ended, or the loop stopped on a character mismatch, the
         * leading parts differ. */
        if ((*str1 == '\0') || (*str1 == term1)) {
                if ((*str2 == '\0') || (*str2 == term2)) {
                        return _gf_true;
                }
        }
        return _gf_false;
}
int
init (xlator_t *this)
{
@ -504,6 +524,9 @@ init (xlator_t *this)
char my_hostname[256];
double temp_free_disk = 0;
uint64_t size = 0;
xlator_t *local_subvol = NULL;
char *brick_host = NULL;
xlator_t *kid = NULL;
if (!this->children) {
gf_log (this->name, GF_LOG_CRITICAL,
@ -562,23 +585,58 @@ init (xlator_t *this)
local_volname = data->data;
}
trav = this->children;
while (trav) {
for (trav = this->children; trav; trav = trav->next) {
if (strcmp (trav->xlator->name, local_volname) == 0)
break;
trav = trav->next;
if (local_subvol) {
continue;
}
kid = trav->xlator;
for (;;) {
if (dict_get_str(trav->xlator->options,"remote-host",
&brick_host) == 0) {
/* Found it. */
break;
}
if (!kid->children) {
/* Nowhere further to look. */
gf_log (this->name, GF_LOG_ERROR,
"could not get remote-host");
goto err;
}
if (kid->children->next) {
/* Multiple choices, can't/shouldn't decide. */
gf_log (this->name, GF_LOG_ERROR,
"NUFA found fan-out (type %s) volume",
kid->type);
goto err;
}
/* One-to-one xlators are OK, try the next one. */
kid = kid->children->xlator;
}
if (same_first_part(my_hostname,'.',brick_host,'.')) {
local_subvol = trav->xlator;
}
}
if (!trav) {
gf_log (this->name, GF_LOG_ERROR,
"Could not find subvolume named '%s'. "
"Please define volume with the name as the hostname "
"or override it with 'option local-volume-name'",
local_volname);
goto err;
if (trav) {
gf_log (this->name, GF_LOG_INFO,
"Using specified subvol %s", local_volname);
conf->private = trav->xlator;
}
else if (local_subvol) {
gf_log (this->name, GF_LOG_INFO,
"Using first local subvol %s", local_subvol->name);
conf->private = local_subvol;
}
else {
gf_log (this->name, GF_LOG_ERROR,
"Could not find specified or local subvol");
goto err;
}
/* The volume specified exists */
conf->private = trav->xlator;
conf->min_free_disk = 10;
conf->disk_unit = 'p';

View File

@ -108,6 +108,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"cluster.rebalance-stats", "cluster/distribute", NULL, NULL, NO_DOC, 0, 2},
{"cluster.subvols-per-directory", "cluster/distribute", "directory-layout-spread", NULL, NO_DOC, 0, 2},
{"cluster.readdir-optimize", "cluster/distribute", NULL, NULL, NO_DOC, 0, 2},
{"cluster.nufa", "cluster/distribute", "!nufa", NULL, NO_DOC, 0, 2},
/* AFR xlator options */
{"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0, 1},
@ -2399,9 +2400,19 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph,
int ret = -1;
char *decommissioned_children = NULL;
xlator_t *dht = NULL;
char *optstr = NULL;
gf_boolean_t use_nufa = _gf_false;
if (dict_get_str(volinfo->dict,"cluster.nufa",&optstr) == 0) {
/* Keep static analyzers quiet by "using" the value. */
ret = gf_string2boolean(optstr,&use_nufa);
}
clusters = volgen_graph_build_clusters (graph, volinfo,
"cluster/distribute", "%s-dht",
use_nufa
? "cluster/nufa"
: "cluster/distribute",
"%s-dht",
child_count, child_count);
if (clusters < 0)
goto out;