cli/glusterd: Added support for dispersed volumes

Two new options have been added to the 'create' command of the cli
interface:

    disperse [<count>] redundancy <count>

Both are optional. A dispersed volume is created by specifying at
least one of them. If 'disperse' is missing, or it is present but
'<count>' is not, the number of bricks enumerated in the command
line is taken as the disperse count.

If 'redundancy' is missing, the lowest optimal value is assumed. A
configuration is considered optimal (for most workloads) when the
disperse count - redundancy count is a power of 2. If the resulting
redundancy is 1, the volume is created normally, but if it's greater
than 1, a warning is shown to the user and he/she must answer yes/no
to continue volume creation. If there isn't any optimal value for
the given number of bricks, a warning is also shown and, if the user
accepts, a redundancy of 1 is used.

If 'redundancy' is specified and the resulting volume is not optimal,
another warning is shown to the user.

A distributed-disperse volume can be created using a number of bricks
that is a multiple of the disperse count.

Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4
BUG: 1118629
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/7782
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Xavier Hernandez 2014-05-15 10:35:14 +02:00 committed by Vijay Bellur
parent ad112305a1
commit 1392da3e23
24 changed files with 1054 additions and 37 deletions

View File

@ -177,7 +177,86 @@ out:
}
int32_t
cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options)
cli_cmd_create_disperse_check (struct cli_state *state, int *disperse,
                               int *redundancy, int count)
{
        /*
         * Validate the disperse/redundancy pair for a new dispersed volume,
         * filling in values the user did not supply.
         *
         * state      - cli state, passed through to the confirmation prompt.
         * disperse   - in/out: disperse count; a value <= 0 means "not given"
         *              and is replaced by 'count'.
         * redundancy - in/out: redundancy count; 0 means "not given" and is
         *              replaced by a computed value.
         * count      - number of bricks given on the command line.
         *
         * Returns 0 when the configuration is accepted, -1 when it is invalid
         * or the user answers "no" to one of the confirmation questions.
         */
        int             i           = 0;
        int             tmp         = 0;
        gf_answer_t     answer      = GF_ANSWER_NO;
        char            question[128];
        const char     *question1   = "There isn't an optimal redundancy value "
                                      "for this configuration. Do you want to "
                                      "create the volume with redundancy 1 ?";
        const char     *question2   = "The optimal redundancy for this "
                                      "configuration is %d. Do you want to create "
                                      "the volume with this value ?";
        const char     *question3   = "This configuration is not optimal on most "
                                      "workloads. Do you want to use it ?";

        /* No explicit disperse count: use every brick on the command line. */
        if (*disperse <= 0) {
                if (count < 3) {
                        cli_err ("number of bricks must be greater "
                                 "than 2");
                        return -1;
                }
                *disperse = count;
        }

        if (*redundancy == 0) {
                /*
                 * Search for the lowest redundancy that leaves a power-of-two
                 * number of data bricks. 'tmp' is the candidate data brick
                 * count (starting at redundancy 1) and 'i' is the number of
                 * candidates still allowed, i.e. (disperse - 1) / 2.
                 */
                tmp = *disperse - 1;
                for (i = tmp / 2;
                     (i > 0) && ((tmp & -tmp) != tmp);
                     i--, tmp--)
                        ; /* nothing to do: the loop header does the search */

                if (i == 0) {
                        /* No optimal value exists: offer redundancy 1. */
                        answer = cli_cmd_get_confirmation (state, question1);
                        if (answer == GF_ANSWER_NO)
                                return -1;

                        *redundancy = 1;
                } else {
                        *redundancy = *disperse - tmp;
                        if (*redundancy > 1) {
                                /* Bounded formatting: never write past the
                                 * end of 'question'. */
                                snprintf (question, sizeof (question),
                                          question2, *redundancy);
                                answer = cli_cmd_get_confirmation (state,
                                                                   question);
                                if (answer == GF_ANSWER_NO)
                                        return -1;
                        }
                }

                /* 0 passes the power-of-two test below, so the user is not
                 * asked a second question about a value already confirmed. */
                tmp = 0;
        } else {
                /* Explicit redundancy: remember the data brick count for the
                 * optimality check below. */
                tmp = *disperse - *redundancy;
        }

        if (*redundancy > (*disperse - 1) / 2) {
                cli_err ("redundancy must be less than %d for a "
                         "disperse %d volume",
                         (*disperse + 1) / 2, *disperse);
                return -1;
        }

        /* (x & -x) == x holds only for 0 and powers of two. */
        if ((tmp & -tmp) != tmp) {
                answer = cli_cmd_get_confirmation (state, question3);
                if (answer == GF_ANSWER_NO)
                        return -1;
        }

        return 0;
}
int32_t
cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
int wordcount, dict_t **options)
{
dict_t *dict = NULL;
char *volname = NULL;
@ -191,7 +270,8 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
int32_t index = 0;
char *bricks = NULL;
int32_t brick_count = 0;
char *opwords[] = { "replica", "stripe", "transport", NULL };
char *opwords[] = { "replica", "stripe", "transport", "disperse",
"redundancy", NULL };
char *invalid_volnames[] = {"volume", "type", "subvolumes", "option",
"end-volume", "all", "volume_not_in_ring",
@ -200,9 +280,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
"snap-max-soft-limit", "auto-delete",
NULL};
char *w = NULL;
char *ptr = NULL;
int op_count = 0;
int32_t replica_count = 1;
int32_t stripe_count = 1;
int32_t disperse_count = -1;
int32_t redundancy_count = 0;
gf_boolean_t is_force = _gf_false;
int wc = wordcount;
@ -279,6 +362,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
case GF_CLUSTER_TYPE_STRIPE:
type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
break;
case GF_CLUSTER_TYPE_DISPERSE:
cli_err ("replicated-dispersed volume is not "
"supported");
goto out;
}
if (wordcount < (index+2)) {
@ -310,6 +397,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
case GF_CLUSTER_TYPE_REPLICATE:
type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
break;
case GF_CLUSTER_TYPE_DISPERSE:
cli_err ("striped-dispersed volume is not "
"supported");
goto out;
}
if (wordcount < (index + 2)) {
ret = -1;
@ -348,6 +439,90 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
index += 2;
} else if ((strcmp (w, "disperse")) == 0) {
switch (type) {
case GF_CLUSTER_TYPE_DISPERSE:
if (disperse_count >= 0) {
cli_err ("disperse option given "
"twice");
goto out;
}
break;
case GF_CLUSTER_TYPE_NONE:
type = GF_CLUSTER_TYPE_DISPERSE;
break;
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
cli_err ("striped-replicated-dispersed volume "
"is not supported");
goto out;
case GF_CLUSTER_TYPE_STRIPE:
cli_err ("striped-dispersed volume is not "
"supported");
goto out;
case GF_CLUSTER_TYPE_REPLICATE:
cli_err ("replicated-dispersed volume is not "
"supported");
goto out;
}
if (wordcount >= (index+2)) {
disperse_count = strtol (words[index + 1],
&ptr, 0);
if (*ptr != 0)
disperse_count = 0;
else {
if (disperse_count < 3) {
cli_err ("disperse count must "
"be greater than 2");
ret = -1;
goto out;
}
index++;
}
}
index++;
} else if ((strcmp (w, "redundancy")) == 0) {
switch (type) {
case GF_CLUSTER_TYPE_NONE:
type = GF_CLUSTER_TYPE_DISPERSE;
break;
case GF_CLUSTER_TYPE_DISPERSE:
if (redundancy_count > 0) {
cli_err ("redundancy option given "
"twice");
goto out;
}
break;
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
cli_err ("striped-replicated-dispersed volume "
"is not supported");
goto out;
case GF_CLUSTER_TYPE_STRIPE:
cli_err ("striped-dispersed volume is not "
"supported");
goto out;
case GF_CLUSTER_TYPE_REPLICATE:
cli_err ("replicated-dispersed volume is not "
"supported");
goto out;
}
if (wordcount < (index+2)) {
ret = -1;
goto out;
}
redundancy_count = strtol (words[index+1], NULL, 0);
if (redundancy_count < 1) {
cli_err ("redundancy must be greater than 0");
ret = -1;
goto out;
}
index += 2;
} else {
GF_ASSERT (!"opword mismatch");
ret = -1;
@ -359,8 +534,6 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
if (!trans_type)
trans_type = gf_strdup ("tcp");
sub_count = stripe_count * replica_count;
/* reset the count value now */
count = 1;
@ -389,6 +562,23 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
if (type == GF_CLUSTER_TYPE_DISPERSE) {
ret = cli_cmd_create_disperse_check(state, &disperse_count,
&redundancy_count,
brick_count);
if (!ret)
ret = dict_set_int32 (dict, "disperse-count",
disperse_count);
if (!ret)
ret = dict_set_int32 (dict, "redundancy-count",
redundancy_count);
if (ret)
goto out;
sub_count = disperse_count;
} else
sub_count = stripe_count * replica_count;
if (brick_count % sub_count) {
if (type == GF_CLUSTER_TYPE_STRIPE)
cli_err ("number of bricks is not a multiple of "
@ -396,6 +586,9 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
else if (type == GF_CLUSTER_TYPE_REPLICATE)
cli_err ("number of bricks is not a multiple of "
"replica count");
else if (type == GF_CLUSTER_TYPE_DISPERSE)
cli_err ("number of bricks is not a multiple of "
"disperse count");
else
cli_err ("number of bricks given doesn't match "
"required count");
@ -404,7 +597,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
/* Everything if parsed fine. start setting info in dict */
/* Everything is parsed fine. start setting info in dict */
ret = dict_set_str (dict, "volname", volname);
if (ret)
goto out;

View File

@ -362,7 +362,7 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
ret = cli_cmd_volume_create_parse (words, wordcount, &options);
ret = cli_cmd_volume_create_parse (state, words, wordcount, &options);
if (ret) {
cli_usage_out (word->pattern);
@ -376,32 +376,55 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
(type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)) {
if ((ret = dict_get_str (options, "bricks", &brick_list)) != 0) {
gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
"Could not retrieve bricks list");
(type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) ||
(type == GF_CLUSTER_TYPE_DISPERSE)) {
if ((ret = dict_get_str (options, "bricks",
&brick_list)) != 0) {
gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could "
"not retrieve bricks "
"list");
goto out;
}
if ((ret = dict_get_int32 (options, "count", &brick_count)) != 0) {
gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
"Could not retrieve brick count");
if ((ret = dict_get_int32 (options, "count",
&brick_count)) != 0) {
gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could "
"not retrieve brick "
"count");
goto out;
}
if ((ret = dict_get_int32 (options, "replica-count", &sub_count)) != 0) {
gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
"Could not retrieve replica count");
goto out;
if (type != GF_CLUSTER_TYPE_DISPERSE) {
if ((ret = dict_get_int32 (options, "replica-count",
&sub_count)) != 0) {
gf_log ("cli", GF_LOG_ERROR, "Bricks check : "
"Could not retrieve "
"replica count");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found."
" Checking brick order.");
} else {
ret = dict_get_int32 (options, "disperse-count",
&sub_count);
if (ret) {
gf_log ("cli", GF_LOG_ERROR, "Bricks check : "
"Could not retrieve "
"disperse count");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Disperse cluster type found. "
"Checking brick order.");
}
gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found."
" Checking brick order.");
ret = cli_cmd_check_brick_order (state, brick_list, brick_count, sub_count);
ret = cli_cmd_check_brick_order (state, brick_list,
brick_count, sub_count);
if (ret) {
gf_log("cli", GF_LOG_INFO, "Not creating volume because of bad brick order");
gf_log("cli", GF_LOG_INFO, "Not creating volume "
"because of bad brick "
"order");
goto out;
}
}
ret = dict_get_str (options, "transport", &trans_type);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Unable to get transport type");
@ -2328,6 +2351,7 @@ struct cli_cmd volume_cmds[] = {
"list information of all volumes"},
{ "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] "
"[disperse [<COUNT>]] [redundancy <COUNT>] "
"[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"
#ifdef HAVE_BD_XLATOR
"?<vg_name>"

View File

@ -59,9 +59,11 @@ char *cli_vol_type_str[] = {"Distribute",
"Stripe",
"Replicate",
"Striped-Replicate",
"Disperse",
"Distributed-Stripe",
"Distributed-Replicate",
"Distributed-Striped-Replicate",
"Distributed-Disperse",
};
char *cli_vol_status_str[] = {"Created",
@ -518,6 +520,8 @@ gf_cli_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
int32_t dist_count = 0;
int32_t stripe_count = 0;
int32_t replica_count = 0;
int32_t disperse_count = 0;
int32_t redundancy_count = 0;
int32_t vol_type = 0;
int32_t transport = 0;
char *volume_id_str = NULL;
@ -671,6 +675,16 @@ xml_output:
if (ret)
goto out;
snprintf (key, 256, "volume%d.disperse_count", i);
ret = dict_get_int32 (dict, key, &disperse_count);
if (ret)
goto out;
snprintf (key, 256, "volume%d.redundancy_count", i);
ret = dict_get_int32 (dict, key, &redundancy_count);
if (ret)
goto out;
snprintf (key, 256, "volume%d.transport", i);
ret = dict_get_int32 (dict, key, &transport);
if (ret)
@ -685,7 +699,7 @@ xml_output:
// Distributed (stripe/replicate/stripe-replica) setups
if ((type > 0) && ( dist_count < brick_count))
vol_type = type + 3;
vol_type = type + 4;
cli_out ("Volume Name: %s", volname);
cli_out ("Type: %s", cli_vol_type_str[vol_type]);
@ -734,6 +748,11 @@ next:
brick_count);
} else if (type == GF_CLUSTER_TYPE_NONE) {
cli_out ("Number of Bricks: %d", brick_count);
} else if (type == GF_CLUSTER_TYPE_DISPERSE) {
cli_out ("Number of Bricks: %d x (%d + %d) = %d",
(brick_count / dist_count),
disperse_count - redundancy_count,
redundancy_count, brick_count);
} else {
/* For both replicate and stripe, dist_count is
good enough */

View File

@ -2528,6 +2528,8 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
int dist_count = 0;
int stripe_count = 0;
int replica_count = 0;
int disperse_count = 0;
int redundancy_count = 0;
int transport = 0;
char *brick = NULL;
char key[1024] = {0,};
@ -2621,14 +2623,36 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
"%d", replica_count);
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.disperse_count", i);
ret = dict_get_int32 (dict, key, &disperse_count);
if (ret)
goto out;
ret = xmlTextWriterWriteFormatElement (local->writer,
(xmlChar *)"disperseCount",
"%d", disperse_count);
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.redundancy_count", i);
ret = dict_get_int32 (dict, key, &redundancy_count);
if (ret)
goto out;
ret = xmlTextWriterWriteFormatElement (local->writer,
(xmlChar *)"redundancyCount",
"%d", redundancy_count);
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.type", i);
ret = dict_get_int32 (dict, key, &type);
if (ret)
goto out;
/* For Distributed-(stripe,replicate,stipe-replicate) types */
/* For Distributed-(stripe,replicate,stipe-replicate,disperse)
types
*/
if ((type > 0) && (dist_count < brick_count))
type += 3;
type += 4;
ret = xmlTextWriterWriteFormatElement (local->writer,
(xmlChar *)"type",
"%d", type);

View File

@ -221,8 +221,8 @@ cli_submit_request (struct rpc_clnt *rpc, void *req, call_frame_t *frame,
xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
int32_t
cli_cmd_volume_create_parse (const char **words, int wordcount,
dict_t **options);
cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
int wordcount, dict_t **options);
int32_t
cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **opt);

View File

@ -23,7 +23,8 @@
GF_CLUSTER_TYPE_NONE = 0,
GF_CLUSTER_TYPE_STRIPE,
GF_CLUSTER_TYPE_REPLICATE,
GF_CLUSTER_TYPE_STRIPE_REPLICATE
GF_CLUSTER_TYPE_STRIPE_REPLICATE,
GF_CLUSTER_TYPE_DISPERSE
};
enum gf1_cli_replace_op {

14
tests/basic/ec/ec-12-4.t Normal file
View File

@ -0,0 +1,14 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks basic dispersed volume functionality and cli interface
DISPERSE=12
REDUNDANCY=4
# This must be equal to 44 * $DISPERSE + 106
TESTS_EXPECTED_IN_LOOP=634
. $(dirname $0)/ec-common

14
tests/basic/ec/ec-3-1.t Normal file
View File

@ -0,0 +1,14 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks basic dispersed volume functionality and cli interface
DISPERSE=3
REDUNDANCY=1
# This must be equal to 44 * $DISPERSE + 106
TESTS_EXPECTED_IN_LOOP=238
. $(dirname $0)/ec-common

14
tests/basic/ec/ec-4-1.t Normal file
View File

@ -0,0 +1,14 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks basic dispersed volume functionality and cli interface
DISPERSE=4
REDUNDANCY=1
# This must be equal to 44 * $DISPERSE + 106
TESTS_EXPECTED_IN_LOOP=282
. $(dirname $0)/ec-common

14
tests/basic/ec/ec-5-1.t Normal file
View File

@ -0,0 +1,14 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks basic dispersed volume functionality and cli interface
DISPERSE=5
REDUNDANCY=1
# This must be equal to 44 * $DISPERSE + 106
TESTS_EXPECTED_IN_LOOP=326
. $(dirname $0)/ec-common

14
tests/basic/ec/ec-5-2.t Normal file
View File

@ -0,0 +1,14 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks basic dispersed volume functionality and cli interface
DISPERSE=5
REDUNDANCY=2
# This must be equal to 44 * $DISPERSE + 106
TESTS_EXPECTED_IN_LOOP=326
. $(dirname $0)/ec-common

14
tests/basic/ec/ec-6-2.t Normal file
View File

@ -0,0 +1,14 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks basic dispersed volume functionality and cli interface
DISPERSE=6
REDUNDANCY=2
# This must be equal to 44 * $DISPERSE + 106
TESTS_EXPECTED_IN_LOOP=370
. $(dirname $0)/ec-common

14
tests/basic/ec/ec-7-3.t Normal file
View File

@ -0,0 +1,14 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks basic dispersed volume functionality and cli interface
DISPERSE=7
REDUNDANCY=3
# This must be equal to 44 * $DISPERSE + 106
TESTS_EXPECTED_IN_LOOP=414
. $(dirname $0)/ec-common

143
tests/basic/ec/ec-common Normal file
View File

@ -0,0 +1,143 @@
SIZE_LIST="1048576 1000 12345 0"
LAST_BRICK=$(($DISPERSE - 1))
function fragment_size
{
    # Print the size expected for each brick's fragment of a file whose
    # logical size is $1: the size is rounded up to a whole number of
    # blocks (128 bytes per data fragment) and then split evenly across
    # the $DISPERSE - $REDUNDANCY data fragments.
    local data_frags=$(($DISPERSE - $REDUNDANCY))
    local stripe=$((128 * $data_frags))
    local padded=$(($1 + $stripe - 1))
    local rounded=$(($padded - $padded % $stripe))

    echo $(($rounded / $data_frags))
}
cleanup
tmp=`mktemp -d`
if [ ! -d $tmp ]; then
exit 1
fi
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{0..$LAST_BRICK}
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
TEST dd if=/dev/urandom of=$tmp/small bs=1024 count=1
TEST dd if=/dev/urandom of=$tmp/big bs=1024 count=4096
cs_small=$(sha1sum $tmp/small | awk '{ print $1 }')
cs_big=$(sha1sum $tmp/big | awk '{ print $1 }')
cp $tmp/small $tmp/small1
for size in $SIZE_LIST; do
truncate -s $size $tmp/small1
eval cs_small_truncate[$size]=$(sha1sum $tmp/small1 | awk '{ print $1 }')
done
cp $tmp/big $tmp/big1
for size in $SIZE_LIST; do
truncate -s $size $tmp/big1
eval cs_big_truncate[$size]=$(sha1sum $tmp/big1 | awk '{ print $1 }')
done
TEST df -h
TEST stat $M0
for idx in `seq 0 $LAST_BRICK`; do
brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
done
cd $M0
EXPECT "2" echo $(ls -a1 | wc -l)
TEST mkdir dir1
TEST [ -d dir1 ]
TEST touch file1
TEST [ -f file1 ]
for dir in . dir1; do
TEST cp $tmp/small $dir/small
TEST [ -f $dir/small ]
fsize=$(fragment_size 1024)
EXPECT "1024" stat -c "%s" $dir/small
for idx in `seq 0 $LAST_BRICK`; do
EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
done
EXPECT "$cs_small" echo $(sha1sum $dir/small | awk '{ print $1 }')
TEST cp $tmp/big $dir/big
TEST [ -f $dir/big ]
fsize=$(fragment_size 4194304)
EXPECT "4194304" stat -c "%s" $dir/big
for idx in `seq 0 $LAST_BRICK`; do
EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
done
EXPECT "$cs_big" echo $(sha1sum $dir/big | awk '{ print $1 }')
for idx in `seq 0 $LAST_BRICK`; do
TEST kill_brick $V0 $H0 $B0/$V0$idx
EXPECT "1024" stat -c "%s" $dir/small
EXPECT "4194304" stat -c "%s" $dir/big
EXPECT "$cs_small" echo $(sha1sum $dir/small | awk '{ print $1 }')
EXPECT "$cs_big" echo $(sha1sum $dir/big | awk '{ print $1 }')
cd
TEST umount $M0
TEST $CLI volume stop $V0 force
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
cd $M0
done
for size in $SIZE_LIST; do
TEST truncate -s $size $dir/small
TEST [ -f $dir/small ]
fsize=$(fragment_size $size)
EXPECT "$size" stat -c "%s" $dir/small
for idx in `seq 0 $LAST_BRICK`; do
EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
done
EXPECT "${cs_small_truncate[$size]}" echo $(sha1sum $dir/small | awk '{ print $1 }')
TEST truncate -s $size $dir/big
TEST [ -f $dir/big ]
EXPECT "$size" stat -c "%s" $dir/big
for idx in `seq 0 $LAST_BRICK`; do
EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
done
EXPECT "${cs_big_truncate[$size]}" echo $(sha1sum $dir/big | awk '{ print $1 }')
done
TEST rm -f $dir/small
TEST [ ! -e $dir/small ]
for idx in `seq 0 $LAST_BRICK`; do
TEST [ ! -e ${brick[$idx]}/$dir/small ]
done
TEST rm -f $dir/big
TEST [ ! -e $dir/big ]
for idx in `seq 0 $LAST_BRICK`; do
TEST [ ! -e ${brick[$idx]}/$dir/big ]
done
done
TEST rmdir dir1
TEST [ ! -e dir1 ]
for idx in `seq 0 $LAST_BRICK`; do
TEST [ ! -e ${brick[$idx]}/dir1 ]
done
TEST rm -f file1
TEST [ ! -e file1 ]
for idx in `seq 0 $LAST_BRICK`; do
TEST [ ! -e ${brick[$idx]}/file1 ]
done
rm -rf $tmp
cleanup

233
tests/basic/ec/ec.t Normal file
View File

@ -0,0 +1,233 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
TEST_USER=test-ec-user
TEST_UID=27341
function my_getfattr {
getfattr --only-values -e text $* 2> /dev/null
}
function get_rep_count {
v=$(my_getfattr -n trusted.nsr.rep-count $1)
#echo $v > /dev/tty
echo $v
}
function create_file {
dd if=/dev/urandom of=$1 bs=4k count=$2 conv=sync 2> /dev/null
}
# Create $1/perm_dir, hand it over to ${TEST_USER}, and have that user create
# perm_file inside it. Returns 1 as soon as any step fails, 0 otherwise.
function setup_perm_file {
mkdir $1/perm_dir || return 1
chown ${TEST_USER} $1/perm_dir || return 1
# The file is created through su, i.e. by the test user rather than by the
# user running this script.
su ${TEST_USER} -c "touch $1/perm_dir/perm_file" || return 1
return 0
}
# Functions to check repair for specific operation types.
function check_create_write {
for b in $*; do
cmp $tmpdir/create-write $b/create-write || return 1
done
return 0
}
function check_truncate {
truncate --size=8192 $tmpdir/truncate
for b in $*; do
cmp $tmpdir/truncate $b/truncate || return 1
done
return 0
}
function check_hard_link {
for b in $*; do
inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1)
inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1)
[ "$inum1" = "$inum2" ] || return 1
done
echo "Y"
return 0
}
function check_soft_link {
for b in $*; do
[ "$(readlink $b/soft-link)" = "soft-link-tgt" ] || return 1
done
echo "Y"
return 0
}
function check_unlink {
for b in $*; do
[ ! -e $b/unlink ] || return 1
done
echo "Y"
return 0
}
function check_mkdir {
for b in $*; do
[ -d $b/mkdir ] || return 1
done
echo "Y"
return 0
}
function check_rmdir {
for b in $*; do
[ ! -e $b/rmdir ] || return 1
done
echo "Y"
return 0
}
function check_setxattr {
for b in $*; do
v=$(my_getfattr -n user.foo $b/setxattr)
[ "$v" = "ash_nazg_durbatuluk" ] || return 1
done
echo "Y"
return 0
}
# Check that the user.bar xattr is absent from the "removexattr" file in every
# directory passed as an argument. Prints "Y" and returns 0 when it is absent
# everywhere; returns 1 (printing no "Y") as soon as one copy still has it.
function check_removexattr {
for b in $*; do
my_getfattr -n user.bar $b/removexattr 2> /dev/null
# A zero exit status means the attribute could still be read.
[ $? = 0 ] && return 1
done
echo "Y"
return 0
}
# Check that perm_dir/perm_file has the same owner uid, gid and access mode in
# every directory passed as an argument. The first argument is the reference;
# each remaining one is compared against it. Prints "Y" and returns 0 when all
# match, returns 1 at the first mismatch.
function check_perm_file {
b1=$1
shift 1
# "%u %g %a" = owner uid, owner gid, access rights in octal.
ftext=$(stat -c "%u %g %a" $b1/perm_dir/perm_file)
#echo "first u/g/a = $ftext" > /dev/tty
for b in $*; do
btext=$(stat -c "%u %g %a" $b/perm_dir/perm_file)
#echo " next u/a/a = $btext" > /dev/tty
if [ x"$btext" != x"$ftext" ]; then
return 1
fi
done
echo "Y"
return 0
}
cleanup
TEST useradd -o -M -u ${TEST_UID} ${TEST_USER}
trap "userdel --force ${TEST_USER}" EXIT
TEST glusterd
TEST pidof glusterd
TEST $CLI volume info
TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
TEST $CLI volume create $V0 disperse 10 redundancy 2 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9}
EXPECT "$V0" volinfo_field $V0 'Volume Name'
EXPECT 'Created' volinfo_field $V0 'Status'
EXPECT '10' brick_count $V0
TEST $CLI volume start $V0
EXPECT 'Started' volinfo_field $V0 'Status'
# Mount FUSE with caching disabled
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
# Create local files for comparisons etc.
tmpdir=$(mktemp -d)
# Setting a trap on EXIT replaces the handler installed earlier in this script,
# so this handler must also remove the test user; otherwise an early exit would
# leave ${TEST_USER} behind. Errors from userdel are silenced because on the
# normal path the user has already been deleted by the time the trap runs.
trap "rm -rf $tmpdir; userdel --force ${TEST_USER} 2> /dev/null" EXIT
TEST create_file $tmpdir/create-write 10
TEST create_file $tmpdir/truncate 10
# Prepare files and directories we'll need later.
TEST cp $tmpdir/truncate $M0/
TEST touch $M0/hard-link-1
TEST touch $M0/unlink
TEST mkdir $M0/rmdir
TEST touch $M0/setxattr
TEST touch $M0/removexattr
TEST setfattr -n user.bar -v "ash_nazg_gimbatul" $M0/removexattr
# Kill a couple of bricks and allow some time for things to settle.
TEST kill_brick $V0 $H0 $B0/${V0}3
TEST kill_brick $V0 $H0 $B0/${V0}8
sleep 10
# Test create+write
TEST cp $tmpdir/create-write $M0/
# Test truncate
TEST truncate --size=8192 $M0/truncate
# Test hard link
TEST ln $M0/hard-link-1 $M0/hard-link-2
# Test soft link
TEST ln -s soft-link-tgt $M0/soft-link
# Test unlink
TEST rm $M0/unlink
# Test rmdir
TEST rmdir $M0/rmdir
# Test mkdir
TEST mkdir $M0/mkdir
# Test setxattr
TEST setfattr -n user.foo -v "ash_nazg_durbatuluk" $M0/setxattr
# Test removexattr
TEST setfattr -x user.bar $M0/removexattr
# Test uid/gid behavior
TEST setup_perm_file $M0
# Unmount/remount so that create/write and truncate don't see cached data.
TEST umount $M0
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
# Test create/write and truncate *before* the bricks are brought back.
TEST check_create_write $M0
TEST check_truncate $M0
# Restart the bricks and allow repair to occur.
TEST $CLI volume start $V0 force
sleep 10
# Unmount/remount again, same reason as before.
TEST umount $M0
TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
# Make sure everything is as it should be. Most tests check for consistency
# between the bricks and the front end. This is not valid for disperse, so we
# check the mountpoint state instead.
TEST check_create_write $M0
TEST check_truncate $M0
TEST stat $M0/hard-link-1
TEST stat $M0/hard-link-2
TEST stat $M0/soft-link
TEST ! stat $M0/unlink
TEST ! stat $M0/rmdir
TEST stat $M0/mkdir
TEST stat $M0/setxattr
TEST stat $M0/removexattr
TEST stat $M0/perm_dir
TEST stat $M0/perm_dir/perm_file
EXPECT_WITHIN 5 "Y" check_hard_link $B0/${V0}{0..9}
EXPECT_WITHIN 5 "Y" check_soft_link $B0/${V0}{0..9}
EXPECT_WITHIN 5 "Y" check_unlink $B0/${V0}{0..9}
EXPECT_WITHIN 5 "Y" check_rmdir $B0/${V0}{0..9}
EXPECT_WITHIN 5 "Y" check_mkdir $B0/${V0}{0..9}
EXPECT_WITHIN 5 "Y" check_setxattr $B0/${V0}{0..9}
EXPECT_WITHIN 5 "Y" check_removexattr $B0/${V0}{0..9}
EXPECT_WITHIN 5 "Y" check_perm_file $B0/${V0}{0..9}
rm -rf $tmpdir
userdel --force ${TEST_USER}
cleanup

123
tests/basic/ec/self-heal.t Normal file
View File

@ -0,0 +1,123 @@
#!/bin/bash
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
# This test checks self-healing feature of dispersed volumes
cleanup
tmp=`mktemp -d`
if [ ! -d $tmp ]; then
exit 1
fi
TESTS_EXPECTED_IN_LOOP=85
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
TEST dd if=/dev/urandom of=$tmp/test bs=1024 count=1024
cs=$(sha1sum $tmp/test | awk '{ print $1 }')
TEST df -h
TEST stat $M0
for idx in {0..5}; do
brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
done
cd $M0
TEST cp $tmp/test test
TEST chmod 644 test
EXPECT "-rw-r--r--" stat -c "%A" test
for idx1 in {0..5}; do
TEST chmod 666 ${brick[$idx1]}/test
sleep 1
EXPECT "-rw-r--r--" stat -c "%A" test
EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test
done
for idx1 in {0..4}; do
for idx2 in `seq $(($idx1 + 1)) 5`; do
if [ $idx1 -ne $idx2 ]; then
TEST chmod 666 ${brick[$idx1]}/test
TEST chmod 600 ${brick[$idx2]}/test
sleep 1
EXPECT "-rw-r--r--" stat -c "%A" test
EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test
EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx2]}/test
fi
done
done
TEST truncate -s 0 ${brick[0]}/test
TEST truncate -s 2097152 ${brick[1]}/test
TEST setfattr -n user.test -v "test1" ${brick[0]}/test
TEST setfattr -n user.test -v "test2" ${brick[1]}/test
TEST chmod 600 ${brick[0]}/test
TEST chmod 666 ${brick[1]}/test
sleep 1
EXPECT "1048576" stat -c "%s" test
TEST ! getfattr -n user.test test
EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[0]}/test
EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[1]}/test
TEST ! getfattr -n user.test ${brick[0]}/test
TEST ! getfattr -n user.test ${brick[1]}/test
EXPECT "-rw-r--r--" stat -c "%A" ${brick[0]}/test
EXPECT "-rw-r--r--" stat -c "%A" ${brick[1]}/test
TEST kill_brick $V0 $H0 $B0/${V0}0
TEST kill_brick $V0 $H0 $B0/${V0}1
TEST cp $tmp/test test2
EXPECT "1048576" stat -c "%s" test2
TEST chmod 777 test2
EXPECT "-rwxrwxrwx" stat -c "%A" test2
TEST mkdir dir1
TEST ls -al dir1
TEST ln -s test2 test3
TEST [ -h test3 ]
TEST ln test2 test4
TEST [ -f test4 ]
EXPECT "2" stat -c "%h" test2
EXPECT "2" stat -c "%h" test4
cd
TEST umount $M0
TEST $CLI volume stop $V0 force
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
cd $M0
EXPECT "1048576" stat -c "%s" test2
EXPECT "-rwxrwxrwx" stat -c "%A" test2
EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[0]}/test2
EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[1]}/test2
EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[0]}/test2
EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[1]}/test2
TEST ls -al dir1
EXPECT_WITHIN 5 "1" eval "if [ -d ${brick[0]}/dir1 ]; then echo 1; fi"
EXPECT_WITHIN 5 "1" eval "if [ -d ${brick[1]}/dir1 ]; then echo 1; fi"
TEST [ -h test3 ]
EXPECT_WITHIN 5 "1" eval "if [ -h ${brick[0]}/test3 ]; then echo 1; fi"
EXPECT_WITHIN 5 "1" eval "if [ -h ${brick[1]}/test3 ]; then echo 1; fi"
EXPECT "2" stat -c "%h" test4
EXPECT_WITHIN 5 "3" stat -c "%h" ${brick[0]}/test4
EXPECT_WITHIN 5 "3" stat -c "%h" ${brick[1]}/test4
rm -rf $tmp
cleanup

View File

@ -169,6 +169,12 @@ gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
}
}
break;
case GF_CLUSTER_TYPE_DISPERSE:
snprintf (err_str, err_len, "Volume %s cannot be converted "
"from dispersed to striped-"
"dispersed", volinfo->volname);
gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
out:
@ -259,6 +265,12 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
}
}
break;
case GF_CLUSTER_TYPE_DISPERSE:
snprintf (err_str, err_len, "Volume %s cannot be converted "
"from dispersed to replicated-"
"dispersed", volinfo->volname);
gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str);
goto out;
}
out:
return ret;
@ -276,6 +288,7 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
switch (volinfo->type) {
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
case GF_CLUSTER_TYPE_DISPERSE:
snprintf (err_str, err_len,
"replica count (%d) option given for non replicate "
"volume %s", replica_count, volinfo->volname);
@ -737,6 +750,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
strcpy (vol_type, "stripe");
} else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
strcpy (vol_type, "stripe-replicate");
} else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
strcpy (vol_type, "disperse");
} else {
strcpy (vol_type, "distribute");
}

View File

@ -398,6 +398,16 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
snprintf (key, 256, "volume%d.disperse_count", count);
ret = dict_set_int32 (volumes, key, volinfo->disperse_count);
if (ret)
goto out;
snprintf (key, 256, "volume%d.redundancy_count", count);
ret = dict_set_int32 (volumes, key, volinfo->redundancy_count);
if (ret)
goto out;
snprintf (key, 256, "volume%d.transport", count);
ret = dict_set_int32 (volumes, key, volinfo->transport_type);
if (ret)

View File

@ -844,6 +844,18 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
if (ret)
goto out;
snprintf (buf, sizeof (buf), "%d", volinfo->disperse_count);
ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
buf);
if (ret)
goto out;
snprintf (buf, sizeof (buf), "%d", volinfo->redundancy_count);
ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
buf);
if (ret)
goto out;
snprintf (buf, sizeof (buf), "%d", volinfo->version);
ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf);
if (ret)
@ -2618,6 +2630,12 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {
volinfo->replica_count = atoi (value);
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
strlen (GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {
volinfo->disperse_count = atoi (value);
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
strlen (GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) {
volinfo->redundancy_count = atoi (value);
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {
volinfo->transport_type = atoi (value);
@ -2754,6 +2772,11 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo->replica_count > 0);
break;
case GF_CLUSTER_TYPE_DISPERSE:
GF_ASSERT (volinfo->disperse_count > 0);
GF_ASSERT (volinfo->redundancy_count > 0);
break;
default:
GF_ASSERT (0);
break;

View File

@ -44,6 +44,8 @@ typedef enum glusterd_store_ver_ac_{
/* Key names for per-volume fields. Keys such as the version, disperse and
 * redundancy counts are passed to gf_store_save_value() when a volume is
 * written out, and are matched again by key name in
 * glusterd_store_update_volinfo() to refill the volinfo structure. */
#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count"
#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count"
#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count"
/* Keys for the disperse_count and redundancy_count fields of a volume. */
#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"
#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"
#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"

View File

@ -548,6 +548,8 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
new_volinfo->type = volinfo->type;
new_volinfo->replica_count = volinfo->replica_count;
new_volinfo->stripe_count = volinfo->stripe_count;
new_volinfo->disperse_count = volinfo->disperse_count;
new_volinfo->redundancy_count = volinfo->redundancy_count;
new_volinfo->dist_leaf_count = volinfo->dist_leaf_count;
new_volinfo->sub_count = volinfo->sub_count;
new_volinfo->transport_type = volinfo->transport_type;
@ -2524,6 +2526,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
ret = dict_set_int32 (dict, key, volinfo->disperse_count);
if (ret)
goto out;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
ret = dict_set_int32 (dict, key, volinfo->redundancy_count);
if (ret)
goto out;
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count);
ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count);
@ -4206,6 +4220,24 @@ glusterd_import_volinfo (dict_t *peer_data, int count,
gf_log (THIS->name, GF_LOG_INFO,
"peer is possibly old version");
/* not having a 'disperse_count' key is not an error
(as the peer may be of an older version) */
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
ret = dict_get_int32 (peer_data, key, &new_volinfo->disperse_count);
if (ret)
gf_log (THIS->name, GF_LOG_INFO,
"peer is possibly old version");
/* not having a 'redundancy_count' key is not an error
(as the peer may be of an older version) */
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
ret = dict_get_int32 (peer_data, key, &new_volinfo->redundancy_count);
if (ret)
gf_log (THIS->name, GF_LOG_INFO,
"peer is possibly old version");
/* not having a 'dist_count' key is not an error
(as the peer may be of an older version) */
memset (key, 0, sizeof (key));
@ -6932,6 +6964,9 @@ glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo)
int rcount = volinfo->replica_count;
int scount = volinfo->stripe_count;
if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
return volinfo->disperse_count;
return (rcount ? rcount : 1) * (scount ? scount : 1);
}
@ -11694,6 +11729,13 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
}
}
if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
if (volinfo->op_version < GD_OP_VERSION_3_6_0)
volinfo->op_version = GD_OP_VERSION_3_6_0;
if (volinfo->client_op_version < GD_OP_VERSION_3_6_0)
volinfo->client_op_version = GD_OP_VERSION_3_6_0;
}
return;
}
@ -12774,7 +12816,7 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
up_count = volinfo->replica_count - down_count;
up_count = volinfo->dist_leaf_count - down_count;
if (quorum_type && !strcmp (quorum_type, "fixed")) {
if (up_count >= quorum_count) {
@ -12782,7 +12824,8 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
} else {
if (volinfo->replica_count % 2 == 0) {
if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) &&
(volinfo->dist_leaf_count % 2 == 0)) {
if ((up_count > quorum_count) ||
((up_count == quorum_count) && first_brick_on)) {
quorum_met = _gf_true;
@ -12835,8 +12878,9 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
goto out;
}
if (!glusterd_is_volume_replicate (volinfo) ||
volinfo->replica_count < 3) {
if ((!glusterd_is_volume_replicate (volinfo) ||
volinfo->replica_count < 3) &&
(GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) {
for (i = 0; i < volinfo->brick_count ; i++) {
/* for a pure distribute volume, and replica volume
with replica count 2, quorum is not met if even
@ -12858,7 +12902,8 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
ret = 0;
quorum_met = _gf_true;
} else {
distribute_subvols = volinfo->brick_count / volinfo->replica_count;
distribute_subvols = volinfo->brick_count /
volinfo->dist_leaf_count;
for (j = 0; j < distribute_subvols; j++) {
// by default assume quorum is not met
/* TODO: Handle distributed striped replicate volumes
@ -12867,11 +12912,11 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
*/
ret = 1;
quorum_met = _gf_false;
for (i = 0; i < volinfo->replica_count; i++) {
for (i = 0; i < volinfo->dist_leaf_count; i++) {
snprintf (key, sizeof (key),
"%s%"PRId64".brick%"PRId64".status", key_prefix,
index,
(j * volinfo->replica_count) + i);
(j * volinfo->dist_leaf_count) + i);
ret = dict_get_int32 (dict, key, &brick_online);
if (ret || !brick_online) {
if (i == 0)
@ -13043,6 +13088,9 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
else
quorum_count =
volinfo->replica_count/2 + 1;
} else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
quorum_count = volinfo->disperse_count -
volinfo->redundancy_count;
} else {
quorum_count = volinfo->brick_count;
}
@ -13061,8 +13109,22 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
if the quorum-type option is not set to auto,
the behavior is set to the default behavior)
*/
if (!ret)
quorum_count = tmp;
if (!ret) {
/* for dispersed volumes, only allow quorums
equal or larger than minimum functional
value.
*/
if ((GF_CLUSTER_TYPE_DISPERSE !=
volinfo->type) ||
(tmp >= quorum_count)) {
quorum_count = tmp;
} else {
gf_log(this->name, GF_LOG_INFO,
"Ignoring small quorum-count "
"(%d) on dispersed volume", tmp);
quorum_type = NULL;
}
}
else
quorum_type = NULL;
}

View File

@ -2684,10 +2684,14 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
"%s-replicate-%d"};
char *stripe_args[] = {"cluster/stripe",
"%s-stripe-%d"};
char *disperse_args[] = {"cluster/disperse",
"%s-disperse-%d"};
char option[32] = "";
int rclusters = 0;
int clusters = 0;
int dist_count = 0;
int ret = -1;
xlator_t * ec = NULL;
if (!volinfo->dist_leaf_count)
goto out;
@ -2737,6 +2741,26 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
if (clusters < 0)
goto out;
break;
case GF_CLUSTER_TYPE_DISPERSE:
clusters = volgen_graph_build_clusters (graph, volinfo,
disperse_args[0],
disperse_args[1],
volinfo->brick_count,
volinfo->disperse_count);
if (clusters < 0)
goto out;
sprintf(option, "%d", volinfo->redundancy_count);
ec = first_of (graph);
while (clusters-- > 0) {
ret = xlator_set_option (ec, "redundancy", option);
if (ret)
goto out;
ec = ec->next;
}
break;
default:
gf_log ("", GF_LOG_ERROR, "volume inconsistency: "
"unrecognized clustering type");

View File

@ -1689,6 +1689,27 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
"replica count for volume %s", volname);
goto out;
}
} else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
ret = dict_get_int32 (dict, "disperse-count",
&volinfo->disperse_count);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to get "
"disperse count for volume %s", volname);
goto out;
}
ret = dict_get_int32 (dict, "redundancy-count",
&volinfo->redundancy_count);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to get "
"redundancy count for volume %s", volname);
goto out;
}
if (priv->op_version < GD_OP_VERSION_3_6_0) {
gf_log (this->name, GF_LOG_ERROR, "Disperse volume "
"needs op-version 3.6.0 or higher");
ret = -1;
goto out;
}
}
/* dist-leaf-count is the count of brick nodes for a given

View File

@ -336,6 +336,8 @@ struct glusterd_volinfo_ {
int sub_count; /* backward compatibility */
int stripe_count;
int replica_count;
int disperse_count;
int redundancy_count;
int subvol_count; /* Number of subvolumes in a
distribute volume */
int dist_leaf_count; /* Number of bricks in one