cluster/dht: Change the subvolume encoding in d_off to be a "global"
position in the graph rather than relative (local) to a particular translator. Encoding the volume in this way allows a single translator to manage which brick is currently being scanned for directory entries. Using a single translator minimizes allocated bits in the d_off. It also allows multiple DHT translators in the same graph to have a common frame of reference (the graph position) for which brick is being read. Multiple DHT translators are needed for the Tiering feature. The fix builds off a previous change (9332) which removed subvolume encoding from AFR. The fix makes an equivalent change to the EC translator. More background can be found in fix 9332 and gluster-dev discussions [1]. DHT and AFR/EC are responsible (as before) for choosing which brick to enumerate directory entries in over the readdir lifecycle. The client translator receiving the readdir fop encodes the d_off. It is referred to as the "leaf node" in the graph and corresponds to the brick being scanned. When DHT decodes the d_off, it translates the leaf node to a local subvolume, which represents the next node in the graph leading to the brick. Tracking of leaf nodes is done in common utility functions. Leaf node counts and positional information are updated on a graph switch. [1] www.gluster.org/pipermail/gluster-devel/2015-January/043592.html Change-Id: Iaf0ea86d7046b1ceadbad69d88707b243077ebc8 BUG: 1190734 Signed-off-by: Dan Lambright <dlambrig@redhat.com> Reviewed-on: http://review.gluster.org/9688 Reviewed-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
parent
38ccaaf9d1
commit
a216745e5d
@ -21,6 +21,134 @@
|
||||
#include "compat.h"
|
||||
#include "xlator.h"
|
||||
|
||||
#define ONE 1ULL
|
||||
#define PRESENT_D_OFF_BITS 63
|
||||
#define BACKEND_D_OFF_BITS 63
|
||||
#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
|
||||
#define MASK (~0ULL)
|
||||
#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
|
||||
#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
|
||||
|
||||
/*
 * Return the number of bits needed to tell 'num' distinct values apart,
 * i.e. the smallest 'bits' for which (1 << bits) >= num.
 *
 * bits_for (0) and bits_for (1) are both 0.  For num > 2^63 no 64-bit
 * power of two is large enough; the result is capped at 64.
 */
static uint64_t
bits_for (uint64_t num)
{
        uint64_t bits = 0;
        uint64_t ctrl = 1;

        /*
         * The 'bits < 64' bound matters only for num > 2^63: there the
         * doubling wraps ctrl around to 0, and without the bound the
         * comparison 'ctrl < num' would stay true forever.
         */
        while ((ctrl < num) && (bits < 64)) {
                ctrl <<= 1;
                bits++;
        }

        return bits;
}
|
||||
|
||||
int
|
||||
gf_deitransform(xlator_t *this,
|
||||
uint64_t offset)
|
||||
{
|
||||
int cnt = 0;
|
||||
int max = 0;
|
||||
int max_bits = 0;
|
||||
uint64_t off_mask = 0;
|
||||
uint64_t host_mask = 0;
|
||||
|
||||
max = glusterfs_get_leaf_count(this->graph);
|
||||
|
||||
if (max == 1) {
|
||||
cnt = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (offset & TOP_BIT) {
|
||||
/* HUGE d_off */
|
||||
max_bits = bits_for (max);
|
||||
off_mask = (MASK << max_bits);
|
||||
host_mask = ~(off_mask);
|
||||
|
||||
cnt = offset & host_mask;
|
||||
} else {
|
||||
/* small d_off */
|
||||
cnt = offset % max;
|
||||
}
|
||||
out:
|
||||
return cnt;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
gf_dirent_orig_offset(xlator_t *this,
|
||||
uint64_t offset)
|
||||
{
|
||||
int max = 0;
|
||||
int max_bits = 0;
|
||||
uint64_t off_mask = 0;
|
||||
uint64_t orig_offset;
|
||||
|
||||
max = glusterfs_get_leaf_count(this->graph);
|
||||
|
||||
if (max == 1) {
|
||||
orig_offset = offset;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (offset & TOP_BIT) {
|
||||
/* HUGE d_off */
|
||||
max_bits = bits_for (max);
|
||||
off_mask = (MASK << max_bits);
|
||||
orig_offset = ((offset & ~TOP_BIT) & off_mask) << SHIFT_BITS;
|
||||
} else {
|
||||
/* small d_off */
|
||||
orig_offset = offset / max;
|
||||
}
|
||||
out:
|
||||
return orig_offset;
|
||||
}
|
||||
|
||||
int
|
||||
gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id)
|
||||
{
|
||||
int max = 0;
|
||||
uint64_t y = 0;
|
||||
uint64_t hi_mask = 0;
|
||||
uint64_t off_mask = 0;
|
||||
int max_bits = 0;
|
||||
|
||||
if (x == ((uint64_t) -1)) {
|
||||
y = (uint64_t) -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!x) {
|
||||
y = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
max = glusterfs_get_leaf_count(this->graph);
|
||||
|
||||
if (max == 1) {
|
||||
y = x;
|
||||
goto out;
|
||||
}
|
||||
|
||||
max_bits = bits_for (max);
|
||||
|
||||
hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
|
||||
|
||||
if (x & hi_mask) {
|
||||
/* HUGE d_off */
|
||||
off_mask = MASK << max_bits;
|
||||
y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | client_id;
|
||||
} else {
|
||||
/* small d_off */
|
||||
y = ((x * max) + client_id);
|
||||
}
|
||||
|
||||
out:
|
||||
if (y_p)
|
||||
*y_p = y;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
gf_dirent_t *
|
||||
gf_dirent_for_name (const char *name)
|
||||
{
|
||||
|
@ -22,6 +22,16 @@
|
||||
|
||||
#define gf_dirent_size(name) (sizeof (gf_dirent_t) + strlen (name) + 1)
|
||||
|
||||
int
|
||||
gf_deitransform(xlator_t *this, uint64_t y);
|
||||
|
||||
int
|
||||
gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id);
|
||||
|
||||
uint64_t
|
||||
gf_dirent_orig_offset (xlator_t *this, uint64_t offset);
|
||||
|
||||
|
||||
struct _dir_entry_t {
|
||||
struct _dir_entry_t *next;
|
||||
char *name;
|
||||
|
@ -452,6 +452,7 @@ struct _glusterfs_graph {
|
||||
int id; /* Used in logging */
|
||||
int used; /* Should be set when fuse gets
|
||||
first CHILD_UP */
|
||||
uint32_t leaf_count;
|
||||
uint32_t volfile_checksum;
|
||||
};
|
||||
typedef struct _glusterfs_graph glusterfs_graph_t;
|
||||
@ -617,6 +618,7 @@ int glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
|
||||
int glusterfs_graph_destroy_residual (glusterfs_graph_t *graph);
|
||||
int glusterfs_graph_deactivate (glusterfs_graph_t *graph);
|
||||
int glusterfs_graph_destroy (glusterfs_graph_t *graph);
|
||||
int glusterfs_get_leaf_count (glusterfs_graph_t *graph);
|
||||
int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
|
||||
glusterfs_graph_t *glusterfs_graph_construct (FILE *fp);
|
||||
glusterfs_graph_t *glusterfs_graph_new ();
|
||||
|
@ -515,15 +515,138 @@ glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
|
||||
/* XXX: --xlator-option additions */
|
||||
gf_add_cmdline_options (graph, &ctx->cmd_args);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
xlator_t *glusterfs_root(glusterfs_graph_t *graph)
|
||||
{
|
||||
return graph->first;
|
||||
}
|
||||
|
||||
static
|
||||
int glusterfs_is_leaf(xlator_t *xl)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (!xl->children)
|
||||
ret = 1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
uint32_t glusterfs_count_leaves(xlator_t *xl)
|
||||
{
|
||||
int n = 0;
|
||||
xlator_list_t *list = NULL;
|
||||
|
||||
if (glusterfs_is_leaf(xl))
|
||||
n = 1;
|
||||
else
|
||||
for (list = xl->children; list; list = list->next)
|
||||
n += glusterfs_count_leaves(list->xlator);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Return the leaf count recorded in the graph's 'leaf_count' field. */
int glusterfs_get_leaf_count (glusterfs_graph_t *graph)
{
        int leaves = graph->leaf_count;

        return leaves;
}
|
||||
|
||||
static
|
||||
int _glusterfs_leaf_position(xlator_t *tgt, int *id, xlator_t *xl)
|
||||
{
|
||||
xlator_list_t *list = NULL;
|
||||
int found = 0;
|
||||
|
||||
if (xl == tgt)
|
||||
found = 1;
|
||||
else if (glusterfs_is_leaf(xl))
|
||||
*id += 1;
|
||||
else
|
||||
for (list = xl->children; !found && list; list = list->next)
|
||||
found = _glusterfs_leaf_position(tgt, id, list->xlator);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
 * Return the position of 'tgt' within its graph: the number of leaves
 * visited before it in a depth-first walk from the graph root.  The
 * first leaf therefore has position 0.
 *
 * Returns -1 if 'tgt' is not reachable from the root.
 */
int glusterfs_leaf_position (xlator_t *tgt)
{
        int position = 0;

        if (_glusterfs_leaf_position (tgt, &position,
                                      glusterfs_root (tgt->graph)))
                return position;

        return -1;
}
|
||||
|
||||
static int
|
||||
_glusterfs_reachable_leaves(xlator_t *base, xlator_t *xl, dict_t *leaves)
|
||||
{
|
||||
xlator_list_t *list = NULL;
|
||||
int err = 1;
|
||||
int pos = 0;
|
||||
char strpos[6];
|
||||
|
||||
if (glusterfs_is_leaf(xl)) {
|
||||
pos = glusterfs_leaf_position(xl);
|
||||
if (pos < 0)
|
||||
goto out;
|
||||
sprintf(strpos, "%d", pos);
|
||||
|
||||
err = dict_set_static_ptr(leaves, strpos, base);
|
||||
|
||||
} else {
|
||||
for (err = 0, list = xl->children;
|
||||
!err && list;
|
||||
list = list->next)
|
||||
err = _glusterfs_reachable_leaves(base, list->xlator,
|
||||
leaves);
|
||||
}
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function determines which leaves are children (or grandchildren)
|
||||
* of the given base. The base may have multiple sub volumes. Each sub
|
||||
* volumes in turn may have sub volumes.. until the leaves are reached.
|
||||
* Each leaf is numbered 1,2,3,...etc.
|
||||
*
|
||||
* The base translator calls this function to see which of *its* subvolumes
|
||||
* it would forward an FOP to, to *get to* a particular leaf.
|
||||
* That information is built into the "leaves" dictionary.
|
||||
* key:destination leaf# -> value:base subvolume xlator.
|
||||
*/
|
||||
|
||||
int
|
||||
glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves)
|
||||
{
|
||||
xlator_list_t *list = NULL;
|
||||
int err = 0;
|
||||
|
||||
for (list = base->children; !err && list; list = list->next)
|
||||
err = _glusterfs_reachable_leaves(list->xlator,
|
||||
list->xlator, leaves);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int
|
||||
glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
xlator_t *root = NULL;
|
||||
|
||||
root = glusterfs_root(graph);
|
||||
|
||||
graph->leaf_count = glusterfs_count_leaves(root);
|
||||
|
||||
/* XXX: all xlator options validation */
|
||||
ret = glusterfs_graph_validate_options (graph);
|
||||
|
@ -978,4 +978,11 @@ glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp,
|
||||
|
||||
int
|
||||
loc_touchup (loc_t *loc, const char *name);
|
||||
|
||||
int
|
||||
glusterfs_leaf_position(xlator_t *tgt);
|
||||
|
||||
int
|
||||
glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves);
|
||||
|
||||
#endif /* _XLATOR_H */
|
||||
|
104
tests/bugs/distribute/bug-1190734.t
Normal file
104
tests/bugs/distribute/bug-1190734.t
Normal file
@ -0,0 +1,104 @@
|
||||
#!/bin/bash
|
||||
|
||||
. $(dirname $0)/../../include.rc
|
||||
. $(dirname $0)/../../volume.rc
|
||||
. $(dirname $0)/../../nfs.rc
|
||||
|
||||
BRICK_COUNT=3
|
||||
FILE_COUNT=100
|
||||
|
||||
# create_files COUNT DIR
# Recreate DIR from scratch and touch file_1 .. file_COUNT inside it.
function create_files {
        rm -rf $2
        mkdir $2
        for i in $(seq 1 $1)
        do
                touch $2/file_$i
        done
}
|
||||
|
||||
# check_file_count EXPECTED DIR
# Succeeds when the number of entries find reports below DIR (its first
# output line is skipped) equals EXPECTED.  The count is left in the
# global ORIG_FILE_COUNT.
function check_file_count {
        ORIG_FILE_COUNT=$(find $2 | tail -n +2 |wc -l)
        test $ORIG_FILE_COUNT -eq $1
}
|
||||
|
||||
# reset MOUNTPOINT
# Stop volume $V0, unmount MOUNTPOINT and delete the volume.  The exit
# status is that of the final delete.
function reset {
        $CLI volume stop $V0; umount $1
        $CLI volume delete $V0
}
|
||||
|
||||
# start_mount_fuse SUBDIR
# Start volume $V0, FUSE-mount it on $M0 and create $FILE_COUNT files in
# $M0/SUBDIR.  Returns 1 as soon as any step fails, 0 otherwise.
function start_mount_fuse {
        $CLI volume start $V0 || return 1

        $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 || return 1

        create_files $FILE_COUNT $M0/$1 || return 1

        return 0
}
|
||||
|
||||
# start_mount_nfs SUBDIR
# Start volume $V0, wait three seconds, NFS-mount it on $N0 and create
# $FILE_COUNT files in $N0/SUBDIR.  Returns 1 as soon as any step fails,
# 0 otherwise.
function start_mount_nfs {
        $CLI volume start $V0 || return 1

        sleep 3
        mount_nfs $H0:/$V0 $N0 || return 1

        create_files $FILE_COUNT $N0/$1 || return 1

        return 0
}
|
||||
|
||||
# start_removing_bricks DIR
# Check that DIR holds $FILE_COUNT entries, then start removing bricks
# ${V0}2 and ${V0}3 from the volume.  Returns 1 if either step fails,
# 0 otherwise.
function start_removing_bricks {
        check_file_count $FILE_COUNT $1 || return 1
        $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start || return 1

        return 0
}
|
||||
|
||||
# finish_removing_bricks DIR
# Commit the removal of bricks ${V0}2 and ${V0}3, then check that DIR
# still holds $FILE_COUNT entries.  Returns 1 if the commit fails,
# otherwise the status of the file-count check.
function finish_removing_bricks {
        $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 commit || return 1

        check_file_count $FILE_COUNT $1
}
|
||||
|
||||
cleanup
|
||||
|
||||
TEST glusterd
|
||||
TEST pidof glusterd
|
||||
|
||||
# Test 1-2 Create replicated volume
|
||||
|
||||
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
|
||||
$H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
|
||||
|
||||
# ------- test 1: AFR, fuse + remove bricks
|
||||
|
||||
TEST start_mount_fuse test1
|
||||
TEST start_removing_bricks $M0/test1
|
||||
EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
|
||||
$CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 status > /tmp/out
|
||||
TEST finish_removing_bricks $M0/test1
|
||||
reset $M0
|
||||
|
||||
# ------- test 2: AFR, nfs + remove bricks
|
||||
|
||||
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
|
||||
$H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
|
||||
|
||||
TEST start_mount_nfs test2
|
||||
TEST start_removing_bricks $N0/test2
|
||||
EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
|
||||
TEST finish_removing_bricks $N0/test2
|
||||
reset $N0
|
||||
|
||||
cleanup
|
@ -2840,6 +2840,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
|
||||
}
|
||||
|
||||
if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
|
||||
|
||||
hashed_subvol = dht_subvol_get_hashed (this, loc);
|
||||
if (!hashed_subvol) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
@ -2864,6 +2865,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
|
||||
op_errno = ENODATA;
|
||||
goto err;
|
||||
}
|
||||
|
||||
STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
|
||||
hashed_subvol->fops->getxattr, loc,
|
||||
GF_XATTR_PATHINFO_KEY, xdata);
|
||||
@ -3854,9 +3856,7 @@ list:
|
||||
}
|
||||
}
|
||||
|
||||
dht_itransform (this, prev->this, orig_entry->d_off,
|
||||
&entry->d_off);
|
||||
|
||||
entry->d_off = orig_entry->d_off;
|
||||
entry->d_stat = orig_entry->d_stat;
|
||||
entry->d_ino = orig_entry->d_ino;
|
||||
entry->d_type = orig_entry->d_type;
|
||||
@ -3988,9 +3988,7 @@ dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
goto unwind;
|
||||
}
|
||||
|
||||
dht_itransform (this, prev->this, orig_entry->d_off,
|
||||
&entry->d_off);
|
||||
|
||||
entry->d_off = orig_entry->d_off;
|
||||
entry->d_ino = orig_entry->d_ino;
|
||||
entry->d_type = orig_entry->d_type;
|
||||
entry->d_len = orig_entry->d_len;
|
||||
@ -4050,7 +4048,6 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
|
||||
dht_local_t *local = NULL;
|
||||
int op_errno = -1;
|
||||
xlator_t *xvol = NULL;
|
||||
off_t xoff = 0;
|
||||
int ret = 0;
|
||||
dht_conf_t *conf = NULL;
|
||||
|
||||
@ -4072,7 +4069,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
|
||||
local->xattr_req = (dict)? dict_ref (dict) : NULL;
|
||||
local->first_up_subvol = dht_first_up_subvol (this);
|
||||
|
||||
dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
|
||||
dht_deitransform (this, yoff, &xvol);
|
||||
|
||||
/* TODO: do proper readdir */
|
||||
if (whichop == GF_FOP_READDIRP) {
|
||||
@ -4111,10 +4108,10 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
|
||||
}
|
||||
|
||||
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
|
||||
fd, size, xoff, local->xattr);
|
||||
fd, size, yoff, local->xattr);
|
||||
} else {
|
||||
STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
|
||||
fd, size, xoff, local->xattr);
|
||||
fd, size, yoff, local->xattr);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -332,6 +332,7 @@ struct dht_conf {
|
||||
gf_boolean_t unhashed_sticky_bit;
|
||||
struct timeval last_stat_fetch;
|
||||
gf_lock_t layout_lock;
|
||||
dict_t *leaf_to_subvol;
|
||||
void *private; /* Can be used by wrapper xlators over
|
||||
dht */
|
||||
gf_boolean_t use_readdirp;
|
||||
@ -501,9 +502,7 @@ int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
|
||||
|
||||
int dht_frame_return (call_frame_t *frame);
|
||||
|
||||
int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
|
||||
int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
|
||||
uint64_t *x);
|
||||
int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol);
|
||||
|
||||
void dht_local_wipe (xlator_t *this, dht_local_t *local);
|
||||
dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
|
||||
@ -775,6 +774,8 @@ int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
|
||||
int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
|
||||
off_t offset, off_t len, dict_t *xdata);
|
||||
|
||||
int
|
||||
dht_set_subvol_range(xlator_t *this);
|
||||
int32_t dht_init (xlator_t *this);
|
||||
void dht_fini (xlator_t *this);
|
||||
int dht_reconfigure (xlator_t *this, dict_t *options);
|
||||
|
@ -62,20 +62,6 @@ dht_frame_return (call_frame_t *frame)
|
||||
return this_call_cnt;
|
||||
}
|
||||
|
||||
|
||||
static uint64_t
|
||||
dht_bits_for (uint64_t num)
|
||||
{
|
||||
uint64_t bits = 0, ctrl = 1;
|
||||
|
||||
while (ctrl < num) {
|
||||
ctrl *= 2;
|
||||
bits ++;
|
||||
}
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
/*
|
||||
* A slightly "updated" version of the algorithm described in the commit log
|
||||
* is used here.
|
||||
@ -88,66 +74,6 @@ dht_bits_for (uint64_t num)
|
||||
* upwards which is described as 64, are both made "configurable."
|
||||
*/
|
||||
|
||||
|
||||
#define BACKEND_D_OFF_BITS 63
|
||||
#define PRESENT_D_OFF_BITS 63
|
||||
|
||||
#define ONE 1ULL
|
||||
#define MASK (~0ULL)
|
||||
#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
|
||||
#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
|
||||
|
||||
#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
|
||||
#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
|
||||
|
||||
int
|
||||
dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
|
||||
{
|
||||
dht_conf_t *conf = NULL;
|
||||
int cnt = 0;
|
||||
int max = 0;
|
||||
uint64_t y = 0;
|
||||
uint64_t hi_mask = 0;
|
||||
uint64_t off_mask = 0;
|
||||
int max_bits = 0;
|
||||
|
||||
if (x == ((uint64_t) -1)) {
|
||||
y = (uint64_t) -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
conf = this->private;
|
||||
if (!conf)
|
||||
goto out;
|
||||
|
||||
max = conf->subvolume_cnt;
|
||||
cnt = dht_subvol_cnt (this, subvol);
|
||||
|
||||
if (max == 1) {
|
||||
y = x;
|
||||
goto out;
|
||||
}
|
||||
|
||||
max_bits = dht_bits_for (max);
|
||||
|
||||
hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
|
||||
|
||||
if (x & hi_mask) {
|
||||
/* HUGE d_off */
|
||||
off_mask = MASK << max_bits;
|
||||
y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
|
||||
} else {
|
||||
/* small d_off */
|
||||
y = ((x * max) + cnt);
|
||||
}
|
||||
|
||||
out:
|
||||
if (y_p)
|
||||
*y_p = y;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
|
||||
xlator_t **subvol)
|
||||
@ -205,55 +131,44 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
|
||||
uint64_t *x_p)
|
||||
static xlator_t *
|
||||
dht_get_subvol_from_id(xlator_t *this, int client_id)
|
||||
{
|
||||
xlator_t *xl = NULL;
|
||||
dht_conf_t *conf = NULL;
|
||||
int cnt = 0;
|
||||
int max = 0;
|
||||
uint64_t x = 0;
|
||||
char sid[6] = { 0 };
|
||||
|
||||
conf = this->private;
|
||||
|
||||
sprintf(sid, "%d", client_id);
|
||||
if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **) &xl))
|
||||
xl = NULL;
|
||||
|
||||
return xl;
|
||||
}
|
||||
|
||||
int
|
||||
dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p)
|
||||
{
|
||||
int client_id = 0;
|
||||
xlator_t *subvol = 0;
|
||||
int max_bits = 0;
|
||||
uint64_t off_mask = 0;
|
||||
uint64_t host_mask = 0;
|
||||
dht_conf_t *conf = NULL;
|
||||
|
||||
if (!this->private)
|
||||
return -1;
|
||||
|
||||
conf = this->private;
|
||||
max = conf->subvolume_cnt;
|
||||
|
||||
if (max == 1) {
|
||||
x = y;
|
||||
cnt = 0;
|
||||
goto out;
|
||||
}
|
||||
client_id = gf_deitransform(this, y);
|
||||
|
||||
if (y & TOP_BIT) {
|
||||
/* HUGE d_off */
|
||||
max_bits = dht_bits_for (max);
|
||||
off_mask = (MASK << max_bits);
|
||||
host_mask = ~(off_mask);
|
||||
subvol = dht_get_subvol_from_id(this, client_id);
|
||||
|
||||
x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
|
||||
|
||||
cnt = y & host_mask;
|
||||
} else {
|
||||
/* small d_off */
|
||||
cnt = y % max;
|
||||
x = y / max;
|
||||
}
|
||||
|
||||
out:
|
||||
subvol = conf->subvolumes[cnt];
|
||||
if (!subvol)
|
||||
subvol = conf->subvolumes[0];
|
||||
|
||||
if (subvol_p)
|
||||
*subvol_p = subvol;
|
||||
|
||||
if (x_p)
|
||||
*x_p = x;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -829,6 +744,8 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
|
||||
}
|
||||
conf->subvolume_cnt = cnt;
|
||||
|
||||
dht_set_subvol_range(this);
|
||||
|
||||
cnt = 0;
|
||||
for (subvols = this->children; subvols; subvols = subvols->next)
|
||||
conf->subvolumes[cnt++] = subvols->xlator;
|
||||
|
@ -166,7 +166,6 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
|
||||
int i = 0;
|
||||
int ret = 0;
|
||||
|
||||
|
||||
ret = dht_hash_compute (this, layout->type, name, &hash);
|
||||
if (ret != 0) {
|
||||
gf_log (this->name, GF_LOG_WARNING,
|
||||
|
@ -1492,12 +1492,14 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
|
||||
if (defrag->stats == _gf_true) {
|
||||
gettimeofday (&start, NULL);
|
||||
}
|
||||
|
||||
if (defrag->defrag_pattern &&
|
||||
(gf_defrag_pattern_match (defrag, entry->d_name,
|
||||
entry->d_stat.ia_size)
|
||||
== _gf_false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
loc_wipe (&entry_loc);
|
||||
ret =dht_build_child_loc (this, &entry_loc, loc,
|
||||
entry->d_name);
|
||||
|
@ -214,6 +214,8 @@ dht_fini (xlator_t *this)
|
||||
GF_FREE (conf->file_layouts);
|
||||
}
|
||||
|
||||
dict_destroy(conf->leaf_to_subvol);
|
||||
|
||||
GF_FREE (conf->subvolumes);
|
||||
|
||||
GF_FREE (conf->subvolume_status);
|
||||
@ -288,7 +290,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
|
||||
{
|
||||
@ -343,6 +344,27 @@ dht_init_regex (xlator_t *this, dict_t *odict, char *name,
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
dht_set_subvol_range(xlator_t *this)
|
||||
{
|
||||
int ret = -1;
|
||||
dht_conf_t *conf = NULL;
|
||||
|
||||
conf = this->private;
|
||||
|
||||
if (!conf)
|
||||
goto out;
|
||||
|
||||
conf->leaf_to_subvol = dict_new();
|
||||
if (!conf->leaf_to_subvol)
|
||||
goto out;
|
||||
|
||||
ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
dht_reconfigure (xlator_t *this, dict_t *options)
|
||||
{
|
||||
@ -676,6 +698,9 @@ dht_init (xlator_t *this)
|
||||
|
||||
this->private = conf;
|
||||
|
||||
if (dht_set_subvol_range(this))
|
||||
goto err;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
|
@ -304,8 +304,6 @@ void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries)
|
||||
|
||||
list_for_each_entry(entry, &entries->list, list)
|
||||
{
|
||||
entry->d_off = ec_itransform(ec, idx, entry->d_off);
|
||||
|
||||
if (entry->d_stat.ia_type == IA_IFREG)
|
||||
{
|
||||
if ((entry->dict == NULL) ||
|
||||
@ -413,10 +411,20 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state)
|
||||
|
||||
if (fop->offset != 0)
|
||||
{
|
||||
int32_t idx;
|
||||
int32_t idx = -1;
|
||||
ec_t *ec = fop->xl->private;
|
||||
|
||||
fop->offset = ec_deitransform(fop->xl->private, &idx,
|
||||
fop->offset);
|
||||
idx = gf_deitransform(fop->xl, fop->offset);
|
||||
|
||||
if ((idx < 0) || (idx >= ec->nodes)) {
|
||||
|
||||
gf_log(fop->xl->name, GF_LOG_ERROR,
|
||||
"Invalid index %d in readdirp request", idx);
|
||||
|
||||
fop->error = EIO;
|
||||
|
||||
return EC_STATE_REPORT;
|
||||
}
|
||||
fop->mask &= 1ULL << idx;
|
||||
}
|
||||
|
||||
|
@ -16,17 +16,6 @@
|
||||
#include "ec-fops.h"
|
||||
#include "ec-helpers.h"
|
||||
|
||||
#define BACKEND_D_OFF_BITS 63
|
||||
#define PRESENT_D_OFF_BITS 63
|
||||
|
||||
#define ONE 1ULL
|
||||
#define MASK (~0ULL)
|
||||
#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
|
||||
#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
|
||||
|
||||
#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
|
||||
#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
|
||||
|
||||
#ifndef ffsll
|
||||
#define ffsll(x) __builtin_ffsll(x)
|
||||
#endif
|
||||
@ -106,41 +95,6 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...)
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset)
|
||||
{
|
||||
int32_t bits;
|
||||
|
||||
if (offset == -1ULL)
|
||||
{
|
||||
return -1ULL;
|
||||
}
|
||||
|
||||
bits = ec->bits_for_nodes;
|
||||
if ((offset & ~(PRESENT_MASK >> (bits + 1))) != 0)
|
||||
{
|
||||
return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << bits)) | idx;
|
||||
}
|
||||
|
||||
return (offset * ec->nodes) + idx;
|
||||
}
|
||||
|
||||
uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset)
|
||||
{
|
||||
uint64_t mask = 0;
|
||||
|
||||
if ((offset & TOP_BIT) != 0)
|
||||
{
|
||||
mask = MASK << ec->bits_for_nodes;
|
||||
|
||||
*idx = offset & ~mask;
|
||||
return ((offset & ~TOP_BIT) & mask) << SHIFT_BITS;
|
||||
}
|
||||
|
||||
*idx = offset % ec->nodes;
|
||||
|
||||
return offset / ec->nodes;
|
||||
}
|
||||
|
||||
int32_t ec_bits_count(uint64_t n)
|
||||
{
|
||||
n -= (n >> 1) & 0x5555555555555555ULL;
|
||||
|
@ -16,8 +16,6 @@
|
||||
const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits);
|
||||
const char * ec_fop_name(int32_t id);
|
||||
void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...);
|
||||
uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset);
|
||||
uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset);
|
||||
int32_t ec_bits_count(uint64_t n);
|
||||
int32_t ec_bits_index(uint64_t n);
|
||||
int32_t ec_bits_consume(uint64_t * n);
|
||||
|
@ -1194,6 +1194,8 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m
|
||||
}
|
||||
*/
|
||||
|
||||
conf->client_id = glusterfs_leaf_position(this);
|
||||
|
||||
gf_log (this->name, GF_LOG_INFO,
|
||||
"Connected to %s, attached to remote volume '%s'.",
|
||||
conf->rpc->conn.name,
|
||||
|
@ -141,12 +141,16 @@ client_local_wipe (clnt_local_t *local)
|
||||
}
|
||||
|
||||
int
|
||||
unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
|
||||
unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp,
|
||||
gf_dirent_t *entries)
|
||||
{
|
||||
struct gfs3_dirlist *trav = NULL;
|
||||
gf_dirent_t *entry = NULL;
|
||||
int entry_len = 0;
|
||||
int ret = -1;
|
||||
clnt_conf_t *conf = NULL;
|
||||
|
||||
conf = this->private;
|
||||
|
||||
trav = rsp->reply;
|
||||
while (trav) {
|
||||
@ -156,7 +160,8 @@ unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
|
||||
goto out;
|
||||
|
||||
entry->d_ino = trav->d_ino;
|
||||
entry->d_off = trav->d_off;
|
||||
gf_itransform (this, trav->d_off, &entry->d_off,
|
||||
conf->client_id);
|
||||
entry->d_len = trav->d_len;
|
||||
entry->d_type = trav->d_type;
|
||||
|
||||
@ -182,12 +187,17 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
|
||||
inode_table_t *itable = NULL;
|
||||
int entry_len = 0;
|
||||
int ret = -1;
|
||||
clnt_conf_t *conf = NULL;
|
||||
|
||||
trav = rsp->reply;
|
||||
|
||||
if (fd)
|
||||
itable = fd->inode->table;
|
||||
|
||||
conf = this->private;
|
||||
if (!conf)
|
||||
goto out;
|
||||
|
||||
while (trav) {
|
||||
entry_len = gf_dirent_size (trav->name);
|
||||
entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t);
|
||||
@ -195,7 +205,8 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
|
||||
goto out;
|
||||
|
||||
entry->d_ino = trav->d_ino;
|
||||
entry->d_off = trav->d_off;
|
||||
gf_itransform (this, trav->d_off, &entry->d_off,
|
||||
conf->client_id);
|
||||
entry->d_len = trav->d_len;
|
||||
entry->d_type = trav->d_type;
|
||||
|
||||
|
@ -2450,7 +2450,7 @@ client3_3_readdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
|
||||
|
||||
INIT_LIST_HEAD (&entries.list);
|
||||
if (rsp.op_ret > 0) {
|
||||
unserialize_rsp_dirent (&rsp, &entries);
|
||||
unserialize_rsp_dirent (this, &rsp, &entries);
|
||||
}
|
||||
|
||||
GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata,
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include "xdr-rpc.h"
|
||||
#include "glusterfs3.h"
|
||||
#include "gf-dirent.h"
|
||||
|
||||
extern rpc_clnt_prog_t clnt_handshake_prog;
|
||||
extern rpc_clnt_prog_t clnt_dump_prog;
|
||||
@ -1913,6 +1914,9 @@ client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
||||
if (!conf || !conf->fops)
|
||||
goto out;
|
||||
|
||||
if (off != 0)
|
||||
off = gf_dirent_orig_offset(this, off);
|
||||
|
||||
args.fd = fd;
|
||||
args.size = size;
|
||||
args.offset = off;
|
||||
@ -1948,6 +1952,9 @@ client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
||||
if (!conf || !conf->fops)
|
||||
goto out;
|
||||
|
||||
if (off != 0)
|
||||
off = gf_dirent_orig_offset(this, off);
|
||||
|
||||
args.fd = fd;
|
||||
args.size = size;
|
||||
args.offset = off;
|
||||
@ -2447,7 +2454,7 @@ build_client_config (xlator_t *this, clnt_conf_t *conf)
|
||||
{
|
||||
int ret = -1;
|
||||
|
||||
if (!conf)
|
||||
if (!conf)
|
||||
goto out;
|
||||
|
||||
GF_OPTION_INIT ("frame-timeout", conf->rpc_conf.rpc_timeout,
|
||||
@ -2470,6 +2477,8 @@ build_client_config (xlator_t *this, clnt_conf_t *conf)
|
||||
|
||||
GF_OPTION_INIT ("send-gids", conf->send_gids, bool, out);
|
||||
|
||||
conf->client_id = glusterfs_leaf_position(this);
|
||||
|
||||
ret = client_check_remote_host (this, this->options);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -85,6 +85,7 @@ typedef struct clnt_conf {
|
||||
rpc_clnt_prog_t *handshake;
|
||||
rpc_clnt_prog_t *dump;
|
||||
|
||||
int client_id;
|
||||
uint64_t reopen_fd_count; /* Count of fds reopened after a
|
||||
connection is established */
|
||||
gf_lock_t rec_lock;
|
||||
@ -228,7 +229,8 @@ int client_submit_request (xlator_t *this, void *req,
|
||||
struct iovec *rsp_payload, int rsp_count,
|
||||
struct iobref *rsp_iobref, xdrproc_t xdrproc);
|
||||
|
||||
int unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries);
|
||||
int unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp,
|
||||
gf_dirent_t *entries);
|
||||
int unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
|
||||
struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user