Load balance read operations among subvolumes in afr
Signed-off-by: Anand V. Avati <avati@amp.gluster.com>
This commit is contained in:
parent
1e7c9fa937
commit
75c70231fe
@ -72,6 +72,40 @@ afr_build_parent_loc (loc_t *parent, loc_t *child)
|
||||
}
|
||||
|
||||
|
||||
afr_inode_ctx_t *
|
||||
afr_get_inode_ctx (xlator_t *this, inode_t *inode)
|
||||
{
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
|
||||
int ret = 0;
|
||||
|
||||
LOCK (&inode->lock);
|
||||
{
|
||||
ret = __inode_ctx_get (inode, this, &ctx);
|
||||
|
||||
if (ret < 0) {
|
||||
inode_ctx = CALLOC (1, sizeof (afr_inode_ctx_t));
|
||||
|
||||
ret = __inode_ctx_put (inode, this,
|
||||
(uint64_t)(long) inode_ctx);
|
||||
|
||||
if (ret < 0) {
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"could not set inode ctx");
|
||||
FREE (inode_ctx);
|
||||
inode_ctx = NULL;
|
||||
}
|
||||
} else {
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
}
|
||||
}
|
||||
UNLOCK (&inode->lock);
|
||||
|
||||
return inode_ctx;
|
||||
}
|
||||
|
||||
|
||||
/* {{{ create */
|
||||
|
||||
int
|
||||
@ -91,11 +125,13 @@ afr_create_unwind (call_frame_t *frame, xlator_t *this)
|
||||
}
|
||||
UNLOCK (&frame->lock);
|
||||
|
||||
if (main_frame)
|
||||
if (main_frame) {
|
||||
AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
|
||||
local->cont.create.fd,
|
||||
local->cont.create.inode,
|
||||
&local->cont.create.buf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -107,6 +143,8 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
{
|
||||
afr_local_t * local = NULL;
|
||||
afr_private_t * priv = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
|
||||
int call_count = -1;
|
||||
int child_index = -1;
|
||||
@ -124,14 +162,36 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
if (op_ret != -1) {
|
||||
local->op_ret = op_ret;
|
||||
|
||||
if ((local->success_count == 0)
|
||||
|| (child_index == priv->read_child)) {
|
||||
if (local->success_count == 0) {
|
||||
local->cont.create.buf = *buf;
|
||||
local->cont.create.buf.st_ino =
|
||||
afr_itransform (buf->st_ino,
|
||||
priv->child_count,
|
||||
child_index);
|
||||
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (child_index == local->read_child_index) {
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
local->cont.create.inode = inode;
|
||||
|
||||
local->success_count++;
|
||||
@ -246,6 +306,13 @@ afr_create (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
loc_copy (&local->loc, loc);
|
||||
|
||||
LOCK (&priv->read_child_lock);
|
||||
{
|
||||
local->read_child_index = (++priv->read_child_rr)
|
||||
% (priv->child_count);
|
||||
}
|
||||
UNLOCK (&priv->read_child_lock);
|
||||
|
||||
local->cont.create.flags = flags;
|
||||
local->cont.create.mode = mode;
|
||||
local->cont.create.fd = fd_ref (fd);
|
||||
@ -294,10 +361,12 @@ afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
|
||||
}
|
||||
UNLOCK (&frame->lock);
|
||||
|
||||
if (main_frame)
|
||||
if (main_frame) {
|
||||
AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
|
||||
local->cont.mknod.inode,
|
||||
&local->cont.mknod.buf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -310,6 +379,8 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
afr_local_t * local = NULL;
|
||||
afr_private_t * priv = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
|
||||
int call_count = -1;
|
||||
int child_index = -1;
|
||||
|
||||
@ -326,14 +397,36 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
if (op_ret != -1) {
|
||||
local->op_ret = op_ret;
|
||||
|
||||
if ((local->success_count == 0)
|
||||
|| (child_index == priv->read_child)) {
|
||||
if (local->success_count == 0){
|
||||
local->cont.mknod.buf = *buf;
|
||||
local->cont.mknod.buf.st_ino =
|
||||
afr_itransform (buf->st_ino,
|
||||
priv->child_count,
|
||||
child_index);
|
||||
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (child_index == local->read_child_index) {
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
local->cont.mknod.inode = inode;
|
||||
|
||||
local->success_count++;
|
||||
@ -444,6 +537,13 @@ afr_mknod (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
loc_copy (&local->loc, loc);
|
||||
|
||||
LOCK (&priv->read_child_lock);
|
||||
{
|
||||
local->read_child_index = (++priv->read_child_rr)
|
||||
% (priv->child_count);
|
||||
}
|
||||
UNLOCK (&priv->read_child_lock);
|
||||
|
||||
local->cont.mknod.mode = mode;
|
||||
local->cont.mknod.dev = dev;
|
||||
|
||||
@ -492,10 +592,12 @@ afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
|
||||
}
|
||||
UNLOCK (&frame->lock);
|
||||
|
||||
if (main_frame)
|
||||
if (main_frame) {
|
||||
AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
|
||||
local->cont.mkdir.inode,
|
||||
&local->cont.mkdir.buf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -508,6 +610,8 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
afr_local_t * local = NULL;
|
||||
afr_private_t * priv = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
|
||||
int call_count = -1;
|
||||
int child_index = -1;
|
||||
|
||||
@ -524,13 +628,35 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
if (op_ret != -1) {
|
||||
local->op_ret = op_ret;
|
||||
|
||||
if ((local->success_count == 0)
|
||||
|| (child_index == priv->read_child)) {
|
||||
if (local->success_count == 0) {
|
||||
local->cont.mkdir.buf = *buf;
|
||||
local->cont.mkdir.buf.st_ino =
|
||||
afr_itransform (buf->st_ino, priv->child_count,
|
||||
child_index);
|
||||
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (child_index == local->read_child_index) {
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
local->cont.mkdir.inode = inode;
|
||||
|
||||
local->success_count++;
|
||||
@ -642,6 +768,13 @@ afr_mkdir (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
loc_copy (&local->loc, loc);
|
||||
|
||||
LOCK (&priv->read_child_lock);
|
||||
{
|
||||
local->read_child_index = (++priv->read_child_rr)
|
||||
% (priv->child_count);
|
||||
}
|
||||
UNLOCK (&priv->read_child_lock);
|
||||
|
||||
local->cont.mkdir.mode = mode;
|
||||
|
||||
local->transaction.fop = afr_mkdir_wind;
|
||||
@ -710,6 +843,8 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
afr_local_t * local = NULL;
|
||||
afr_private_t * priv = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
|
||||
int call_count = -1;
|
||||
int child_index = -1;
|
||||
|
||||
@ -726,13 +861,35 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
if (op_ret != -1) {
|
||||
local->op_ret = op_ret;
|
||||
|
||||
if ((local->success_count == 0)
|
||||
|| (child_index == priv->read_child)) {
|
||||
if (local->success_count == 0) {
|
||||
local->cont.link.buf = *buf;
|
||||
local->cont.link.buf.st_ino =
|
||||
afr_itransform (buf->st_ino, priv->child_count,
|
||||
child_index);
|
||||
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (child_index == local->read_child_index) {
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
local->cont.link.inode = inode;
|
||||
|
||||
local->success_count++;
|
||||
@ -844,6 +1001,13 @@ afr_link (call_frame_t *frame, xlator_t *this,
|
||||
loc_copy (&local->loc, oldloc);
|
||||
loc_copy (&local->newloc, newloc);
|
||||
|
||||
LOCK (&priv->read_child_lock);
|
||||
{
|
||||
local->read_child_index = (++priv->read_child_rr)
|
||||
% (priv->child_count);
|
||||
}
|
||||
UNLOCK (&priv->read_child_lock);
|
||||
|
||||
local->cont.link.ino = oldloc->inode->ino;
|
||||
|
||||
local->transaction.fop = afr_link_wind;
|
||||
@ -892,10 +1056,12 @@ afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
|
||||
}
|
||||
UNLOCK (&frame->lock);
|
||||
|
||||
if (main_frame)
|
||||
if (main_frame) {
|
||||
AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
|
||||
local->cont.symlink.inode,
|
||||
&local->cont.symlink.buf);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -908,6 +1074,8 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
afr_local_t * local = NULL;
|
||||
afr_private_t * priv = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
|
||||
int call_count = -1;
|
||||
int child_index = -1;
|
||||
|
||||
@ -924,13 +1092,35 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
if (op_ret != -1) {
|
||||
local->op_ret = op_ret;
|
||||
|
||||
if ((local->success_count == 0)
|
||||
|| (child_index == priv->read_child)) {
|
||||
if (local->success_count == 0) {
|
||||
local->cont.symlink.buf = *buf;
|
||||
local->cont.symlink.buf.st_ino =
|
||||
afr_itransform (buf->st_ino, priv->child_count,
|
||||
child_index);
|
||||
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (child_index == local->read_child_index) {
|
||||
inode_ctx = afr_get_inode_ctx (this, inode);
|
||||
|
||||
if (inode_ctx) {
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
local->cont.symlink.inode = inode;
|
||||
|
||||
local->success_count++;
|
||||
@ -1043,6 +1233,13 @@ afr_symlink (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
loc_copy (&local->loc, loc);
|
||||
|
||||
LOCK (&priv->read_child_lock);
|
||||
{
|
||||
local->read_child_index = (++priv->read_child_rr)
|
||||
% (priv->child_count);
|
||||
}
|
||||
UNLOCK (&priv->read_child_lock);
|
||||
|
||||
local->cont.symlink.ino = loc->inode->ino;
|
||||
local->cont.symlink.linkpath = strdup (linkpath);
|
||||
|
||||
|
@ -49,7 +49,7 @@
|
||||
|
||||
/**
|
||||
* Common algorithm for inode read calls:
|
||||
*
|
||||
*
|
||||
* - Try the fop on the first child that is up
|
||||
* - if we have failed due to ENOTCONN:
|
||||
* try the next child
|
||||
@ -70,13 +70,17 @@ afr_access_cbk (call_frame_t *frame, void *cookie,
|
||||
int unwind = 1;
|
||||
int last_tried = -1;
|
||||
int this_try = -1;
|
||||
int read_child = -1;
|
||||
|
||||
priv = this->private;
|
||||
children = priv->children;
|
||||
|
||||
local = frame->local;
|
||||
|
||||
read_child = (long) cookie;
|
||||
|
||||
if (op_ret == -1) {
|
||||
retry:
|
||||
last_tried = local->cont.access.last_tried;
|
||||
|
||||
if (all_tried (last_tried, priv->child_count)) {
|
||||
@ -84,11 +88,15 @@ afr_access_cbk (call_frame_t *frame, void *cookie,
|
||||
}
|
||||
this_try = ++local->cont.access.last_tried;
|
||||
|
||||
if (this_try == read_child) {
|
||||
goto retry;
|
||||
}
|
||||
|
||||
unwind = 0;
|
||||
|
||||
STACK_WIND_COOKIE (frame, afr_access_cbk,
|
||||
(void *) (long) this_try,
|
||||
children[this_try],
|
||||
(void *) (long) read_child,
|
||||
children[this_try],
|
||||
children[this_try]->fops->access,
|
||||
&local->loc, local->cont.access.mask);
|
||||
}
|
||||
@ -111,6 +119,10 @@ afr_access (call_frame_t *frame, xlator_t *this,
|
||||
int call_child = 0;
|
||||
afr_local_t *local = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
int ret = 0;
|
||||
|
||||
int32_t op_ret = -1;
|
||||
int32_t op_errno = 0;
|
||||
|
||||
@ -125,15 +137,34 @@ afr_access (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
ALLOC_OR_GOTO (local, afr_local_t, out);
|
||||
|
||||
call_child = afr_first_up_child (priv);
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
ret = inode_ctx_get (loc->inode, this,
|
||||
&ctx);
|
||||
if (ret < 0) {
|
||||
op_errno = EINVAL;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"inode ctx not set!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (inode_ctx->read_child >= 0) {
|
||||
call_child = inode_ctx->read_child;
|
||||
|
||||
local->cont.access.last_tried = -1;
|
||||
|
||||
} else {
|
||||
call_child = afr_first_up_child (priv);
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
|
||||
local->cont.access.last_tried = call_child;
|
||||
}
|
||||
|
||||
local->cont.access.last_tried = call_child;
|
||||
loc_copy (&local->loc, loc);
|
||||
local->cont.access.mask = mask;
|
||||
|
||||
@ -164,16 +195,15 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,
|
||||
afr_local_t * local = NULL;
|
||||
xlator_t ** children = NULL;
|
||||
|
||||
int deitransform_child = -1;
|
||||
|
||||
int unwind = 1;
|
||||
int last_tried = -1;
|
||||
int this_try = -1;
|
||||
int read_child = -1;
|
||||
|
||||
priv = this->private;
|
||||
children = priv->children;
|
||||
|
||||
deitransform_child = (long) cookie;
|
||||
read_child = (long) cookie;
|
||||
|
||||
local = frame->local;
|
||||
|
||||
@ -186,15 +216,15 @@ afr_stat_cbk (call_frame_t *frame, void *cookie,
|
||||
}
|
||||
this_try = ++local->cont.stat.last_tried;
|
||||
|
||||
if (this_try == deitransform_child) {
|
||||
if (this_try == read_child) {
|
||||
goto retry;
|
||||
}
|
||||
|
||||
unwind = 0;
|
||||
|
||||
STACK_WIND_COOKIE (frame, afr_stat_cbk,
|
||||
(void *) (long) deitransform_child,
|
||||
children[this_try],
|
||||
(void *) (long) read_child,
|
||||
children[this_try],
|
||||
children[this_try]->fops->stat,
|
||||
&local->loc);
|
||||
}
|
||||
@ -219,6 +249,10 @@ afr_stat (call_frame_t *frame, xlator_t *this,
|
||||
afr_local_t * local = NULL;
|
||||
xlator_t ** children = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
int ret = 0;
|
||||
|
||||
int call_child = 0;
|
||||
|
||||
int32_t op_ret = -1;
|
||||
@ -237,14 +271,36 @@ afr_stat (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
frame->local = local;
|
||||
|
||||
call_child = afr_deitransform (loc->inode->ino, priv->child_count);
|
||||
ret = inode_ctx_get (loc->inode, this,
|
||||
&ctx);
|
||||
if (ret < 0) {
|
||||
op_errno = EINVAL;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"inode ctx not set!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (inode_ctx->read_child >= 0) {
|
||||
call_child = inode_ctx->read_child;
|
||||
|
||||
local->cont.stat.last_tried = -1;
|
||||
|
||||
} else {
|
||||
call_child = afr_first_up_child (priv);
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
|
||||
local->cont.stat.last_tried = call_child;
|
||||
}
|
||||
|
||||
loc_copy (&local->loc, loc);
|
||||
|
||||
/*
|
||||
if stat fails from the deitranform'd child, we try
|
||||
all children starting with the first one
|
||||
*/
|
||||
local->cont.stat.last_tried = -1;
|
||||
local->cont.stat.ino = loc->inode->ino;
|
||||
|
||||
STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
|
||||
@ -275,19 +331,18 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie,
|
||||
afr_local_t * local = NULL;
|
||||
xlator_t ** children = NULL;
|
||||
|
||||
int deitransform_child = -1;
|
||||
|
||||
int unwind = 1;
|
||||
int last_tried = -1;
|
||||
int this_try = -1;
|
||||
int read_child = -1;
|
||||
|
||||
priv = this->private;
|
||||
children = priv->children;
|
||||
|
||||
deitransform_child = (long) cookie;
|
||||
|
||||
local = frame->local;
|
||||
|
||||
read_child = (long) cookie;
|
||||
|
||||
if (op_ret == -1) {
|
||||
retry:
|
||||
last_tried = local->cont.fstat.last_tried;
|
||||
@ -297,20 +352,15 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie,
|
||||
}
|
||||
this_try = ++local->cont.fstat.last_tried;
|
||||
|
||||
if (this_try == deitransform_child) {
|
||||
/*
|
||||
skip the deitransform'd child since if we are here
|
||||
we must have already tried that child
|
||||
*/
|
||||
if (this_try == read_child) {
|
||||
goto retry;
|
||||
}
|
||||
|
||||
|
||||
unwind = 0;
|
||||
|
||||
STACK_WIND_COOKIE (frame, afr_fstat_cbk,
|
||||
(void *) (long) deitransform_child,
|
||||
children[this_try],
|
||||
(void *) (long) read_child,
|
||||
children[this_try],
|
||||
children[this_try]->fops->fstat,
|
||||
local->fd);
|
||||
}
|
||||
@ -337,6 +387,10 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
int call_child = 0;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
int ret = 0;
|
||||
|
||||
int32_t op_ret = -1;
|
||||
int32_t op_errno = 0;
|
||||
|
||||
@ -356,13 +410,35 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
VALIDATE_OR_GOTO (fd->inode, out);
|
||||
|
||||
call_child = afr_deitransform (fd->inode->ino, priv->child_count);
|
||||
ret = inode_ctx_get (fd->inode, this,
|
||||
&ctx);
|
||||
|
||||
if (ret < 0) {
|
||||
op_errno = EINVAL;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"inode ctx not set!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (inode_ctx->read_child >= 0) {
|
||||
call_child = inode_ctx->read_child;
|
||||
|
||||
local->cont.fstat.last_tried = -1;
|
||||
} else {
|
||||
call_child = afr_first_up_child (priv);
|
||||
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
|
||||
local->cont.fstat.last_tried = call_child;
|
||||
}
|
||||
|
||||
/*
|
||||
if fstat fails from the deitranform'd child, we try
|
||||
all children starting with the first one
|
||||
*/
|
||||
local->cont.fstat.last_tried = -1;
|
||||
local->cont.fstat.ino = fd->inode->ino;
|
||||
local->fd = fd_ref (fd);
|
||||
|
||||
@ -396,13 +472,17 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,
|
||||
int unwind = 1;
|
||||
int last_tried = -1;
|
||||
int this_try = -1;
|
||||
int read_child = -1;
|
||||
|
||||
priv = this->private;
|
||||
children = priv->children;
|
||||
|
||||
local = frame->local;
|
||||
|
||||
read_child = (long) cookie;
|
||||
|
||||
if (op_ret == -1) {
|
||||
retry:
|
||||
last_tried = local->cont.readlink.last_tried;
|
||||
|
||||
if (all_tried (last_tried, priv->child_count)) {
|
||||
@ -410,10 +490,14 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie,
|
||||
}
|
||||
this_try = ++local->cont.readlink.last_tried;
|
||||
|
||||
if (this_try == read_child) {
|
||||
goto retry;
|
||||
}
|
||||
|
||||
unwind = 0;
|
||||
STACK_WIND_COOKIE (frame, afr_readlink_cbk,
|
||||
(void *) (long) this_try,
|
||||
children[this_try],
|
||||
(void *) (long) read_child,
|
||||
children[this_try],
|
||||
children[this_try]->fops->readlink,
|
||||
&local->loc,
|
||||
local->cont.readlink.size);
|
||||
@ -437,6 +521,10 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
|
||||
int call_child = 0;
|
||||
afr_local_t *local = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
int ret = 0;
|
||||
|
||||
int32_t op_ret = -1;
|
||||
int32_t op_errno = 0;
|
||||
|
||||
@ -453,15 +541,35 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
frame->local = local;
|
||||
|
||||
call_child = afr_first_up_child (priv);
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
ret = inode_ctx_get (loc->inode, this,
|
||||
&ctx);
|
||||
if (ret < 0) {
|
||||
op_errno = EINVAL;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"inode ctx not set!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (inode_ctx->read_child >= 0) {
|
||||
call_child = inode_ctx->read_child;
|
||||
|
||||
local->cont.readlink.last_tried = -1;
|
||||
|
||||
} else {
|
||||
call_child = afr_first_up_child (priv);
|
||||
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
|
||||
local->cont.readlink.last_tried = call_child;
|
||||
}
|
||||
|
||||
local->cont.readlink.last_tried = call_child;
|
||||
loc_copy (&local->loc, loc);
|
||||
local->cont.readlink.size = size;
|
||||
|
||||
@ -495,13 +603,17 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
|
||||
int unwind = 1;
|
||||
int last_tried = -1;
|
||||
int this_try = -1;
|
||||
int read_child = -1;
|
||||
|
||||
priv = this->private;
|
||||
children = priv->children;
|
||||
|
||||
local = frame->local;
|
||||
|
||||
read_child = (long) cookie;
|
||||
|
||||
if (op_ret == -1) {
|
||||
retry:
|
||||
last_tried = local->cont.getxattr.last_tried;
|
||||
|
||||
if (all_tried (last_tried, priv->child_count)) {
|
||||
@ -509,10 +621,14 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie,
|
||||
}
|
||||
this_try = ++local->cont.getxattr.last_tried;
|
||||
|
||||
if (this_try == read_child) {
|
||||
goto retry;
|
||||
}
|
||||
|
||||
unwind = 0;
|
||||
STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
|
||||
(void *) (long) this_try,
|
||||
children[this_try],
|
||||
(void *) (long) read_child,
|
||||
children[this_try],
|
||||
children[this_try]->fops->getxattr,
|
||||
&local->loc,
|
||||
local->cont.getxattr.name);
|
||||
@ -536,6 +652,10 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
|
||||
int call_child = 0;
|
||||
afr_local_t * local = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
int ret = 0;
|
||||
|
||||
int32_t op_ret = -1;
|
||||
int32_t op_errno = 0;
|
||||
|
||||
@ -551,15 +671,34 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
|
||||
ALLOC_OR_GOTO (local, afr_local_t, out);
|
||||
frame->local = local;
|
||||
|
||||
call_child = afr_first_up_child (priv);
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
ret = inode_ctx_get (loc->inode, this, &ctx);
|
||||
|
||||
if (ret < 0) {
|
||||
op_errno = EINVAL;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"inode ctx not set!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (inode_ctx->read_child >= 0) {
|
||||
call_child = inode_ctx->read_child;
|
||||
|
||||
local->cont.getxattr.last_tried = -1;
|
||||
} else {
|
||||
call_child = afr_first_up_child (priv);
|
||||
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"no child is up :(");
|
||||
goto out;
|
||||
}
|
||||
|
||||
local->cont.getxattr.last_tried = call_child;
|
||||
}
|
||||
|
||||
local->cont.getxattr.last_tried = call_child;
|
||||
loc_copy (&local->loc, loc);
|
||||
if (name)
|
||||
local->cont.getxattr.name = strdup (name);
|
||||
@ -584,7 +723,7 @@ out:
|
||||
|
||||
/**
|
||||
* read algorithm:
|
||||
*
|
||||
*
|
||||
* if the user has specified a read subvolume, use it
|
||||
* otherwise -
|
||||
* use the inode number to hash it to one of the subvolumes, and
|
||||
@ -593,7 +732,7 @@ out:
|
||||
* if any of the above read's fail, try the children in sequence
|
||||
* beginning at the beginning
|
||||
*/
|
||||
|
||||
|
||||
int32_t
|
||||
afr_readv_cbk (call_frame_t *frame, void *cookie,
|
||||
xlator_t *this, int32_t op_ret, int32_t op_errno,
|
||||
@ -605,7 +744,8 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
|
||||
|
||||
int unwind = 1;
|
||||
int last_tried = -1;
|
||||
int this_try = -1;
|
||||
int this_try = -1;
|
||||
int read_child = -1;
|
||||
|
||||
VALIDATE_OR_GOTO (frame, out);
|
||||
VALIDATE_OR_GOTO (this, out);
|
||||
@ -618,6 +758,8 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
|
||||
|
||||
local = frame->local;
|
||||
|
||||
read_child = (long) cookie;
|
||||
|
||||
if (op_ret == -1) {
|
||||
retry:
|
||||
last_tried = local->cont.readv.last_tried;
|
||||
@ -627,8 +769,8 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
|
||||
}
|
||||
this_try = ++local->cont.readv.last_tried;
|
||||
|
||||
if (this_try == priv->read_child) {
|
||||
/*
|
||||
if (this_try == read_child) {
|
||||
/*
|
||||
skip the read child since if we are here
|
||||
we must have already tried that child
|
||||
*/
|
||||
@ -638,8 +780,8 @@ afr_readv_cbk (call_frame_t *frame, void *cookie,
|
||||
unwind = 0;
|
||||
|
||||
STACK_WIND_COOKIE (frame, afr_readv_cbk,
|
||||
(void *) (long) this_try,
|
||||
children[this_try],
|
||||
(void *) (long) read_child,
|
||||
children[this_try],
|
||||
children[this_try]->fops->readv,
|
||||
local->fd, local->cont.readv.size,
|
||||
local->cont.readv.offset);
|
||||
@ -662,6 +804,10 @@ afr_readv (call_frame_t *frame, xlator_t *this,
|
||||
afr_local_t * local = NULL;
|
||||
xlator_t ** children = NULL;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
int ret = 0;
|
||||
|
||||
int call_child = 0;
|
||||
|
||||
int32_t op_ret = -1;
|
||||
@ -679,15 +825,28 @@ afr_readv (call_frame_t *frame, xlator_t *this,
|
||||
|
||||
frame->local = local;
|
||||
|
||||
if (priv->read_child != -1) {
|
||||
call_child = priv->read_child;
|
||||
ret = inode_ctx_get (fd->inode, this,
|
||||
&ctx);
|
||||
|
||||
/*
|
||||
if (ret < 0) {
|
||||
op_errno = EINVAL;
|
||||
gf_log (this->name, GF_LOG_ERROR,
|
||||
"inode ctx not set!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (inode_ctx->read_child >= 0) {
|
||||
call_child = inode_ctx->read_child;
|
||||
|
||||
/*
|
||||
if read fails from the read child, we try
|
||||
all children starting with the first one
|
||||
*/
|
||||
local->cont.readv.last_tried = -1;
|
||||
} else {
|
||||
local->cont.readv.last_tried = -1;
|
||||
|
||||
} else {
|
||||
call_child = afr_first_up_child (priv);
|
||||
if (call_child == -1) {
|
||||
op_errno = ENOTCONN;
|
||||
|
@ -345,10 +345,13 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie,
|
||||
struct stat * lookup_buf = NULL;
|
||||
int call_count = -1;
|
||||
int child_index = -1;
|
||||
int prev_child_index = -1;
|
||||
|
||||
uint32_t open_fd_count = 0;
|
||||
int ret = 0;
|
||||
|
||||
afr_inode_ctx_t * inode_ctx = NULL;
|
||||
uint64_t ctx;
|
||||
|
||||
child_index = (long) cookie;
|
||||
priv = this->private;
|
||||
|
||||
@ -409,7 +412,52 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie,
|
||||
lookup_buf->st_ino = afr_itransform (buf->st_ino,
|
||||
priv->child_count,
|
||||
child_index);
|
||||
|
||||
ret = inode_ctx_get (local->cont.lookup.inode, this,
|
||||
&ctx);
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = child_index;
|
||||
}
|
||||
|
||||
} else {
|
||||
if ((local->op_ret == 0)
|
||||
&& (child_index == local->read_child_index)) {
|
||||
|
||||
/*
|
||||
lookup has succeeded on the read child.
|
||||
So use its inode number
|
||||
*/
|
||||
|
||||
local->op_ret = op_ret;
|
||||
|
||||
if (local->cont.lookup.xattr)
|
||||
dict_unref (local->cont.lookup.xattr);
|
||||
|
||||
local->cont.lookup.inode = inode;
|
||||
local->cont.lookup.xattr = dict_ref (xattr);
|
||||
|
||||
*lookup_buf = *buf;
|
||||
lookup_buf->st_ino = afr_itransform (buf->st_ino,
|
||||
priv->child_count,
|
||||
child_index);
|
||||
|
||||
ret = inode_ctx_get (local->cont.lookup.inode, this,
|
||||
&ctx);
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (priv->read_child >= 0) {
|
||||
inode_ctx->read_child = priv->read_child;
|
||||
} else {
|
||||
inode_ctx->read_child = local->read_child_index;
|
||||
}
|
||||
}
|
||||
|
||||
if (FILETYPE_DIFFERS (buf, lookup_buf)) {
|
||||
/* mismatching filetypes with same name
|
||||
-- Govinda !! GOvinda !!!
|
||||
@ -431,15 +479,6 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie,
|
||||
&& S_ISREG (buf->st_mode)) {
|
||||
local->need_data_self_heal = 1;
|
||||
}
|
||||
|
||||
prev_child_index = afr_deitransform_orig (lookup_buf->st_ino,
|
||||
priv->child_count);
|
||||
if (child_index < prev_child_index) {
|
||||
*lookup_buf = *buf;
|
||||
lookup_buf->st_ino = afr_itransform (buf->st_ino,
|
||||
priv->child_count,
|
||||
child_index);
|
||||
}
|
||||
}
|
||||
|
||||
local->success_count++;
|
||||
@ -465,9 +504,13 @@ unlock:
|
||||
}
|
||||
|
||||
if (local->success_count) {
|
||||
/* check for govinda_gOvinda case in previous lookup */
|
||||
if (!inode_ctx_get (local->cont.lookup.inode,
|
||||
this, NULL))
|
||||
/* check for split-brain case in previous lookup */
|
||||
ret = inode_ctx_get (local->cont.lookup.inode, this,
|
||||
&ctx);
|
||||
|
||||
inode_ctx = (afr_inode_ctx_t *)(long) ctx;
|
||||
|
||||
if (inode_ctx->split_brain)
|
||||
local->need_data_self_heal = 1;
|
||||
}
|
||||
|
||||
@ -544,7 +587,12 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
|
||||
}
|
||||
}
|
||||
|
||||
local->reval_child_index = 0;
|
||||
LOCK (&priv->read_child_lock);
|
||||
{
|
||||
local->read_child_index = (++priv->read_child_rr)
|
||||
% (priv->child_count);
|
||||
}
|
||||
UNLOCK (&priv->read_child_lock);
|
||||
|
||||
local->call_count = priv->child_count;
|
||||
|
||||
@ -2187,17 +2235,12 @@ init (xlator_t *this)
|
||||
trav = trav->next;
|
||||
}
|
||||
|
||||
/* XXX: return inode numbers from 1st subvolume till
|
||||
afr supports read-subvolume based on inode's ctx
|
||||
(and not itransform) for this reason afr_deitransform()
|
||||
returns 0 always
|
||||
*/
|
||||
priv->read_child = 0;
|
||||
|
||||
priv->wait_count = 1;
|
||||
|
||||
priv->child_count = child_count;
|
||||
|
||||
LOCK_INIT (&priv->lock);
|
||||
LOCK_INIT (&priv->read_child_lock);
|
||||
|
||||
priv->child_up = CALLOC (sizeof (unsigned char), child_count);
|
||||
if (!priv->child_up) {
|
||||
|
@ -35,6 +35,9 @@ typedef struct _afr_private {
|
||||
gf_lock_t lock; /* to guard access to child_count, etc */
|
||||
unsigned int child_count; /* total number of children */
|
||||
|
||||
unsigned int read_child_rr; /* round-robin index of the read_child */
|
||||
gf_lock_t read_child_lock; /* lock to protect above */
|
||||
|
||||
xlator_t **children;
|
||||
|
||||
unsigned char *child_up;
|
||||
@ -48,7 +51,7 @@ typedef struct _afr_private {
|
||||
gf_boolean_t metadata_change_log; /* on/off */
|
||||
gf_boolean_t entry_change_log; /* on/off */
|
||||
|
||||
unsigned int read_child; /* read-subvolume */
|
||||
int read_child; /* read-subvolume */
|
||||
unsigned int favorite_child; /* subvolume to be preferred in resolving
|
||||
split-brain cases */
|
||||
|
||||
@ -110,7 +113,8 @@ typedef struct _afr_local {
|
||||
unsigned int need_data_self_heal;
|
||||
unsigned int govinda_gOvinda;
|
||||
|
||||
unsigned int reval_child_index;
|
||||
unsigned int read_child_index;
|
||||
|
||||
int32_t op_ret;
|
||||
int32_t op_errno;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user