cluster/afr: mark new entry changelog for create/mknod failures

Problem:
When create/mknod fails on some of the nodes, the appropriate pending
data/metadata changelogs are not assigned. This was not considered
to be an issue, because entry self-heal assigns the appropriate
changelog after creating the new entries. However, by combining
rebalance and remove-brick we can construct a case where a file with
the same name and gfid is created in a directory with different data
and link-to xattr, without any changelog.

Fix:
When a create/mknod failure is observed, mark the appropriate
changelog on the newly created file.

Change-Id: I4c32cbf5594a13fb14deaf97ff30b2fff11cbfd6
BUG: 858212
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/4207
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
This commit is contained in:
Pranith Kumar K 2012-11-27 12:34:18 +05:30 committed by Anand Avati
parent d1bb60ad79
commit 676b8793b9
5 changed files with 228 additions and 67 deletions

View File

@ -766,6 +766,13 @@ out:
return;
}
/*
 * Release an array of per-child xattr dictionaries.
 *
 * The array is first handed to afr_reset_xattr() and then the array
 * storage itself is released with GF_FREE().
 *
 * @xattr:       array to destroy; NULL is accepted and is a no-op, because
 *               the cleanup paths that call this do so unconditionally,
 *               including when the array was never allocated.
 * @child_count: number of slots in @xattr.
 */
void
afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
{
        /* Nothing was allocated, so there is nothing to reset or free. */
        if (!xattr)
                return;

        afr_reset_xattr (xattr, child_count);
        GF_FREE (xattr);
}
void
afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
{
@ -782,10 +789,7 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
if (sh->inode)
inode_unref (sh->inode);
if (sh->xattr) {
afr_reset_xattr (sh->xattr, priv->child_count);
GF_FREE (sh->xattr);
}
afr_xattr_array_destroy (sh->xattr, priv->child_count);
GF_FREE (sh->child_errno);
@ -4233,3 +4237,42 @@ xlator_subvolume_count (xlator_t *this)
i++;
return i;
}
inline gf_boolean_t
afr_is_errno_set (int *child_errno, int child)
{
return child_errno[child];
}
inline gf_boolean_t
afr_is_errno_unset (int *child_errno, int child)
{
return !afr_is_errno_set (child_errno, child);
}
/*
 * Fill in the pending matrix for a newly created entry.
 *
 * For every child i in [0, child_count) for which is_pending (ctx, i) is
 * true, row i of @pending gets hton32 (1) in the metadata column and, for
 * directories and regular files, also in the entry respectively data
 * column. Other file types only get the metadata column. Rows of children
 * for which the predicate is false are left untouched.
 *
 * @pending:     matrix indexed as pending[child][transaction-type index].
 * @is_pending:  predicate selecting the rows to mark.
 * @ctx:         opaque int array passed through to @is_pending.
 * @buf:         iatt of the new entry; only ia_type is read.
 * @child_count: number of rows to consider.
 */
void
afr_prepare_new_entry_pending_matrix (int32_t **pending,
                                      gf_boolean_t (*is_pending) (int *, int),
                                      int *ctx, struct iatt *buf,
                                      unsigned int child_count)
{
        int meta_col = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
        int type_col = -1;      /* -1: no type-specific column to mark */
        int child    = 0;

        if (IA_ISDIR (buf->ia_type)) {
                type_col = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
        } else if (IA_ISREG (buf->ia_type)) {
                type_col = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
        }

        for (child = 0; child < child_count; child++) {
                if (!is_pending (ctx, child))
                        continue;

                pending[child][meta_col] = hton32 (1);
                if (type_col != -1)
                        pending[child][type_col] = hton32 (1);
        }
}

View File

@ -106,11 +106,134 @@ __dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
local->fresh_children[local->success_count] = child_index;
local->success_count++;
local->child_errno[child_index] = 0;
} else {
local->child_errno[child_index] = op_errno;
}
local->op_errno = op_errno;
}
/*
 * Callback for the xattrop wound by afr_mark_new_entry_changelog().
 *
 * The result of the xattrop (op_ret/op_errno/xattr/xdata) is not examined.
 * The frame is destroyed once afr_frame_return() reports that no further
 * replies are outstanding. Always returns 0.
 */
int
afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
                                  xlator_t *this,
                                  int32_t op_ret, int32_t op_errno,
                                  dict_t *xattr, dict_t *xdata)
{
        int outstanding = afr_frame_return (frame);

        if (outstanding != 0)
                return 0;

        /* Last reply: nobody else will touch this frame any more. */
        AFR_STACK_DESTROY (frame);
        return 0;
}
/*
 * Record pending changelog for a newly created entry on the children where
 * the create/mknod succeeded.
 *
 * The work is done on a copy of @frame with its own afr_local_t, so the
 * original frame is only read (child_errno, success_count and the dir_fop
 * result). A pending matrix is built in which the rows of the children
 * whose child_errno is set are marked, and an xattrop (GF_XATTROP_ADD_ARRAY)
 * is wound to every child whose child_errno is zero. Replies are consumed
 * by afr_mark_new_entry_changelog_cbk(), which destroys the copied frame.
 *
 * Nothing is reported to the caller: if any allocation fails the marking is
 * skipped and everything allocated so far is released.
 */
void
afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
{
        call_frame_t  *new_frame = NULL;
        afr_local_t   *local     = NULL;
        afr_local_t   *new_local = NULL;
        afr_private_t *priv      = NULL;
        dict_t       **xattr     = NULL;
        /* Owned by this function until it is attached to new_local. */
        int32_t      **changelog = NULL;
        int            i         = 0;
        GF_UNUSED int  op_errno  = 0;

        local = frame->local;
        priv  = this->private;

        new_frame = copy_frame (frame);
        if (!new_frame) {
                goto out;
        }

        AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
        new_local = new_frame->local;

        changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
        if (!changelog)
                goto out;

        xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
                           gf_afr_mt_dict_t);
        if (!xattr)
                goto out;

        /* Only children that will receive the xattrop need a dict. */
        for (i = 0; i < priv->child_count; i++) {
                if (local->child_errno[i])
                        continue;
                xattr[i] = dict_new ();
                if (!xattr[i])
                        goto out;
        }

        afr_prepare_new_entry_pending_matrix (changelog,
                                              afr_is_errno_set,
                                              local->child_errno,
                                              &local->cont.dir_fop.buf,
                                              priv->child_count);

        /*
         * Hand the matrix over to the new local. From here on it must not
         * be freed by this function, so drop our own reference to it.
         */
        new_local->pending = changelog;
        changelog = NULL;

        uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
        new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);

        /*
         * One reply is expected per wind below.
         * NOTE(review): this assumes success_count equals the number of
         * children with child_errno == 0 - confirm against the fop cbk.
         */
        new_local->call_count = local->success_count;

        for (i = 0; i < priv->child_count; i++) {
                if (local->child_errno[i])
                        continue;

                afr_set_pending_dict (priv, xattr[i], new_local->pending, i,
                                      LOCAL_LAST);
                STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
                                   (void *) (long) i, priv->children[i],
                                   priv->children[i]->fops->xattrop,
                                   &new_local->loc, GF_XATTROP_ADD_ARRAY,
                                   xattr[i], NULL);
        }

        /* The callback now owns the frame; do not destroy it below. */
        new_frame = NULL;
out:
        if (new_frame)
                AFR_STACK_DESTROY (new_frame);

        /*
         * Still non-NULL only when we bailed out before attaching the
         * matrix to new_local; without this it would be leaked.
         */
        if (changelog)
                afr_matrix_cleanup (changelog, priv->child_count);

        if (xattr)
                afr_xattr_array_destroy (xattr, priv->child_count);
        return;
}
/*
 * Tell whether @fop is one of the operations for which a new-entry
 * changelog has to be marked: GF_FOP_CREATE and GF_FOP_MKNOD.
 *
 * Returns _gf_true for those two fops and _gf_false for everything else.
 */
gf_boolean_t
afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
{
        switch (fop) {
        case GF_FOP_CREATE:
        case GF_FOP_MKNOD:
                return _gf_true;
        default:
                return _gf_false;
        }
}
/*
 * Decide whether the just-finished directory fop needs a new-entry
 * changelog and, if so, trigger it; then resume the transaction.
 *
 * The changelog is marked only when all of the following hold:
 *   - the fop succeeded overall (op_ret >= 0),
 *   - it did not succeed on every child,
 *   - the fop is one afr_is_new_entry_changelog_needed() accepts.
 *
 * local->transaction.resume() is invoked in every case.
 */
void
afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
{
        afr_local_t   *local = frame->local;
        afr_private_t *priv  = this->private;

        if ((local->op_ret >= 0) &&
            (local->success_count != priv->child_count) &&
            afr_is_new_entry_changelog_needed (local->op)) {
                afr_mark_new_entry_changelog (frame, this);
        }

        local->transaction.resume (frame, this);
}
void
afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
{
@ -129,7 +252,7 @@ afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
local->cont.dir_fop.buf.ia_gfid);
done:
local->transaction.unwind (frame, this);
local->transaction.resume (frame, this);
afr_dir_fop_mark_entry_pending_changelog (frame, this);
}
/* {{{ create */
@ -331,6 +454,7 @@ afr_create (call_frame_t *frame, xlator_t *this,
}
UNLOCK (&priv->read_child_lock);
local->op = GF_FOP_CREATE;
local->cont.create.flags = flags;
local->cont.create.mode = mode;
local->cont.create.fd = fd_ref (fd);
@ -524,6 +648,7 @@ afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
}
UNLOCK (&priv->read_child_lock);
local->op = GF_FOP_MKNOD;
local->cont.mknod.mode = mode;
local->cont.mknod.dev = dev;
local->umask = umask;

View File

@ -1018,33 +1018,6 @@ out:
return 0;
}
/*
 * Self-heal variant of the new-entry pending matrix setup.
 *
 * For every child i in [0, child_count) whose child_errno[i] is zero,
 * row i of @pending gets hton32 (1) in the metadata column and, for
 * directories and regular files, also in the entry respectively data
 * column. Rows of children with a non-zero child_errno are left untouched.
 *
 * @buf: iatt of the entry; only ia_type is read.
 */
void
afr_sh_prepare_new_entry_pending_matrix (int32_t **pending,
                                         int *child_errno,
                                         struct iatt *buf,
                                         unsigned int child_count)
{
        int meta_col = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
        int type_col = -1;      /* -1: no type-specific column to mark */
        int child    = 0;

        if (IA_ISDIR (buf->ia_type)) {
                type_col = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
        } else if (IA_ISREG (buf->ia_type)) {
                type_col = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
        }

        for (child = 0; child < child_count; child++) {
                if (child_errno[child] == 0) {
                        pending[child][meta_col] = hton32 (1);
                        if (type_col != -1)
                                pending[child][type_col] = hton32 (1);
                }
        }
}
int
afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
xlator_t *this)
@ -1061,10 +1034,11 @@ afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
impunge_sh = &impunge_local->self_heal;
active_src = impunge_sh->active_source;
afr_sh_prepare_new_entry_pending_matrix (impunge_local->pending,
impunge_sh->child_errno,
&impunge_sh->entrybuf,
priv->child_count);
afr_prepare_new_entry_pending_matrix (impunge_local->pending,
afr_is_errno_unset,
impunge_sh->child_errno,
&impunge_sh->entrybuf,
priv->child_count);
xattr = dict_new ();
if (!xattr) {
op_errno = ENOMEM;

View File

@ -203,6 +203,40 @@ __mark_all_success (int32_t *pending[], int child_count,
}
}
/*
 * Default every still-clear errno slot to ENOTCONN.
 *
 * Slots of @child_errno that are 0 are overwritten with ENOTCONN; slots
 * that already hold a non-zero value are left as they are. Only the first
 * @child_count slots are visited.
 */
void
_set_all_child_errno (int *child_errno, unsigned int child_count)
{
        unsigned int child = 0;

        while (child < child_count) {
                if (!child_errno[child])
                        child_errno[child] = ENOTCONN;
                child++;
        }
}
/*
 * Common tail of the transaction setup: prepare the per-child bookkeeping
 * and invoke the transaction's fop.
 *
 * In order: the pending matrix is marked via __mark_all_success(), every
 * clear child_errno slot is defaulted via _set_all_child_errno(), the
 * frame's lk-owner is saved and replaced by the main frame's lk-owner, and
 * finally local->transaction.fop() is called.
 */
void
afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
{
        afr_local_t   *local = frame->local;
        afr_private_t *priv  = this->private;

        __mark_all_success (local->pending, priv->child_count,
                            local->transaction.type);

        _set_all_child_errno (local->child_errno, priv->child_count);

        /*
         * The fop has to run with the lk-owner of the top xlator.
         * Example: posix-lk and flush must share the same lk-owner,
         * otherwise flush cannot clear the posix-lks.
         */
        afr_save_lk_owner (frame);
        frame->root->lk_owner = local->transaction.main_frame->root->lk_owner;

        local->transaction.fop (frame, this);
}
static int
__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
@ -778,18 +812,7 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
(local->op_errno == ENOTSUP)) {
local->transaction.resume (frame, this);
} else {
__mark_all_success (local->pending, priv->child_count,
local->transaction.type);
/* Perform fops with the lk-owner from top xlator.
* Eg: lk-owner of posix-lk and flush should be same,
* flush cant clear the posix-lks without that lk-owner.
*/
afr_save_lk_owner (frame);
frame->root->lk_owner =
local->transaction.main_frame->root->lk_owner;
local->transaction.fop (frame, this);
afr_transaction_perform_fop (frame, this);
}
}
@ -1218,28 +1241,10 @@ afr_lock (call_frame_t *frame, xlator_t *this)
int
afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
priv = this->private;
local = frame->local;
if (__fop_changelog_needed (frame, this)) {
afr_changelog_pre_op (frame, this);
} else {
__mark_all_success (local->pending, priv->child_count,
local->transaction.type);
/* Perform fops with the lk-owner from top xlator.
* Eg: lk-owner of posix-lk and flush should be same,
* flush cant clear the posix-lks without that lk-owner.
*/
afr_save_lk_owner (frame);
frame->root->lk_owner =
local->transaction.main_frame->root->lk_owner;
local->transaction.fop (frame, this);
afr_transaction_perform_fop (frame, this);
}
return 0;

View File

@ -1023,6 +1023,20 @@ afr_matrix_cleanup (int32_t **pending, unsigned int m);
/*
 * Create an m x n int32_t matrix.
 * NOTE(review): callers treat a NULL return as allocation failure and pair
 * this with afr_matrix_cleanup (matrix, m) - confirm against the definition.
 */
int32_t**
afr_matrix_create (unsigned int m, unsigned int n);
/* True when child_errno[child] is non-zero. */
gf_boolean_t
afr_is_errno_set (int *child_errno, int child);
/* Negation of afr_is_errno_set(): true when child_errno[child] is zero. */
gf_boolean_t
afr_is_errno_unset (int *child_errno, int child);
/*
 * For each child in [0, child_count) selected by is_pending (ctx, child),
 * mark the metadata column of that child's row in `pending`, plus the entry
 * column for directories or the data column for regular files (per
 * buf->ia_type).
 */
void
afr_prepare_new_entry_pending_matrix (int32_t **pending,
gf_boolean_t (*is_pending) (int *, int),
int *ctx, struct iatt *buf,
unsigned int child_count);
/*
 * Reset the child_count dictionaries in `xattr` via afr_reset_xattr() and
 * free the array itself.
 */
void
afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count);
/*
* Special value indicating we should use the "auto" quorum method instead of
* a fixed value (including zero to turn off quorum enforcement).