afr: propagate correct errno for fop failures in arbiter
Problem: If quorum is not met in the fop cbk, arbiter sends an ENOTCONN error to the upper xlators. In a VM workload with sharding enabled, this was leading to the VM pausing when replace-brick was performed, as described in the BZ.

Fix: Move the fop cbk arbitration logic to afr_handle_quorum(), because in normal replica volumes that is the function that has the quorum and errno checks in the fop cbk path before doing a post-op.

Thanks to Pranith for suggesting this approach.

Change-Id: Ie6315db30c5e36326b71b90a01da824109e86796
BUG: 1449610
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://review.gluster.org/17235
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
This commit is contained in:
parent
64f41b962b
commit
93c850dd2a
@ -183,7 +183,6 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
|
||||
}
|
||||
}
|
||||
|
||||
afr_txn_arbitrate_fop_cbk (frame, this);
|
||||
}
|
||||
|
||||
|
||||
|
@ -131,7 +131,6 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
|
||||
}
|
||||
}
|
||||
|
||||
afr_txn_arbitrate_fop_cbk (frame, this);
|
||||
afr_set_in_flight_sb_status (this, local, local->inode);
|
||||
}
|
||||
|
||||
|
@ -304,22 +304,21 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)
|
||||
gf_boolean_t
|
||||
afr_has_arbiter_fop_cbk_quorum (call_frame_t *frame)
|
||||
{
|
||||
afr_local_t *local = NULL;
|
||||
afr_private_t *priv = NULL;
|
||||
xlator_t *this = NULL;
|
||||
gf_boolean_t fop_failed = _gf_false;
|
||||
unsigned char *pre_op_sources = NULL;
|
||||
int i = 0;
|
||||
|
||||
local = frame->local;
|
||||
this = frame->this;
|
||||
priv = this->private;
|
||||
pre_op_sources = local->transaction.pre_op_sources;
|
||||
|
||||
if (priv->arbiter_count != 1 || local->op_ret < 0)
|
||||
return;
|
||||
|
||||
/* If the fop failed on the brick, it is not a source. */
|
||||
for (i = 0; i < priv->child_count; i++)
|
||||
if (local->transaction.failed_subvols[i])
|
||||
@ -335,12 +334,10 @@ afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)
|
||||
break;
|
||||
}
|
||||
|
||||
if (fop_failed) {
|
||||
local->op_ret = -1;
|
||||
local->op_errno = ENOTCONN;
|
||||
}
|
||||
if (fop_failed)
|
||||
return _gf_false;
|
||||
|
||||
return;
|
||||
return _gf_true;
|
||||
}
|
||||
|
||||
void
|
||||
@ -807,8 +804,12 @@ afr_handle_quorum (call_frame_t *frame)
|
||||
* no split-brain with the fix. The problem is eliminated completely.
|
||||
*/
|
||||
|
||||
if (afr_has_fop_cbk_quorum (frame))
|
||||
if (priv->arbiter_count) {
|
||||
if (afr_has_arbiter_fop_cbk_quorum (frame))
|
||||
return;
|
||||
} else if (afr_has_fop_cbk_quorum (frame)) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < priv->child_count; i++) {
|
||||
if (local->transaction.pre_op[i])
|
||||
|
@ -16,8 +16,6 @@
|
||||
void
|
||||
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
|
||||
int child_index);
|
||||
void
|
||||
afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this);
|
||||
|
||||
int
|
||||
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
|
||||
|
Loading…
x
Reference in New Issue
Block a user