afr: perform replace-brick in a synctask
Problem: replace-brick setxattr is not performed inside a synctask. This can lead to hangs if the setxattr is executed by epoll thread, as the epoll thread will be waiting for replies to come where as epoll thread is the thread that needs to epoll_ctl for reading from socket and listen. Fix: Move replace-brick to synctask to prevent epoll thread hang. This patch is in line with the fix performed in http://review.gluster.org/#/c/12163/ Change-Id: I6a71038bb7819f9e98f7098b18a6cee34805868f BUG: 1262345 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/12169 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Tested-by: NetBSD Build System <jenkins@build.gluster.org>
This commit is contained in:
parent
63e3806ba3
commit
22882a9b81
@ -202,6 +202,7 @@ function is_bad_test ()
|
||||
./tests/basic/tier/tier.t \
|
||||
./tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t \
|
||||
./tests/bugs/glusterd/bug-948686.t \
|
||||
./tests/geo-rep/georep-basic-dr-rsync.t \
|
||||
; do
|
||||
[ x"$name" = x"$bt" ] && return 0 # bash: zero means true/success
|
||||
done
|
||||
|
@ -1134,27 +1134,45 @@ out:
|
||||
}
|
||||
|
||||
int
|
||||
_afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
|
||||
int rb_index)
|
||||
_afr_handle_replace_brick_cbk (int ret, call_frame_t *frame, void *opaque)
|
||||
{
|
||||
afr_replace_brick_args_t *data = NULL;
|
||||
|
||||
data = opaque;
|
||||
loc_wipe (&data->loc);
|
||||
GF_FREE (data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_afr_handle_replace_brick (void *opaque)
|
||||
{
|
||||
|
||||
afr_local_t *local = NULL;
|
||||
afr_private_t *priv = NULL;
|
||||
int rb_index = -1;
|
||||
int ret = -1;
|
||||
int op_errno = ENOMEM;
|
||||
call_frame_t *frame = NULL;
|
||||
xlator_t *this = NULL;
|
||||
afr_replace_brick_args_t *data = NULL;
|
||||
|
||||
data = opaque;
|
||||
frame = data->frame;
|
||||
rb_index = data->rb_index;
|
||||
this = frame->this;
|
||||
priv = this->private;
|
||||
|
||||
local = AFR_FRAME_INIT (frame, op_errno);
|
||||
if (!local)
|
||||
goto out;
|
||||
|
||||
loc_copy (&local->loc, loc);
|
||||
loc_copy (&local->loc, &data->loc);
|
||||
|
||||
gf_log (this->name, GF_LOG_DEBUG, "Child being replaced is : %s",
|
||||
priv->children[rb_index]->name);
|
||||
|
||||
ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
|
||||
ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index,
|
||||
AFR_METADATA_TRANSACTION);
|
||||
if (ret) {
|
||||
op_errno = -ret;
|
||||
@ -1167,7 +1185,7 @@ _afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
|
||||
local->pending = NULL;
|
||||
local->xdata_req = NULL;
|
||||
|
||||
ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index,
|
||||
ret = _afr_handle_replace_brick_type (this, frame, &local->loc, rb_index,
|
||||
AFR_ENTRY_TRANSACTION);
|
||||
if (ret) {
|
||||
op_errno = -ret;
|
||||
@ -1394,32 +1412,60 @@ afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc,
|
||||
{
|
||||
int ret = -1;
|
||||
int rb_index = -1;
|
||||
int op_errno = EPERM;
|
||||
char *replace_brick = NULL;
|
||||
afr_replace_brick_args_t *data = NULL;
|
||||
|
||||
ret = dict_get_str (dict, GF_AFR_REPLACE_BRICK, &replace_brick);
|
||||
|
||||
if (!ret) {
|
||||
if (frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal"
|
||||
" extended attribute : %s.",
|
||||
GF_AFR_REPLACE_BRICK, strerror (EPERM));
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
rb_index = afr_get_child_index_from_name (this, replace_brick);
|
||||
|
||||
if (rb_index < 0)
|
||||
if (rb_index < 0) {
|
||||
/* Didn't belong to this replica pair
|
||||
* Just do a no-op
|
||||
*/
|
||||
AFR_STACK_UNWIND (setxattr, frame, 0, 0, NULL);
|
||||
else
|
||||
_afr_handle_replace_brick (this, frame, loc, rb_index);
|
||||
return 0;
|
||||
} else {
|
||||
data = GF_CALLOC (1, sizeof (*data),
|
||||
gf_afr_mt_replace_brick_t);
|
||||
if (!data) {
|
||||
ret = 1;
|
||||
op_errno = ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
data->frame = frame;
|
||||
loc_copy (&data->loc, loc);
|
||||
data->rb_index = rb_index;
|
||||
ret = synctask_new (this->ctx->env,
|
||||
_afr_handle_replace_brick,
|
||||
_afr_handle_replace_brick_cbk,
|
||||
NULL, data);
|
||||
if (ret) {
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
AFR_MSG_REPLACE_BRICK_FAILED,
|
||||
"Failed to create synctask. Unable to "
|
||||
"perform replace-brick.");
|
||||
ret = 1;
|
||||
op_errno = ENOMEM;
|
||||
loc_wipe (&data->loc);
|
||||
GF_FREE (data);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
}
|
||||
out:
|
||||
if (ret == 1) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal"
|
||||
" extended attribute : %s.",
|
||||
GF_AFR_REPLACE_BRICK, strerror (EPERM));
|
||||
AFR_STACK_UNWIND (setxattr, frame, -1, EPERM, NULL);
|
||||
AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
|
||||
ret = 0;
|
||||
}
|
||||
return ret;
|
||||
|
@ -45,6 +45,7 @@ enum gf_afr_mem_types_ {
|
||||
gf_afr_mt_subvol_healer_t,
|
||||
gf_afr_mt_spbc_timeout_t,
|
||||
gf_afr_mt_spb_status_t,
|
||||
gf_afr_mt_replace_brick_t,
|
||||
gf_afr_mt_end
|
||||
};
|
||||
#endif
|
||||
|
@ -40,7 +40,7 @@
|
||||
*/
|
||||
|
||||
#define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR
|
||||
#define GLFS_NUM_MESSAGES 38
|
||||
#define GLFS_NUM_MESSAGES 39
|
||||
#define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1)
|
||||
|
||||
#define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages"
|
||||
@ -327,7 +327,6 @@
|
||||
*/
|
||||
#define AFR_MSG_SELF_HEAL_FAILED (GLFS_COMP_BASE_AFR + 37)
|
||||
|
||||
|
||||
/*!
|
||||
* @messageid 108038
|
||||
* @diagnosis
|
||||
@ -335,6 +334,13 @@
|
||||
*/
|
||||
#define AFR_MSG_SPLIT_BRAIN_STATUS (GLFS_COMP_BASE_AFR + 38)
|
||||
|
||||
/*!
|
||||
* @messageid 108039
|
||||
* @diagnosis
|
||||
* @recommendedaction
|
||||
*/
|
||||
#define AFR_MSG_REPLACE_BRICK_FAILED (GLFS_COMP_BASE_AFR + 39)
|
||||
|
||||
|
||||
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
|
||||
#endif /* !_AFR_MESSAGES_H_ */
|
||||
|
@ -755,6 +755,12 @@ typedef struct afr_spb_status {
|
||||
loc_t *loc;
|
||||
} afr_spb_status_t;
|
||||
|
||||
typedef struct afr_replace_brick_args {
|
||||
call_frame_t *frame;
|
||||
loc_t loc;
|
||||
int rb_index;
|
||||
} afr_replace_brick_args_t;
|
||||
|
||||
typedef struct afr_read_subvol_args {
|
||||
ia_type_t ia_type;
|
||||
uuid_t gfid;
|
||||
|
Loading…
x
Reference in New Issue
Block a user