cluster/dht: Fix fd check race
There is a another race between the cached subvol being updated in the inode_ctx and the fd being opened on the target. 1. fop1 -> fd1 -> subvol0 2. file migrated from subvol0 to subvol1 and cached_subvol changed to subvol1 in inode_ctx 3. fop2 -> fd1 -> subvol1 [takes new cached subvol] 4. fop2 -> checks fd ctx (fd not open on subvol1) -> opens fd1 on subvol1 5. fop1 -> checks fd ctx (fd not open on subvol0) -> tries to open fd1 on subvol0 -> fails with "No such file on directory". Fix: If dht_fd_open_on_dst fails with ENOENT or ESTALE, wind to old subvol and let the phase1/phase2 checks handle it. Change-Id: I34f8011574a8b72e3bcfe03b0cc4f024b352f225 BUG: 1465075 Signed-off-by: N Balachandran <nbalacha@redhat.com> Reviewed-on: https://review.gluster.org/17731 Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Reviewed-by: Amar Tumballi <amarts@redhat.com>
This commit is contained in:
parent
a62b16b72b
commit
f7a450c17f
@ -287,10 +287,12 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame,
|
||||
glusterfs_fop_t fop = 0;
|
||||
dht_local_t *local = NULL;
|
||||
xlator_t *subvol = NULL;
|
||||
xlator_t *this = NULL;
|
||||
fd_t *fd = NULL;
|
||||
int op_errno = -1;
|
||||
|
||||
local = frame->local;
|
||||
this = frame->this;
|
||||
fop = local->fop;
|
||||
subvol = local->cached_subvol;
|
||||
fd = local->fd;
|
||||
@ -379,6 +381,11 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame,
|
||||
break;
|
||||
|
||||
default:
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
DHT_MSG_UNKNOWN_FOP,
|
||||
"Unknown FOP on fd (%p) on file %s @ %s",
|
||||
fd, uuid_utoa (fd->inode->gfid),
|
||||
subvol->name);
|
||||
break;
|
||||
|
||||
}
|
||||
@ -439,6 +446,11 @@ handle_err:
|
||||
break;
|
||||
|
||||
default:
|
||||
gf_msg (this->name, GF_LOG_ERROR, 0,
|
||||
DHT_MSG_UNKNOWN_FOP,
|
||||
"Unknown FOP on fd (%p) on file %s @ %s",
|
||||
fd, uuid_utoa (fd->inode->gfid),
|
||||
subvol->name);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -502,6 +514,20 @@ dht_check_and_open_fd_on_subvol_task (void *data)
|
||||
" (%p, flags=0%o) on file %s @ %s",
|
||||
fd, fd->flags, uuid_utoa (fd->inode->gfid),
|
||||
subvol->name);
|
||||
/* This can happen if the cached subvol was updated in the
|
||||
* inode_ctx and the fd was opened on the new cached suvol
|
||||
* after this fop was wound on the old cached subvol.
|
||||
* As we do not close the fd on the old subvol (a leak)
|
||||
* don't treat ENOENT as an error and allow the phase1/phase2
|
||||
* checks to handle it.
|
||||
*/
|
||||
|
||||
if ((-ret != ENOENT) && (-ret != ESTALE)) {
|
||||
local->op_errno = -ret;
|
||||
ret = -1;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
local->op_errno = -ret;
|
||||
ret = -1;
|
||||
|
@ -40,7 +40,7 @@
|
||||
*/
|
||||
|
||||
#define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT
|
||||
#define GLFS_DHT_NUM_MESSAGES 124
|
||||
#define GLFS_DHT_NUM_MESSAGES 125
|
||||
#define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
|
||||
|
||||
/* Messages with message IDs */
|
||||
@ -1127,5 +1127,12 @@
|
||||
*/
|
||||
#define DHT_MSG_LOC_FAILED (GLFS_DHT_BASE + 124)
|
||||
|
||||
/*
|
||||
* @messageid 109125
|
||||
* @diagnosis
|
||||
* @recommendedaction None
|
||||
*/
|
||||
#define DHT_MSG_UNKNOWN_FOP (GLFS_DHT_BASE + 125)
|
||||
|
||||
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
|
||||
#endif /* _DHT_MESSAGES_H_ */
|
||||
|
Loading…
x
Reference in New Issue
Block a user