cluster/dht : Ftruncate on migrating file fails with EINVAL

What:
If dht_open is called on a migrating file after the inode_ctx is set,
subsequent FOPs on that fd do not open the fd on the dst subvol.
This is seen when the open-ftruncate-close sequence is repeatedly
called on a migrating file.
A second call to the sequence described above causes dht_truncate_cbk
to call dht_truncate2 as the dht_inode_ctx was already set by the first
call. As dht_rebalance_in_progress_check is not called, the fd is not
opened on the dst subvol.
On a distributed-replicate volume, this causes AFR to
open the fd using afr_fix_open, but with the wrong flags, causing
posix_ftruncate to fail with EINVAL.
The fix: Deciding how to handle a FOP on a migrating file requires
fd-specific information, not just the inode ctx.
Set the fd_ctx once the fd has been opened on the dst subvol, and
check whether it is set while processing the Phase1/Phase2 checks
in the FOP callback functions.

Change-Id: I43cdcd8017b4a11e18afdd210469de7cd9a5ef14
BUG: 1284823
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: http://review.gluster.org/12985
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Dan Lambright <dlambrig@redhat.com>
This commit is contained in:
N Balachandran 2015-12-16 21:09:22 +05:30 committed by Dan Lambright
parent e62c0fe19b
commit 430ad40529
9 changed files with 317 additions and 80 deletions

View File

@ -8050,3 +8050,9 @@ dht_is_tier_xlator (xlator_t *this)
return _gf_true;
return _gf_false;
}
/*
 * Release hook for an fd: tears down the dht fd ctx attached to @fd and
 * hands back whatever dht_fd_ctx_destroy() reported.
 */
int32_t
dht_release (xlator_t *this, fd_t *fd)
{
        int32_t status = dht_fd_ctx_destroy (this, fd);

        return status;
}

View File

@ -547,6 +547,13 @@ typedef struct dht_migrate_info {
} dht_migrate_info_t;
/* Per-fd context that dht attaches to an fd_t to remember where the fd
 * has been opened during file migration. */
typedef struct dht_fd_ctx {
/* Destination subvolume pointer (xlator_t *) stored as an integer;
 * compared against a candidate dst to tell whether the fd is already
 * open there. */
uint64_t opened_on_dst;
/* Reference-count state used with GF_REF_INIT/GF_REF_GET/GF_REF_PUT.
 * NOTE(review): exact member(s) come from the GF_REF_DECL macro, which
 * is defined outside this view. */
GF_REF_DECL;
} dht_fd_ctx_t;
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
@ -1131,11 +1138,20 @@ int
dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child,
int32_t *op_errno);
int32_t dht_set_local_rebalance (xlator_t *this, dht_local_t *local,
struct iatt *stbuf,
struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata);
int32_t
dht_set_local_rebalance (xlator_t *this, dht_local_t *local,
struct iatt *stbuf,
struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata);
void
dht_build_root_loc (inode_t *inode, loc_t *loc);
gf_boolean_t
dht_fd_open_on_dst (xlator_t *this, fd_t *fd, xlator_t *dst);
int32_t
dht_fd_ctx_destroy (xlator_t *this, fd_t *fd);
int32_t
dht_release (xlator_t *this, fd_t *fd);
#endif/* _DHT_H */

View File

@ -14,6 +14,166 @@
#include "dht-common.h"
#include "dht-helper.h"
/*
 * Destructor registered through GF_REF_INIT for a dht_fd_ctx_t.
 * @data is the fd ctx being released; its memory is returned with GF_FREE.
 */
void
dht_free_fd_ctx (void *data)
{
        dht_fd_ctx_t *ctx = data;

        GF_FREE (ctx);
}
/*
 * Detach the dht ctx from @fd and drop the reference it held.
 *
 * Returns -1 when @this or @fd is NULL, otherwise the result of
 * fd_ctx_del(). The ctx reference is only dropped when the delete
 * succeeded and a non-NULL ctx was stored.
 */
int32_t
dht_fd_ctx_destroy (xlator_t *this, fd_t *fd)
{
        uint64_t      raw    = 0;
        int32_t       status = -1;
        dht_fd_ctx_t *ctx    = NULL;

        GF_VALIDATE_OR_GOTO ("dht", this, out);
        GF_VALIDATE_OR_GOTO (this->name, fd, out);

        status = fd_ctx_del (fd, this, &raw);
        if (status == 0) {
                ctx = (dht_fd_ctx_t *) raw;
                if (ctx != NULL) {
                        GF_REF_PUT (ctx);
                }
        }

out:
        return status;
}
/*
 * Allocate a fresh dht_fd_ctx_t recording @dst and store it in @fd.
 *
 * Uses the __fd_ctx_set() variant; the in-file caller dht_fd_ctx_set()
 * invokes this while holding fd->lock.
 *
 * Returns -1 on NULL arguments or allocation failure, otherwise the
 * result of __fd_ctx_set(). If storing fails, the new ctx is released
 * again so nothing is leaked.
 */
static int
__dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *dst)
{
        dht_fd_ctx_t *ctx    = NULL;
        int           status = -1;

        GF_VALIDATE_OR_GOTO ("dht", this, out);
        GF_VALIDATE_OR_GOTO (this->name, fd, out);

        ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_fd_ctx_t);
        if (ctx == NULL) {
                goto out;
        }

        ctx->opened_on_dst = (uint64_t) dst;
        GF_REF_INIT (ctx, dht_free_fd_ctx);

        status = __fd_ctx_set (fd, this, (uint64_t) ctx);
        if (status >= 0) {
                goto out;
        }

        /* Could not attach the ctx to the fd: log and give it back. */
        gf_msg (this->name, GF_LOG_WARNING, 0,
                DHT_MSG_FD_CTX_SET_FAILED,
                "Failed to set fd ctx in fd=0x%p", fd);
        GF_REF_PUT (ctx);

out:
        return status;
}
/*
 * Record in @fd's dht ctx that the fd has been opened on subvolume @dst.
 *
 * If a ctx is already attached to the fd it is updated in place (a
 * matching dst is a no-op, a differing dst is logged and overwritten);
 * otherwise a new ctx is allocated and attached. All of this happens
 * under fd->lock.
 *
 * Returns 0 on success, negative on failure (NULL arguments, allocation
 * failure or failure to store the ctx).
 */
int
dht_fd_ctx_set (xlator_t *this, fd_t *fd, xlator_t *dst)
{
        dht_fd_ctx_t *fd_ctx = NULL;
        uint64_t      value  = 0;
        int           ret    = -1;

        GF_VALIDATE_OR_GOTO ("dht", this, out);
        GF_VALIDATE_OR_GOTO (this->name, fd, out);

        LOCK (&fd->lock);
        {
                ret = __fd_ctx_get (fd, this, &value);
                /* __fd_ctx_get() returns 0 when a ctx is present. The
                 * original tested "ret && value", which can never hold
                 * for an existing ctx, so every repeat call allocated a
                 * new ctx and leaked the one already stored. */
                if ((ret == 0) && value) {
                        fd_ctx = (dht_fd_ctx_t *) value;
                        if (fd_ctx->opened_on_dst == (uint64_t) dst) {
                                /* This could happen due to racing
                                 * check_progress tasks */
                                goto unlock;
                        } else {
                                /* This would be a big problem */
                                gf_msg (this->name, GF_LOG_WARNING, 0,
                                        DHT_MSG_INVALID_VALUE,
                                        "Different dst found in the fd ctx");

                                /* Overwrite and hope for the best */
                                fd_ctx->opened_on_dst = (uint64_t) dst;
                                goto unlock;
                        }
                }

                /* No usable ctx on this fd yet: create and attach one. */
                ret = __dht_fd_ctx_set (this, fd, dst);
        }
unlock:
        UNLOCK (&fd->lock);
out:
        return ret;
}
/*
 * Look up the dht ctx stored on @fd and take a reference on it.
 *
 * Returns the ctx with an extra reference (the caller must GF_REF_PUT
 * it), or NULL when an argument is NULL, the lookup fails, or no ctx is
 * stored. The lookup and the ref-get are done under fd->lock.
 */
static
dht_fd_ctx_t *
dht_fd_ctx_get (xlator_t *this, fd_t *fd)
{
        dht_fd_ctx_t *ctx    = NULL;
        uint64_t      raw    = 0;
        int           status = -1;

        GF_VALIDATE_OR_GOTO ("dht", this, out);
        GF_VALIDATE_OR_GOTO (this->name, fd, out);

        LOCK (&fd->lock);
        {
                status = __fd_ctx_get (fd, this, &raw);
                if ((status >= 0) && (raw != 0)) {
                        ctx = (dht_fd_ctx_t *) raw;
                        GF_REF_GET (ctx);
                }
        }
        UNLOCK (&fd->lock);

out:
        return ctx;
}
/*
 * Report whether @fd's dht ctx says the fd is already open on @dst.
 *
 * Returns _gf_true only when a ctx exists on the fd and its recorded
 * destination equals @dst; _gf_false otherwise (including when no ctx
 * is set). The temporary reference taken on the ctx is dropped before
 * returning.
 */
gf_boolean_t
dht_fd_open_on_dst (xlator_t *this, fd_t *fd, xlator_t *dst)
{
        gf_boolean_t  matches = _gf_false;
        dht_fd_ctx_t *ctx     = dht_fd_ctx_get (this, fd);

        if (ctx == NULL) {
                return _gf_false;
        }

        matches = (ctx->opened_on_dst == (uint64_t) dst) ? _gf_true
                                                         : _gf_false;
        GF_REF_PUT (ctx);

        return matches;
}
void
dht_free_mig_info (void *data)
{
@ -1071,25 +1231,34 @@ dht_migration_complete_check_task (void *data)
inode_path (inode, NULL, &path);
if (path)
tmp_loc.path = path;
list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
if (fd_is_anonymous (iter_fd))
continue;
if (dht_fd_open_on_dst (this, iter_fd, dst_node))
continue;
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
ret = syncop_open (dst_node, &tmp_loc,
(iter_fd->flags &
~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd,
NULL, NULL);
~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
gf_msg (this->name, GF_LOG_ERROR, -ret,
DHT_MSG_OPEN_FD_ON_DST_FAILED, "failed to open "
"the fd (%p, flags=0%o) on file %s @ %s",
iter_fd, iter_fd->flags, path, dst_node->name);
DHT_MSG_OPEN_FD_ON_DST_FAILED, "failed"
" to open the fd"
" (%p, flags=0%o) on file %s @ %s",
iter_fd, iter_fd->flags, path,
dst_node->name);
open_failed = 1;
local->op_errno = -ret;
ret = -1;
} else {
dht_fd_ctx_set (this, iter_fd, dst_node);
}
}
@ -1159,22 +1328,22 @@ out:
static int
dht_rebalance_inprogress_task (void *data)
{
int ret = -1;
xlator_t *src_node = NULL;
xlator_t *dst_node = NULL;
dht_local_t *local = NULL;
dict_t *dict = NULL;
call_frame_t *frame = NULL;
xlator_t *this = NULL;
char *path = NULL;
struct iatt stbuf = {0,};
loc_t tmp_loc = {0,};
dht_conf_t *conf = NULL;
inode_t *inode = NULL;
fd_t *iter_fd = NULL;
int open_failed = 0;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
int ret = -1;
xlator_t *src_node = NULL;
xlator_t *dst_node = NULL;
dht_local_t *local = NULL;
dict_t *dict = NULL;
call_frame_t *frame = NULL;
xlator_t *this = NULL;
char *path = NULL;
struct iatt stbuf = {0,};
loc_t tmp_loc = {0,};
dht_conf_t *conf = NULL;
inode_t *inode = NULL;
fd_t *iter_fd = NULL;
int open_failed = 0;
uint64_t tmp_miginfo = 0;
dht_migrate_info_t *miginfo = NULL;
this = THIS;
@ -1298,22 +1467,30 @@ dht_rebalance_inprogress_task (void *data)
if (fd_is_anonymous (iter_fd))
continue;
if (dht_fd_open_on_dst (this, iter_fd, dst_node))
continue;
/* flags for open are stripped down to allow following the
* new location of the file, otherwise we can get EEXIST or
* truncate the file again as rebalance is moving the data */
ret = syncop_open (dst_node, &tmp_loc,
(iter_fd->flags &
~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd,
NULL, NULL);
(iter_fd->flags &
~(O_CREAT | O_EXCL | O_TRUNC)),
iter_fd, NULL, NULL);
if (ret < 0) {
gf_msg (this->name, GF_LOG_ERROR, -ret,
DHT_MSG_OPEN_FD_ON_DST_FAILED,
"failed to send open "
"the fd (%p, flags=0%o) on file %s @ %s",
iter_fd, iter_fd->flags, path, dst_node->name);
iter_fd, iter_fd->flags, path,
dst_node->name);
ret = -1;
open_failed = 1;
} else {
/* Potential fd leak if this fails here as it will be
reopened at the next Phase1/2 check */
dht_fd_ctx_set (this, iter_fd, dst_node);
}
}
SYNCTASK_SETID (frame->root->uid, frame->root->gid);

View File

@ -142,7 +142,8 @@ int
dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
{
xlator_t *subvol = 0;
xlator_t *subvol1 = 0;
xlator_t *subvol2 = 0;
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
@ -172,21 +173,31 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* Check if the rebalance phase2 is true */
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
local->rebalance.target_op_fn = dht_attr2;
dht_set_local_rebalance (this, local, NULL, NULL,
stbuf, xdata);
inode = (local->fd) ? local->fd->inode : local->loc.inode;
ret = dht_inode_ctx_get_mig_info (this, inode, NULL, &subvol);
if (!subvol) {
dht_inode_ctx_get_mig_info (this, inode, &subvol1, &subvol2);
if (dht_mig_info_is_invalid (local->cached_subvol,
subvol1, subvol2)){
/* Phase 2 of migration */
local->rebalance.target_op_fn = dht_attr2;
dht_set_local_rebalance (this, local, NULL, NULL,
stbuf, xdata);
ret = dht_rebalance_complete_check (this, frame);
if (!ret)
return 0;
} else {
/* value is already set in fd_ctx, that means no need
to check for whether its complete or not. */
dht_attr2 (this, subvol, frame, 0);
return 0;
/* it is a non-fd op or it is an fd based Fop and
opened on the dst.*/
if (local->fd &&
!dht_fd_open_on_dst (this, local->fd, subvol2)) {
ret = dht_rebalance_complete_check (this, frame);
if (!ret)
return 0;
} else {
dht_attr2 (this, subvol2, frame, 0);
return 0;
}
}
}
@ -431,17 +442,19 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
local->op_ret = op_ret;
local->rebalance.target_op_fn = dht_readv2;
dht_set_local_rebalance (this, local, NULL, NULL,
stbuf, xdata);
/* File would be migrated to other node */
ret = dht_inode_ctx_get_mig_info (this, local->fd->inode,
&src_subvol,
&dst_subvol);
if (dht_mig_info_is_invalid (local->cached_subvol,
src_subvol, dst_subvol)) {
local->op_ret = op_ret;
local->rebalance.target_op_fn = dht_readv2;
dht_set_local_rebalance (this, local, NULL, NULL,
stbuf, xdata);
src_subvol, dst_subvol)
|| !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
ret = dht_rebalance_complete_check (this, frame);
if (!ret)
@ -691,7 +704,7 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* If context is set, then send flush() it to the destination */
dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol);
if (subvol) {
if (subvol && dht_fd_open_on_dst (this, local->fd, subvol)) {
dht_flush2 (this, subvol, frame, 0);
return 0;
}
@ -805,32 +818,35 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
local->op_ret = op_ret;
inode = local->fd->inode;
dht_inode_ctx_get_mig_info (this, inode, &src_subvol, &dst_subvol);
local->rebalance.target_op_fn = dht_fsync2;
dht_set_local_rebalance (this, local, NULL, prebuf,
postbuf, xdata);
if (dht_mig_info_is_invalid (local->cached_subvol,
src_subvol, dst_subvol)) {
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
local->rebalance.target_op_fn = dht_fsync2;
dht_set_local_rebalance (this, local, NULL, prebuf,
postbuf, xdata);
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
/* Check if the rebalance phase1 is true */
if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
dht_inode_ctx_get_mig_info (this, inode, &src_subvol, &dst_subvol);
if (dht_mig_info_is_invalid (local->cached_subvol, src_subvol,
dst_subvol) ||
!dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
ret = dht_rebalance_in_progress_check (this, frame);
if (!ret)
return 0;
} else {
dht_fsync2 (this, dst_subvol, frame, 0);
return 0;
}
}
/* Check if the rebalance phase2 is true */
if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
}
if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
ret = dht_rebalance_complete_check (this, frame);
if (!ret)
return 0;
} else {
dht_fsync2 (this, dst_subvol, frame, 0);
return 0;
}
out:

View File

@ -89,8 +89,10 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&subvol1, &subvol2);
if (!dht_mig_info_is_invalid (local->cached_subvol,
subvol1, subvol2)) {
dht_writev2 (this, subvol2, frame, 0);
return 0;
if (dht_fd_open_on_dst (this, local->fd, subvol2)) {
dht_writev2 (this, subvol2, frame, 0);
return 0;
}
}
ret = dht_rebalance_in_progress_check (this, frame);
if (!ret)
@ -207,12 +209,12 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
xlator_t *src_subvol = NULL;
xlator_t *dst_subvol = NULL;
inode_t *inode = NULL;
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
int ret = -1;
xlator_t *src_subvol = NULL;
xlator_t *dst_subvol = NULL;
inode_t *inode = NULL;
GF_VALIDATE_OR_GOTO ("dht", frame, err);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@ -262,14 +264,18 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
inode = (local->fd) ? local->fd->inode : local->loc.inode;
dht_inode_ctx_get_mig_info (this, inode, &src_subvol,
&dst_subvol);
if (!dht_mig_info_is_invalid (local->cached_subvol,
src_subvol, dst_subvol)) {
dht_truncate2 (this, dst_subvol, frame, 0);
return 0;
if ((!local->fd) || ((local->fd) &&
dht_fd_open_on_dst (this, local->fd, dst_subvol))) {
dht_truncate2 (this, dst_subvol, frame, 0);
return 0;
}
}
ret = dht_rebalance_in_progress_check (this, frame);
if (!ret)
@ -475,8 +481,10 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
&dst_subvol);
if (!dht_mig_info_is_invalid (local->cached_subvol,
src_subvol, dst_subvol)) {
dht_fallocate2 (this, dst_subvol, frame, 0);
return 0;
if (dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
dht_fallocate2 (this, dst_subvol, frame, 0);
return 0;
}
}
ret = dht_rebalance_in_progress_check (this, frame);
if (!ret)
@ -638,8 +646,10 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
&dst_subvol);
if (!dht_mig_info_is_invalid(local->cached_subvol,
src_subvol, dst_subvol)) {
dht_discard2 (this, dst_subvol, frame, 0);
return 0;
if (dht_fd_open_on_dst (this, local->fd, dst_subvol)) {
dht_discard2 (this, dst_subvol, frame, 0);
return 0;
}
}
ret = dht_rebalance_in_progress_check (this, frame);
if (!ret)
@ -796,8 +806,10 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
&subvol1, &subvol2);
if (!dht_mig_info_is_invalid (local->cached_subvol,
subvol1, subvol2)) {
dht_zerofill2 (this, subvol2, frame, 0);
return 0;
if (dht_fd_open_on_dst (this, local->fd, subvol2)) {
dht_zerofill2 (this, subvol2, frame, 0);
return 0;
}
}
ret = dht_rebalance_in_progress_check (this, frame);

View File

@ -36,6 +36,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_miginfo_t,
gf_tier_mt_bricklist_t,
gf_tier_mt_ipc_ctr_params_t,
gf_dht_mt_fd_ctx_t,
gf_dht_mt_end
};
#endif

View File

@ -40,7 +40,7 @@
*/
#define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT
#define GLFS_DHT_NUM_MESSAGES 111
#define GLFS_DHT_NUM_MESSAGES 112
#define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
/* Messages with message IDs */
@ -1034,5 +1034,13 @@
#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED (GLFS_DHT_BASE + 111)
/*
* @messageid 109112
* @diagnosis
* @recommendedaction None
*/
#define DHT_MSG_FD_CTX_SET_FAILED (GLFS_DHT_BASE + 112)
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* _DHT_MESSAGES_H_ */

View File

@ -77,7 +77,7 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
// .release = dht_release,
.release = dht_release,
// .releasedir = dht_releasedir,
.forget = dht_forget
};

View File

@ -2071,6 +2071,7 @@ struct xlator_fops fops = {
struct xlator_cbks cbks = {
.release = dht_release,
.forget = dht_forget
};