cluster/afr: prevent piggyback on stale pre_op

Here are the logs of a file on which we saw EIO because of size mismatch:
[root@lizzie ~]# grep 38f18204 /var/log/glusterfs/mnt-x-.log
Reporting Unstable write for 38f18204-2840-408e-ae65-c01f4106b8c4
for offset: 0, len: 7680

Cleared unstable write flag for 38f18204-2840-408e-ae65-c01f4106b8c4:
offset 0 length 7680

Reporting Unstable write for 38f18204-2840-408e-ae65-c01f4106b8c4 for
offset: 7680, len: 71680

Reporting Unstable write for 38f18204-2840-408e-ae65-c01f4106b8c4 for
offset: 79360, len: 15716

fsync completed on 38f18204-2840-408e-ae65-c01f4106b8c4 for
offset 0 length 7680 with changelog status: -1 -1

According to these logs, fsync did not happen after the writev with
offset: 79360, len: 15716, which is the reason for this problem.

In total 3 writes came. Let's call them w1, w2 and w3.
w1 does pre_op, so the pre_op_done[0] and pre_op_done[1] counts become 1 and 1.
Then is_piggyback_post_op() is called for w1 and it returns *false*.

w1's fsync is fired

Now w2 and w3 come and see that pre_op_done[0], pre_op_done[1] are both 1,
so pre_op_piggyback[0] and pre_op_piggyback[1] are both incremented twice,
once by w2, one more time by w3 and become 2, 2  ------- Step-A

Now fsync of w1 is complete and it goes ahead with post op and decrements
pre_op_done[0], pre_op_done[1] to 0, 0

Now w2, w3 writevs complete and is_piggyback_post_op will return *true* for
both w2, w3.
So fsync is not fired for both w2, w3

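This patch prevents Step-A from happening: pre_op_done is now decremented
inside is_piggyback_post_op() as soon as a write decides not to piggyback,
instead of later in afr_changelog_post_op_now().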

Change-Id: I8b6af1f1875b2cf5f718caa3c16ee7ff3dc96b5c
BUG: 927146
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/4752
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
This commit is contained in:
Pranith Kumar K 2013-04-02 00:24:45 +05:30 committed by Anand Avati
parent e0616e9314
commit 864ac6b7b3

View File

@ -146,36 +146,6 @@ out:
return;
}
/*
 * Undo one pre-op accounting entry for a child on the transaction's fd.
 *
 * frame       - call frame whose ->local carries the transaction state.
 * this        - xlator handed to afr_fd_ctx_get() to look up the fd context.
 * child_index - index into the fd context's pre_op_done[] array.
 *
 * Does nothing when the transaction has no fd or the fd has no context.
 * The counter is only touched for AFR_DATA_TRANSACTION, and the update is
 * made while holding the fd's lock.
 */
static void
__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
{
        afr_local_t  *local  = frame->local;
        afr_fd_ctx_t *fd_ctx = NULL;

        /* Nothing to account for without an fd. */
        if (local->fd == NULL)
                return;

        fd_ctx = afr_fd_ctx_get (local->fd, this);
        if (fd_ctx == NULL)
                return;

        LOCK (&local->fd->lock);
        {
                if (local->transaction.type == AFR_DATA_TRANSACTION) {
                        /* A pre-op must have been recorded before it can be undone. */
                        GF_ASSERT (fd_ctx->pre_op_done[child_index]);
                        fd_ctx->pre_op_done[child_index]--;
                }
        }
        UNLOCK (&local->fd->lock);
}
static void
__mark_non_participant_children (int32_t *pending[], int child_count,
unsigned char *participants,
@ -691,9 +661,6 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
afr_changelog_post_op_cbk (frame, (void *)(long)i,
this, 1, 0, xattr[i], NULL);
} else {
if (!piggyback)
__mark_pre_op_undone_on_fd (frame, this,
i);
STACK_WIND_COOKIE (frame,
afr_changelog_post_op_cbk,
(void *) (long) i,
@ -1392,6 +1359,8 @@ is_piggyback_post_op (call_frame_t *frame, fd_t *fd)
if necesssary
*/
piggyback = _gf_false;
GF_ASSERT (fdctx->pre_op_done[i]);
fdctx->pre_op_done[i]--;
}
}
}