cluster/afr: Don't let NFS cache stat after writes
Problem: Afr does post-ops after write but the stat buffer it unwinds is at the time of write, so if nfs client caches this, it will see different ctime when it does stat on it after post-op is done. From NFS client's perspective it thinks the file is changed. Tar which depends on this to be correct keeps giving 'file changed as we read it' warning. If Afr instead has to choose to unwind after post-op, eager-lock, delayed-post-op will have to be disabled which will lead to bad performance for all write usecases. Fix: Don't let client cache stat after write. Change-Id: Ic6062acc6e5cdd97a9c83c56bd529ec83cee8a23 BUG: 1302948 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Signed-off-by: Anuradha Talur <atalur@redhat.com> Reviewed-on: http://review.gluster.org/13785 Smoke: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Niels de Vos <ndevos@redhat.com>
This commit is contained in:
parent
e3b2ed938a
commit
4c4624c9ba
@ -4425,3 +4425,27 @@ fop_enum_to_string (glusterfs_fop_t fop)
|
||||
|
||||
return "UNKNOWNFOP";
|
||||
}
|
||||
|
||||
gf_boolean_t
|
||||
gf_is_zero_filled_stat (struct iatt *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return 1;
|
||||
|
||||
/* Do not use st_dev because it is transformed to store the xlator id
|
||||
* in place of the device number. Do not use st_ino because by this time
|
||||
* we've already mapped the root ino to 1 so it is not guaranteed to be
|
||||
* 0.
|
||||
*/
|
||||
if ((buf->ia_nlink == 0) && (buf->ia_ctime == 0))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
gf_zero_fill_stat (struct iatt *buf)
|
||||
{
|
||||
buf->ia_nlink = 0;
|
||||
buf->ia_ctime = 0;
|
||||
}
|
||||
|
@ -820,4 +820,10 @@ gf_nwrite (int fd, const void *buf, size_t count);
|
||||
void _mask_cancellation (void);
|
||||
void _unmask_cancellation (void);
|
||||
|
||||
gf_boolean_t
|
||||
gf_is_zero_filled_stat (struct iatt *buf);
|
||||
|
||||
void
|
||||
gf_zero_fill_stat (struct iatt *buf);
|
||||
|
||||
#endif /* _COMMON_UTILS_H */
|
||||
|
37
tests/basic/afr/tarissue.t
Normal file
37
tests/basic/afr/tarissue.t
Normal file
@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
|
||||
. $(dirname $0)/../../include.rc
|
||||
. $(dirname $0)/../../volume.rc
|
||||
. $(dirname $0)/../../nfs.rc
|
||||
|
||||
TESTS_EXPECTED_IN_LOOP=10
|
||||
cleanup;
|
||||
|
||||
#Basic checks
|
||||
TEST glusterd
|
||||
TEST pidof glusterd
|
||||
|
||||
#Create a distributed-replicate volume
|
||||
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
|
||||
TEST $CLI volume set $V0 cluster.consistent-metadata on
|
||||
TEST $CLI volume set $V0 cluster.post-op-delay-secs 0
|
||||
TEST $CLI volume set $V0 nfs.rdirplus off
|
||||
TEST $CLI volume set $V0 nfs.disable false
|
||||
TEST $CLI volume start $V0
|
||||
EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
|
||||
|
||||
# Mount NFS
|
||||
mount_nfs $H0:/$V0 $N0 vers=3
|
||||
|
||||
#Create files
|
||||
TEST mkdir -p $N0/nfs/dir1/dir2
|
||||
for i in {1..10}; do
|
||||
TEST_IN_LOOP dd if=/dev/urandom of=$N0/nfs/dir1/dir2/file$i bs=1024k count=1
|
||||
done
|
||||
TEST tar cvf /tmp/dir1.tar.gz $N0/nfs/dir1
|
||||
|
||||
TEST rm -f /tmp/dir1.tar.gz
|
||||
|
||||
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
|
||||
|
||||
cleanup;
|
@ -234,7 +234,9 @@ __afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
afr_local_t *local = NULL;
|
||||
int child_index = (long) cookie;
|
||||
int call_count = -1;
|
||||
afr_private_t *priv = NULL;
|
||||
|
||||
priv = this->private;
|
||||
local = frame->local;
|
||||
|
||||
LOCK (&frame->lock);
|
||||
@ -249,8 +251,13 @@ __afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
if (call_count == 0) {
|
||||
__afr_dir_write_finalize (frame, this);
|
||||
|
||||
if (afr_txn_nothing_failed (frame, this))
|
||||
local->transaction.unwind (frame, this);
|
||||
if (afr_txn_nothing_failed (frame, this)) {
|
||||
/*if it did pre-op, it will do post-op changing ctime*/
|
||||
if (priv->consistent_metadata &&
|
||||
afr_needs_changelog_update (local))
|
||||
afr_zero_fill_stat (local);
|
||||
local->transaction.unwind (frame, this);
|
||||
}
|
||||
|
||||
afr_mark_entry_pending_changelog (frame, this);
|
||||
|
||||
|
@ -182,7 +182,9 @@ __afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
afr_local_t *local = NULL;
|
||||
int child_index = (long) cookie;
|
||||
int call_count = -1;
|
||||
afr_private_t *priv = NULL;
|
||||
|
||||
priv = this->private;
|
||||
local = frame->local;
|
||||
|
||||
LOCK (&frame->lock);
|
||||
@ -198,8 +200,13 @@ __afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
if (call_count == 0) {
|
||||
__afr_inode_write_finalize (frame, this);
|
||||
|
||||
if (afr_txn_nothing_failed (frame, this))
|
||||
local->transaction.unwind (frame, this);
|
||||
if (afr_txn_nothing_failed (frame, this)) {
|
||||
/*if it did pre-op, it will do post-op changing ctime*/
|
||||
if (priv->consistent_metadata &&
|
||||
afr_needs_changelog_update (local))
|
||||
afr_zero_fill_stat (local);
|
||||
local->transaction.unwind (frame, this);
|
||||
}
|
||||
|
||||
local->transaction.resume (frame, this);
|
||||
}
|
||||
@ -230,8 +237,13 @@ void
|
||||
afr_writev_unwind (call_frame_t *frame, xlator_t *this)
|
||||
{
|
||||
afr_local_t * local = NULL;
|
||||
afr_private_t *priv = this->private;
|
||||
|
||||
local = frame->local;
|
||||
|
||||
if (priv->consistent_metadata)
|
||||
afr_zero_fill_stat (local);
|
||||
|
||||
AFR_STACK_UNWIND (writev, frame,
|
||||
local->op_ret, local->op_errno,
|
||||
&local->cont.inode_wfop.prebuf,
|
||||
|
@ -36,6 +36,37 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
|
||||
afr_changelog_resume_t changelog_resume,
|
||||
afr_xattrop_type_t op);
|
||||
|
||||
void
|
||||
afr_zero_fill_stat (afr_local_t *local)
|
||||
{
|
||||
if (!local)
|
||||
return;
|
||||
if (local->transaction.type == AFR_DATA_TRANSACTION ||
|
||||
local->transaction.type == AFR_METADATA_TRANSACTION) {
|
||||
gf_zero_fill_stat (&local->cont.inode_wfop.prebuf);
|
||||
gf_zero_fill_stat (&local->cont.inode_wfop.postbuf);
|
||||
} else if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
|
||||
local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
|
||||
gf_zero_fill_stat (&local->cont.dir_fop.buf);
|
||||
gf_zero_fill_stat (&local->cont.dir_fop.preparent);
|
||||
gf_zero_fill_stat (&local->cont.dir_fop.postparent);
|
||||
if (local->transaction.type == AFR_ENTRY_TRANSACTION)
|
||||
return;
|
||||
gf_zero_fill_stat (&local->cont.dir_fop.prenewparent);
|
||||
gf_zero_fill_stat (&local->cont.dir_fop.postnewparent);
|
||||
}
|
||||
}
|
||||
|
||||
gf_boolean_t
|
||||
afr_needs_changelog_update (afr_local_t *local)
|
||||
{
|
||||
if (local->transaction.type == AFR_DATA_TRANSACTION)
|
||||
return _gf_true;
|
||||
if (!local->optimistic_change_log)
|
||||
return _gf_true;
|
||||
return _gf_false;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
afr_quorum_errno (afr_private_t *priv)
|
||||
{
|
||||
@ -85,9 +116,21 @@ int
|
||||
__afr_txn_write_done (call_frame_t *frame, xlator_t *this)
|
||||
{
|
||||
afr_local_t *local = NULL;
|
||||
afr_private_t *priv = NULL;
|
||||
gf_boolean_t unwind = _gf_false;
|
||||
|
||||
priv = this->private;
|
||||
local = frame->local;
|
||||
|
||||
if (priv->consistent_metadata) {
|
||||
LOCK (&frame->lock);
|
||||
{
|
||||
unwind = (local->transaction.main_frame != NULL);
|
||||
}
|
||||
UNLOCK (&frame->lock);
|
||||
if (unwind)/*It definitely did post-op*/
|
||||
afr_zero_fill_stat (local);
|
||||
}
|
||||
local->transaction.unwind (frame, this);
|
||||
|
||||
AFR_STACK_DESTROY (frame);
|
||||
@ -1232,8 +1275,7 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if ((local->transaction.type == AFR_DATA_TRANSACTION ||
|
||||
!local->optimistic_change_log)) {
|
||||
if (afr_needs_changelog_update (local)) {
|
||||
|
||||
local->dirty[idx] = hton32(1);
|
||||
|
||||
|
@ -52,5 +52,7 @@ int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this);
|
||||
int __afr_txn_write_done (call_frame_t *frame, xlator_t *this);
|
||||
call_frame_t *afr_transaction_detach_fop_frame (call_frame_t *frame);
|
||||
gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this);
|
||||
gf_boolean_t afr_needs_changelog_update (afr_local_t *local);
|
||||
void afr_zero_fill_stat (afr_local_t *local);
|
||||
|
||||
#endif /* __TRANSACTION_H__ */
|
||||
|
@ -106,25 +106,6 @@ nfs_mntpath_to_xlator (xlator_list_t *cl, char *path)
|
||||
}
|
||||
|
||||
|
||||
/* Returns 1 if the stat seems to be filled with zeroes. */
|
||||
int
|
||||
nfs_zero_filled_stat (struct iatt *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return 1;
|
||||
|
||||
/* Do not use st_dev because it is transformed to store the xlator id
|
||||
* in place of the device number. Do not use st_ino because by this time
|
||||
* we've already mapped the root ino to 1 so it is not guaranteed to be
|
||||
* 0.
|
||||
*/
|
||||
if ((buf->ia_nlink == 0) && (buf->ia_ctime == 0))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
nfs_loc_wipe (loc_t *loc)
|
||||
{
|
||||
|
@ -37,9 +37,6 @@ nfs_path_to_xlator (xlator_list_t *cl, char *path);
|
||||
extern xlator_t *
|
||||
nfs_mntpath_to_xlator (xlator_list_t *cl, char *path);
|
||||
|
||||
extern int
|
||||
nfs_zero_filled_stat (struct iatt *buf);
|
||||
|
||||
extern void
|
||||
nfs_loc_wipe (loc_t *loc);
|
||||
|
||||
|
@ -373,7 +373,7 @@ nfs3_stat_to_post_op_attr (struct iatt *buf)
|
||||
* returning these zeroed out attrs.
|
||||
*/
|
||||
attr.attributes_follow = FALSE;
|
||||
if (nfs_zero_filled_stat (buf))
|
||||
if (gf_is_zero_filled_stat (buf))
|
||||
goto out;
|
||||
|
||||
nfs3_stat_to_fattr3 (buf, &(attr.post_op_attr_u.attributes));
|
||||
@ -394,7 +394,7 @@ nfs3_stat_to_pre_op_attr (struct iatt *pre)
|
||||
* returning these zeroed out attrs.
|
||||
*/
|
||||
poa.attributes_follow = FALSE;
|
||||
if (nfs_zero_filled_stat (pre))
|
||||
if (gf_is_zero_filled_stat (pre))
|
||||
goto out;
|
||||
|
||||
poa.attributes_follow = TRUE;
|
||||
|
Loading…
x
Reference in New Issue
Block a user