cluster/afr: Preserve mtime in self-heal

Problem:
Data self-heal may choose a sink's iatt to set mtimes.
This happens because, after syncing of data is done,
self-heal does one more xattrop/fstat to determine the
sources and sinks in order to set the inode-ctx. Since this
is done after data syncing and erasing of xattrs, the old
source and the old sink are both sources now, but their
mtimes differ. The old code just takes the first source from
the list and updates mtimes from it, and that source could
have been a sink before the self-heal started.

Fix:
Set the mtime from one of the 'sources before syncing', i.e. a
source that was determined before the data sync started.

Change-Id: Id769e1b99aa4f041eaee775f64cbf2c57b799723
BUG: 918437
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/4658
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
Pranith Kumar K 2013-03-08 15:20:22 +05:30 committed by Vijay Bellur
parent e125e2ae61
commit 35660032d6
2 changed files with 77 additions and 14 deletions

View File

@ -0,0 +1,52 @@
#!/bin/bash
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc
# Print the modification time of the file given as $1, as reported by stat(1).
# Output is the date and time from stat's "Modify:" line concatenated with no
# separator and with fractional seconds dropped, e.g. 2013-03-0815:20:22.
function get_mtime {
        local f="$1"
        # LC_ALL=C keeps the "Modify:" label untranslated. Anchoring on
        # ^Modify: avoids also matching the "File:" line when the path itself
        # contains the word "Modify". Quoting "$f" keeps paths with spaces or
        # glob characters intact.
        LC_ALL=C stat "$f" | awk '/^Modify:/ {print $2 $3}' | cut -f1 -d'.'
}
# Reset the test environment before starting.
# NOTE(review): cleanup, TEST, EXPECT and EXPECT_WITHIN are not defined in this
# script; presumably they come from the sourced include.rc/volume.rc -- confirm.
cleanup;
## Tests if mtime is correct after self-heal.
TEST glusterd
TEST pidof glusterd
# Create a replica-2 volume whose two bricks both live on this host.
TEST mkdir -p $B0/gfs0/brick0{1,2}
TEST $CLI volume create $V0 replica 2 transport tcp $H0:$B0/gfs0/brick01 $H0:$B0/gfs0/brick02
# Turn off NFS, stat-prefetch and the self-heal daemon, and set the background
# self-heal count to 0.
# NOTE(review): presumably this forces heals to run in the foreground from the
# client mount so the checks below see the healed state -- confirm.
TEST $CLI volume set $V0 nfs.disable on
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 cluster.background-self-heal-count 0
TEST $CLI volume set $V0 cluster.self-heal-daemon off
TEST $CLI volume start $V0
# Mount the volume at $M0 with direct I/O enabled.
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable
# File 'a' is healed from brick02 to brick01, whereas file 'b' is healed from
# brick01 to brick02.
# cp -p preserves the source file's timestamps, so each copy carries the mtime
# of /etc/passwd rather than the time of the copy.
TEST cp -p /etc/passwd $M0/a
TEST cp -p /etc/passwd $M0/b
#Store mtimes before self-heals
# Both reference values are read from brick02, before any copy is removed.
TEST modify_atstamp=$(get_mtime $B0/gfs0/brick02/a)
TEST modify_btstamp=$(get_mtime $B0/gfs0/brick02/b)
# With the volume stopped, remove 'a' from brick01 and 'b' from brick02, so
# each file is missing on a different brick, then force-start the volume.
TEST $CLI volume stop $V0
TEST gf_rm_file_and_gfid_link $B0/gfs0/brick01 a
TEST gf_rm_file_and_gfid_link $B0/gfs0/brick02 b
TEST $CLI volume start $V0 force
# Expect child 0 and child 1 to each report status "1".
# NOTE(review): presumably EXPECT_WITHIN retries for up to 20 seconds -- confirm.
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
# Stat every path under the mount; the stat output itself is discarded.
# NOTE(review): presumably these lookups are what trigger the self-heal of the
# removed copies -- confirm.
find $M0 | xargs stat 1>/dev/null
# After the heal, both bricks' copies of 'a' must report the mtime that was
# recorded before the heal.
TEST modify_atstamp1=$(get_mtime $B0/gfs0/brick01/a)
TEST modify_atstamp2=$(get_mtime $B0/gfs0/brick02/a)
EXPECT $modify_atstamp echo $modify_atstamp1
EXPECT $modify_atstamp echo $modify_atstamp2
# Same check for 'b', which was removed from the opposite brick.
TEST modify_btstamp1=$(get_mtime $B0/gfs0/brick01/b)
TEST modify_btstamp2=$(get_mtime $B0/gfs0/brick02/b)
EXPECT $modify_btstamp echo $modify_btstamp1
EXPECT $modify_btstamp echo $modify_btstamp2
# Tear the test environment down again.
cleanup;

View File

@ -190,29 +190,20 @@ afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int
afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
afr_self_heal_t *sh = NULL;
int i = 0;
int call_count = 0;
int source = 0;
int32_t valid = 0;
struct iatt stbuf = {0,};
local = frame->local;
sh = &local->self_heal;
priv = this->private;
source = sh->source;
valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
stbuf.ia_atime = sh->buf[source].ia_atime;
stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
stbuf.ia_mtime = sh->buf[source].ia_mtime;
stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
valid = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
call_count = afr_set_elem_count_get (sh->success,
priv->child_count);
@ -232,7 +223,7 @@ afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
(void *) (long) i,
priv->children[i],
priv->children[i]->fops->setattr,
&local->loc, &stbuf, valid, NULL);
&local->loc, stbuf, valid, NULL);
if (!--call_count)
break;
@ -256,7 +247,7 @@ afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
GF_ASSERT (sh->source == child_index);
if (op_ret != -1) {
sh->buf[child_index] = *buf;
afr_sh_data_setattr (frame, this);
afr_sh_data_setattr (frame, this, buf);
} else {
gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
"time-stamps after self-heal", local->loc.path);
@ -683,6 +674,9 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
afr_private_t *priv = NULL;
int nsources = 0;
int ret = 0;
int *old_sources = NULL;
int tstamp_source = 0;
int i = 0;
local = frame->local;
sh = &local->self_heal;
@ -690,6 +684,13 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
lkowner_utoa (&frame->root->lk_owner));
if (sh->sync_done) {
//store sources before sync so that mtime can be set using the
//iatt buf from one of them.
old_sources = alloca (priv->child_count*sizeof (*old_sources));
memcpy (old_sources, sh->sources,
priv->child_count * sizeof (*old_sources));
}
nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
sh->sources, sh->success_children,
@ -720,6 +721,7 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
}
afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB);
ret = afr_sh_inode_set_read_ctx (sh, this);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@ -730,7 +732,16 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
}
if (sh->sync_done) {
afr_sh_data_setattr (frame, this);
/* Perform setattr from one of the old_sources if possible
* Because only they have the correct mtime, the new sources
* (i.e. old sinks) have mtime from last writev in sync.
*/
tstamp_source = sh->source;
for (i = 0; i < priv->child_count; i++) {
if (old_sources[i] && sh->sources[i])
tstamp_source = i;
}
afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]);
} else {
if (nsources == 0) {
gf_log (this->name, GF_LOG_DEBUG,