ec: Fix self-heal issues

Problem: Doing an 'ls' of a directory that has been modified while one
         of the bricks was down, sometimes returns the old directory
         contents.

Cause: Directories are not marked when they are modified as files are.
       The ec xlator balances requests amongst available and healthy
       bricks. Since there is no way to detect that a directory is
       out of date in one of the bricks, it is used from time to time
       to return the directory contents.

Solution: Basically the solution consists in using versioning information
          also for directories; however, some additional changes have
          been necessary.

Changes:

 * Use directory versioning:

     This required locking the full directory instead of a single entry
     for all requests that add or remove entries from it. This is needed
     to allow atomic version updates. This affects the following fops:

         create, mkdir, mknod, link, symlink, rename, unlink, rmdir

     Another side effect is that opendir needs to do a prior lookup
     to get versioning information and discard out-of-date bricks
     for subsequent readdir(p) calls.

 * Restrict directory self-heal:

     Until now, when a discrepancy was found in lookup, a self-heal
     was automatically started. This caused the versioning information
     of a bad directory to be healed instantly, making the original
     problem reappear.

     To solve this, when a missing directory is detected in one or more
     bricks on lookup or opendir fops, only a partial self-heal is
     performed on it. A partial self-heal basically creates the
     directory but does not restore any additional information.

     This prevents an 'ls' from repairing the directory and causing the
     problem to happen again. With this change, output of 'ls' is
     always consistent. However, since the directory has been created
     in the brick, this allows any other operation on it (create new
     files, for example) to succeed on all bricks and not add additional
     work to the self-heal process.

     To force a self-heal of a directory, any other operation must be
     done on it. For example a getxattr.

     With these changes, the correct healing procedure that would avoid
     inconsistent directory browsing consists of a post-order traversal
     of the directories being healed. This way, the directory contents
     will be healed before healing the directory itself.

 * Additional changes to fix self-heal errors

     - Don't use fop->fd to decide between fd/loc.

         open, opendir and create have an fd, but the correct data is in
         loc.

     - Fix incorrect management of bad bricks per inode/fd.

     - Fix incorrect selection of fop's target bricks when there are bad
       bricks involved.

     - Improved ec_loc_parent() to always return a parent loc as
       complete as possible.

Change-Id: Iaf3df174d7857da57d4a87b4a8740a7048b366ad
BUG: 1149726
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/8916
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
This commit is contained in:
Xavier Hernandez 2014-10-08 09:20:11 +02:00 committed by Vijay Bellur
parent d57ecca632
commit 56caf4349c
15 changed files with 558 additions and 347 deletions

View File

@ -45,66 +45,97 @@ function check_truncate {
}
function check_hard_link {
stat $M0/hard-link-1
stat $M0/hard-link-2
for b in $*; do
inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1)
inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1)
[ "$inum1" = "$inum2" ] || return 1
if [ "$inum1" != "$inum2" ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
function check_soft_link {
stat $M0/soft-link
for b in $*; do
[ "$(readlink $b/soft-link)" = "soft-link-tgt" ] || return 1
if [ "$(readlink $b/soft-link)" != "soft-link-tgt" ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
function check_unlink {
stat $M0/unlink
for b in $*; do
[ ! -e $b/unlink ] || return 1
if [ -e $b/unlink ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
function check_mkdir {
getfattr -m. -d $M0/mkdir
for b in $*; do
[ -d $b/mkdir ] || return 1
if [ ! -d $b/mkdir ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
function check_rmdir {
getfattr -m. -d $M0/rmdir
for b in $*; do
[ ! -e $b/rmdir ] || return 1
if [ -e $b/rmdir ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
function check_setxattr {
stat $M0/setxattr
for b in $*; do
v=$(my_getfattr -n user.foo $b/setxattr)
[ "$v" = "ash_nazg_durbatuluk" ] || return 1
if [ "$v" != "ash_nazg_durbatuluk" ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
function check_removexattr {
stat $M0/removexattr
for b in $*; do
my_getfattr -n user.bar $b/removexattr 2> /dev/null
[ $? = 0 ] && return 1
if [ $? -eq 0 ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
function check_perm_file {
stat $M0/perm_dir/perm_file
getfattr -m. -d $M0/perm_dir
b1=$1
shift 1
ftext=$(stat -c "%u %g %a" $b1/perm_dir/perm_file)
@ -113,7 +144,8 @@ function check_perm_file {
btext=$(stat -c "%u %g %a" $b/perm_dir/perm_file)
#echo " next u/a/a = $btext" > /dev/tty
if [ x"$btext" != x"$ftext" ]; then
return 1
echo "N"
return 0
fi
done
echo "Y"
@ -210,17 +242,6 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0
TEST check_create_write $M0
TEST check_truncate $M0
TEST stat $M0/hard-link-1
TEST stat $M0/hard-link-2
TEST stat $M0/soft-link
TEST ! stat $M0/unlink
TEST ! stat $M0/rmdir
TEST stat $M0/mkdir
TEST stat $M0/setxattr
TEST stat $M0/removexattr
TEST stat $M0/perm_dir
TEST stat $M0/perm_dir/perm_file
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link $B0/${V0}{0..9}
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link $B0/${V0}{0..9}
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_unlink $B0/${V0}{0..9}

View File

@ -7,12 +7,119 @@
cleanup
# Lists the current directory 20 times and requires every listing to contain
# a line matching "dir1".
# Returns 1 as soon as one listing has no match, 0 if all 20 listings match.
function check_mount_dir
{
for i in {1..20}; do
ls | grep "dir1"
if [ $? -ne 0 ]; then
return 1
fi
done
return 0
}
# Checks that file $1 has size $2 (stat "%s") under every path in the
# "brick" array, which is defined outside this function.
#   $1 - file name, relative to the current directory and to each brick
#   $2 - expected size, compared as a string
# Prints "N" at the first mismatch, "Y" if all bricks match; always returns 0,
# so the verdict is carried by the printed text, not by the exit status.
function check_size
{
# NOTE(review): presumably stat'ing the file from the current directory
# triggers a lookup (and self-heal) before the bricks are inspected - confirm.
stat $1
for i in "${brick[@]}"; do
res=`stat -c "%s" $i/$1`
if [ "$res" != "$2" ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
# Checks that file $1 has the permission string $2 (stat "%A", e.g.
# "-rw-r--r--") under every path in the "brick" array, which is defined
# outside this function.
#   $1 - file name, relative to the current directory and to each brick
#   $2 - expected human-readable mode string
# Prints "N" at the first mismatch, "Y" if all bricks match; always returns 0.
function check_mode
{
# NOTE(review): presumably this stat triggers a lookup (and self-heal)
# before the bricks are inspected - confirm.
stat $1
for i in "${brick[@]}"; do
res=`stat -c "%A" $i/$1`
if [ "$res" != "$2" ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
# Checks that file $1 has modification time $2 (stat "%Y", seconds since the
# Epoch) under every path in the "brick" array, which is defined outside this
# function.
#   $1 - file name, relative to the current directory and to each brick
#   $2 - expected modification time, compared as a string
# Prints "N" at the first mismatch, "Y" if all bricks match; always returns 0.
function check_date
{
# NOTE(review): presumably this stat triggers a lookup (and self-heal)
# before the bricks are inspected - confirm.
stat $1
for i in "${brick[@]}"; do
res=`stat -c "%Y" $i/$1`
if [ "$res" != "$2" ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
# Checks that extended attribute $2 can NOT be read from file $1 under any
# path in the "brick" array, which is defined outside this function.
#   $1 - file name, relative to the current directory and to each brick
#   $2 - extended attribute name passed to getfattr -n
# Prints "N" as soon as getfattr succeeds on one brick, "Y" if it fails on
# all of them; always returns 0.
function check_xattr
{
# NOTE(review): presumably this stat triggers a lookup (and self-heal)
# before the bricks are inspected - confirm.
stat $1
for i in "${brick[@]}"; do
# Exit status 0 means the attribute is still present on this brick.
getfattr -n $2 $i/$1 2>/dev/null
if [ $? -eq 0 ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
# Checks that "dir1" exists as a directory under every path in the "brick"
# array, which is defined outside this function.
# Prints "N" at the first brick lacking the directory, "Y" if all have it;
# always returns 0.
function check_dir
{
# A getxattr on the directory is issued first; the commit message for this
# change names getxattr as an operation that forces a directory self-heal.
getfattr -m. -d dir1
for i in "${brick[@]}"; do
if [ ! -d $i/dir1 ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
# Checks that "test3" exists as a symbolic link under every path in the
# "brick" array, which is defined outside this function.
# Prints "N" at the first brick where it is not a symlink, "Y" if all bricks
# have it; always returns 0.
function check_soft_link
{
# NOTE(review): presumably this stat triggers a lookup (and self-heal)
# before the bricks are inspected - confirm.
stat test3
for i in "${brick[@]}"; do
if [ ! -h $i/test3 ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
# Checks that "test4" has a hard-link count (stat "%h") of exactly 3 under
# every path in the "brick" array, which is defined outside this function.
# Prints "N" at the first brick with a different count, "Y" if all match;
# always returns 0.
function check_hard_link
{
# NOTE(review): presumably this stat triggers a lookup (and self-heal)
# before the bricks are inspected - confirm.
stat test4
for i in "${brick[@]}"; do
# NOTE(review): the expected count on a brick is 3 while the mount reports 2
# elsewhere in this test; presumably the extra link is a brick-internal one
# - confirm.
if [ `stat -c "%h" $i/test4` -ne 3 ]; then
echo "N"
return 0
fi
done
echo "Y"
return 0
}
tmp=`mktemp -d -t ${0##*/}.XXXXXX`
if [ ! -d $tmp ]; then
exit 1
fi
TESTS_EXPECTED_IN_LOOP=250
TESTS_EXPECTED_IN_LOOP=194
TEST glusterd
TEST pidof glusterd
@ -21,6 +128,7 @@ EXPECT "Created" volinfo_field $V0 'Status'
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
# Wait until all 6 childs have been recognized by the ec xlator
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
TEST dd if=/dev/urandom of=$tmp/test bs=1024 count=1024
@ -46,12 +154,11 @@ for idx1 in {0..5}; do
TEST chmod 666 ${brick[$idx1]}/test
TEST truncate -s 0 ${brick[$idx1]}/test
TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test
sleep 1
EXPECT "-rw-r--r--" stat -c "%A" test
EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[$idx1]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "946681200" stat -c "%Y" ${brick[$idx1]}/test
TEST ! getfattr -n user.test ${brick[$idx1]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"
done
for idx1 in {0..4}; do
@ -63,16 +170,11 @@ for idx1 in {0..4}; do
TEST truncate -s 2097152 ${brick[$idx2]}/test
TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test
TEST setfattr -n user.test -v "test2" ${brick[$idx2]}/test
sleep 1
EXPECT "-rw-r--r--" stat -c "%A" test
EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[$idx1]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[$idx2]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "-rw-r--r--" stat -c "%A" ${brick[$idx2]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "946681200" stat -c "%Y" ${brick[$idx1]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "946681200" stat -c "%Y" ${brick[$idx2]}/test
TEST ! getfattr -n user.test ${brick[$idx1]}/test
TEST ! getfattr -n user.test ${brick[$idx2]}/test
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"
fi
done
done
@ -96,26 +198,25 @@ EXPECT "2" stat -c "%h" test2
EXPECT "2" stat -c "%h" test4
TEST $CLI volume start $V0 force
# Wait until the killed bricks have been started and recognized by the ec
# xlator
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
TEST check_mount_dir
EXPECT "1048576" stat -c "%s" test2
EXPECT "-rwxrwxrwx" stat -c "%A" test2
EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[0]}/test2
EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[1]}/test2
EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[0]}/test2
EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[1]}/test2
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test2 "262144"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test2 "-rwxrwxrwx"
TEST ls -al dir1
EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -d ${brick[0]}/dir1 ]; then echo 1; fi"
EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -d ${brick[1]}/dir1 ]; then echo 1; fi"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_dir
TEST [ -h test3 ]
EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -h ${brick[0]}/test3 ]; then echo 1; fi"
EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -h ${brick[1]}/test3 ]; then echo 1; fi"
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link
EXPECT "2" stat -c "%h" test4
EXPECT_WITHIN $HEAL_TIMEOUT "3" stat -c "%h" ${brick[0]}/test4
EXPECT_WITHIN $HEAL_TIMEOUT "3" stat -c "%h" ${brick[1]}/test4
EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link
TEST rm -rf $tmp

View File

@ -71,7 +71,7 @@ uintptr_t ec_fd_good(fd_t * fd, xlator_t * xl)
uintptr_t bad = 0;
ctx = ec_fd_get(fd, xl);
if ((ctx != NULL) && (ctx->loc.inode != NULL))
if (ctx != NULL)
{
bad = ctx->bad;
}
@ -110,7 +110,7 @@ uintptr_t ec_update_fd(ec_fop_data_t * fop, fd_t * fd, uintptr_t good,
LOCK(&fd->lock);
ctx = __ec_fd_get(fd, fop->xl);
if ((ctx != NULL) && (ctx->loc.inode != NULL))
if (ctx != NULL)
{
ctx->bad &= ~good;
bad |= ctx->bad;
@ -143,6 +143,15 @@ int32_t ec_heal_report(call_frame_t * frame, void * cookie, xlator_t * this,
void ec_check_status(ec_fop_data_t * fop)
{
ec_t * ec = fop->xl->private;
int32_t partial = 0;
if (fop->answer->op_ret >= 0) {
if (fop->id == GF_FOP_LOOKUP) {
partial = fop->answer->iatt[0].ia_type == IA_IFDIR;
} else if (fop->id == GF_FOP_OPENDIR) {
partial = 1;
}
}
if ((ec->xl_up & ~(fop->remaining | fop->good)) == 0)
{
@ -154,42 +163,36 @@ void ec_check_status(ec_fop_data_t * fop)
"remaining=%lX, good=%lX, bad=%lX)",
ec->xl_up, fop->mask, fop->remaining, fop->good, fop->bad);
if (fop->fd != NULL)
if (fop->use_fd)
{
ec_fheal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
fop->fd, NULL);
if (fop->fd != NULL) {
ec_fheal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report,
NULL, fop->fd, partial, NULL);
}
}
else
{
ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
&fop->loc[0], NULL);
&fop->loc[0], partial, NULL);
if (fop->loc[1].inode != NULL)
{
ec_heal(fop->frame, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report,
NULL, &fop->loc[1], NULL);
NULL, &fop->loc[1], partial, NULL);
}
}
}
void ec_update_bad(ec_fop_data_t * fop, uintptr_t good)
{
ec_t *ec = fop->xl->private;
uintptr_t bad;
int32_t update = 0;
bad = fop->mask & ~(fop->remaining | good);
if ((fop->bad & bad) != bad)
{
fop->bad |= bad;
update = 1;
}
if ((fop->good & good) != good)
{
fop->good |= good;
update = 1;
}
bad = ec->xl_up & ~(fop->remaining | good);
fop->bad |= bad;
fop->good |= good;
if (update && (fop->parent == NULL))
if (fop->parent == NULL)
{
if ((fop->flags & EC_FLAG_UPDATE_LOC_PARENT) != 0)
{
@ -369,21 +372,20 @@ int32_t ec_child_select(ec_fop_data_t * fop)
fop->mask &= ec->node_mask;
mask = ec->xl_up;
if (fop->loc[0].inode != NULL)
if (fop->parent == NULL)
{
mask |= ec_inode_good(fop->loc[0].inode, fop->xl);
}
if (fop->loc[1].inode != NULL)
{
mask |= ec_inode_good(fop->loc[1].inode, fop->xl);
}
if (fop->fd != NULL)
{
if (fop->fd->inode != NULL)
{
mask |= ec_inode_good(fop->fd->inode, fop->xl);
if (fop->loc[0].inode != NULL) {
mask &= ec_inode_good(fop->loc[0].inode, fop->xl);
}
if (fop->loc[1].inode != NULL) {
mask &= ec_inode_good(fop->loc[1].inode, fop->xl);
}
if (fop->fd != NULL) {
if (fop->fd->inode != NULL) {
mask &= ec_inode_good(fop->fd->inode, fop->xl);
}
mask &= ec_fd_good(fop->fd, fop->xl);
}
mask |= ec_fd_good(fop->fd, fop->xl);
}
if ((fop->mask & ~mask) != 0)
{
@ -619,7 +621,6 @@ ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc)
void ec_lock_destroy(ec_lock_t * lock)
{
GF_FREE(lock->basename);
loc_wipe(&lock->loc);
mem_put(lock);
@ -627,31 +628,13 @@ void ec_lock_destroy(ec_lock_t * lock)
int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2)
{
int32_t res;
res = uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
if (res != 0)
{
return res;
}
if (lock1->basename == NULL)
{
if (lock2->basename == NULL)
{
return 0;
}
return 1;
}
if (lock2->basename == NULL)
{
return -1;
}
return strcmp(lock1->basename, lock2->basename);
return uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
}
void ec_lock_insert(ec_fop_data_t * fop, ec_lock_t * lock)
void ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, int32_t update)
{
ec_lock_t * tmp;
int32_t tmp_update;
if ((fop->lock_count > 0) &&
(ec_lock_compare(fop->locks[0].lock, lock) > 0))
@ -659,19 +642,25 @@ void ec_lock_insert(ec_fop_data_t * fop, ec_lock_t * lock)
tmp = fop->locks[0].lock;
fop->locks[0].lock = lock;
lock = tmp;
tmp_update = fop->locks_update;
fop->locks_update = update;
update = tmp_update;
}
fop->locks[fop->lock_count].lock = lock;
fop->locks[fop->lock_count].fop = fop;
fop->locks_update |= update << fop->lock_count;
fop->lock_count++;
lock->refs++;
}
void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc)
void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update)
{
ec_lock_t * lock = NULL;
ec_inode_t * ctx = NULL;
char * name = NULL;
loc_t tmp;
int32_t error;
@ -680,12 +669,23 @@ void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc)
return;
}
error = ec_loc_parent(fop->xl, loc, &tmp, &name);
if (error != 0)
/* update is only 0 for 'opendir', which needs to lock the entry pointed
* by loc instead of its parent.
*/
if (update)
{
ec_fop_set_error(fop, error);
error = ec_loc_parent(fop->xl, loc, &tmp);
if (error != 0) {
ec_fop_set_error(fop, error);
return;
return;
}
} else {
if (!ec_loc_from_loc(fop->xl, &tmp, loc)) {
ec_fop_set_error(fop, EIO);
return;
}
}
LOCK(&tmp.inode->lock);
@ -698,16 +698,14 @@ void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc)
goto unlock;
}
list_for_each_entry(lock, &ctx->entry_locks, list)
if (ctx->entry_lock != NULL)
{
if (strcmp(lock->basename, name) == 0)
{
ec_trace("LOCK_ENTRYLK", fop, "lock=%p, inode=%p, path=%s, "
"name=%s. Lock already acquired",
lock, tmp.inode, tmp.path, name);
lock = ctx->entry_lock;
ec_trace("LOCK_ENTRYLK", fop, "lock=%p, inode=%p, path=%s"
"Lock already acquired",
lock, tmp.inode, tmp.path);
goto insert;
}
goto insert;
}
lock = ec_lock_allocate(fop->xl, EC_LOCK_ENTRY, &tmp);
@ -721,22 +719,20 @@ void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc)
ec_trace("LOCK_CREATE", fop, "lock=%p", lock);
lock->type = ENTRYLK_WRLCK;
lock->basename = name;
name = NULL;
list_add_tail(&lock->list, &ctx->entry_locks);
lock->plock = &ctx->entry_lock;
ctx->entry_lock = lock;
insert:
ec_lock_insert(fop, lock);
ec_lock_insert(fop, lock, update);
unlock:
UNLOCK(&tmp.inode->lock);
loc_wipe(&tmp);
GF_FREE(name);
}
void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc)
void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, int32_t update)
{
ec_lock_t * lock;
ec_inode_t * ctx;
@ -756,9 +752,9 @@ void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc)
goto unlock;
}
if (!list_empty(&ctx->inode_locks))
if (ctx->inode_lock != NULL)
{
lock = list_entry(ctx->inode_locks.next, ec_lock_t, list);
lock = ctx->inode_lock;
ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already "
"acquired", lock, loc->inode);
@ -778,16 +774,17 @@ void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc)
lock->flock.l_type = F_WRLCK;
lock->flock.l_whence = SEEK_SET;
list_add_tail(&lock->list, &ctx->inode_locks);
lock->plock = &ctx->inode_lock;
ctx->inode_lock = lock;
insert:
ec_lock_insert(fop, lock);
ec_lock_insert(fop, lock, update);
unlock:
UNLOCK(&loc->inode->lock);
}
void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd)
void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, int32_t update)
{
loc_t loc;
@ -798,7 +795,7 @@ void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd)
if (ec_loc_from_fd(fop->xl, &loc, fd))
{
ec_lock_prepare_inode(fop, &loc);
ec_lock_prepare_inode(fop, &loc, update);
loc_wipe(&loc);
}
@ -868,12 +865,11 @@ void ec_lock(ec_fop_data_t * fop)
if (lock->kind == EC_LOCK_ENTRY)
{
ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p, path=%s, "
"name=%s", lock, lock->loc.inode, lock->loc.path,
lock->basename);
ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p, path=%s",
lock, lock->loc.inode, lock->loc.path);
ec_entrylk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked,
lock, fop->xl->name, &lock->loc, lock->basename,
lock, fop->xl->name, &lock->loc, NULL,
ENTRYLK_LOCK, lock->type, NULL);
}
else
@ -936,7 +932,7 @@ void ec_unlock(ec_fop_data_t * fop)
refs = --lock->refs;
if (refs == 0)
{
list_del_init(&lock->list);
*lock->plock = NULL;
}
UNLOCK(&lock->loc.inode->lock);
@ -951,13 +947,12 @@ void ec_unlock(ec_fop_data_t * fop)
{
case EC_LOCK_ENTRY:
ec_trace("UNLOCK_ENTRYLK", fop, "lock=%p, inode=%p, "
"path=%s, basename=%s",
lock, lock->loc.inode, lock->loc.path,
lock->basename);
"path=%s",
lock, lock->loc.inode, lock->loc.path);
ec_entrylk(fop->frame, fop->xl, lock->mask,
EC_MINIMUM_ALL, ec_unlocked, lock,
fop->xl->name, &lock->loc, lock->basename,
fop->xl->name, &lock->loc, NULL,
ENTRYLK_UNLOCK, lock->type, NULL);
break;
@ -1061,17 +1056,23 @@ int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie,
}
}
LOCK(&inode->lock);
ctx = __ec_inode_get(inode, this);
if ((ctx != NULL) && !list_empty(&ctx->inode_locks))
{
lock = list_entry(ctx->inode_locks.next, ec_lock_t, list);
if (ctx != NULL) {
if (ctx->inode_lock != NULL) {
lock = ctx->inode_lock;
lock->version = fop->answer->version;
lock->have_size = 1;
lock->size = buf->ia_size;
lock->version = fop->answer->version;
if (buf->ia_type == IA_IFREG) {
lock->have_size = 1;
lock->size = buf->ia_size;
}
}
if (ctx->entry_lock != NULL) {
lock = ctx->entry_lock;
lock->version = fop->answer->version;
}
}
UNLOCK(&inode->lock);
@ -1083,10 +1084,10 @@ int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie,
fop->parent->mask &= fop->good;
}
fop->parent->pre_size = fop->parent->post_size = buf->ia_size;
fop->parent->have_size = 1;
if (buf->ia_type == IA_IFREG) {
fop->parent->pre_size = fop->parent->post_size = buf->ia_size;
fop->parent->have_size = 1;
}
}
else
{
@ -1143,7 +1144,7 @@ void ec_get_size_version(ec_fop_data_t * fop)
error = EIO;
if (fop->fd == NULL)
if (!fop->use_fd)
{
if (!ec_loc_from_loc(fop->xl, &loc, &fop->loc[0]))
{
@ -1211,7 +1212,7 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
return 0;
}
void ec_update_size_version(ec_fop_data_t * fop, uint64_t version,
void ec_update_size_version(ec_fop_data_t *fop, loc_t *loc, uint64_t version,
uint64_t size)
{
dict_t * dict;
@ -1251,18 +1252,9 @@ void ec_update_size_version(ec_fop_data_t * fop, uint64_t version,
fop->frame->root->uid = 0;
fop->frame->root->gid = 0;
if (fop->fd == NULL)
{
ec_xattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN,
ec_update_size_version_done, NULL, &fop->loc[0],
GF_XATTROP_ADD_ARRAY64, dict, NULL);
}
else
{
ec_fxattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN,
ec_update_size_version_done, NULL, fop->fd,
GF_XATTROP_ADD_ARRAY64, dict, NULL);
}
ec_xattrop(fop->frame, fop->xl, fop->mask, EC_MINIMUM_MIN,
ec_update_size_version_done, NULL, loc,
GF_XATTROP_ADD_ARRAY64, dict, NULL);
fop->frame->root->uid = uid;
fop->frame->root->gid = gid;
@ -1291,8 +1283,6 @@ void ec_flush_size_version(ec_fop_data_t * fop)
lock = fop->locks[0].lock;
GF_ASSERT(lock->kind == EC_LOCK_INODE);
LOCK(&lock->loc.inode->lock);
GF_ASSERT(lock->owner == fop);
@ -1306,11 +1296,11 @@ void ec_flush_size_version(ec_fop_data_t * fop)
if (version > 0)
{
ec_update_size_version(fop, version, delta);
ec_update_size_version(fop, &lock->loc, version, delta);
}
}
void ec_lock_reuse(ec_fop_data_t * fop, int32_t update)
void ec_lock_reuse(ec_fop_data_t *fop)
{
ec_fop_data_t * wait_fop;
ec_lock_t * lock;
@ -1321,6 +1311,10 @@ void ec_lock_reuse(ec_fop_data_t * fop, int32_t update)
for (i = 0; i < fop->lock_count; i++)
{
refs = 0;
delta = 0;
version = 0;
wait_fop = NULL;
lock = fop->locks[i].lock;
@ -1332,28 +1326,26 @@ void ec_lock_reuse(ec_fop_data_t * fop, int32_t update)
GF_ASSERT(lock->owner == fop);
lock->owner = NULL;
if (lock->kind == EC_LOCK_INODE)
{
if (update && (fop->error == 0))
if (((fop->locks_update >> i) & 1) != 0) {
if (fop->error == 0)
{
lock->version_delta++;
lock->size_delta += fop->post_size - fop->pre_size;
}
version = lock->version_delta;
delta = lock->size_delta;
refs = lock->refs;
if (refs == 1)
{
lock->version_delta = 0;
lock->size_delta = 0;
}
if (fop->have_size)
{
lock->size = fop->post_size;
lock->have_size = 1;
if (fop->have_size) {
lock->size = fop->post_size;
lock->have_size = 1;
}
}
}
version = lock->version_delta;
delta = lock->size_delta;
refs = lock->refs;
if (refs == 1) {
lock->version_delta = 0;
lock->size_delta = 0;
}
lock->good_mask &= fop->mask;
if (!list_empty(&lock->waiting))
@ -1379,11 +1371,10 @@ void ec_lock_reuse(ec_fop_data_t * fop, int32_t update)
ec_resume(wait_fop, 0);
}
}
if ((refs == 1) && (version > 0))
{
ec_update_size_version(fop, version, delta);
if ((refs == 1) && (version > 0)) {
ec_update_size_version(fop, &lock->loc, version, delta);
}
}
}

View File

@ -66,14 +66,15 @@
#define EC_STATE_HEAL_OPEN 207
#define EC_STATE_HEAL_REOPEN_FD 208
#define EC_STATE_HEAL_UNLOCK 209
#define EC_STATE_HEAL_DATA_LOCK 210
#define EC_STATE_HEAL_DATA_COPY 211
#define EC_STATE_HEAL_DATA_UNLOCK 212
#define EC_STATE_HEAL_POST_INODELK_LOCK 213
#define EC_STATE_HEAL_POST_INODE_LOOKUP 214
#define EC_STATE_HEAL_SETATTR 215
#define EC_STATE_HEAL_POST_INODELK_UNLOCK 216
#define EC_STATE_HEAL_DISPATCH 217
#define EC_STATE_HEAL_UNLOCK_ENTRY 210
#define EC_STATE_HEAL_DATA_LOCK 211
#define EC_STATE_HEAL_DATA_COPY 212
#define EC_STATE_HEAL_DATA_UNLOCK 213
#define EC_STATE_HEAL_POST_INODELK_LOCK 214
#define EC_STATE_HEAL_POST_INODE_LOOKUP 215
#define EC_STATE_HEAL_SETATTR 216
#define EC_STATE_HEAL_POST_INODELK_UNLOCK 217
#define EC_STATE_HEAL_DISPATCH 218
int32_t ec_dispatch_one_retry(ec_fop_data_t * fop, int32_t idx, int32_t op_ret,
int32_t op_errno);
@ -85,11 +86,11 @@ void ec_update_bad(ec_fop_data_t * fop, uintptr_t good);
void ec_fop_set_error(ec_fop_data_t * fop, int32_t error);
void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc);
void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc);
void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd);
void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, int32_t update);
void ec_lock_prepare_entry(ec_fop_data_t *fop, loc_t *loc, int32_t update);
void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, int32_t update);
void ec_lock(ec_fop_data_t * fop);
void ec_lock_reuse(ec_fop_data_t * fop, int32_t update);
void ec_lock_reuse(ec_fop_data_t *fop);
void ec_unlock(ec_fop_data_t * fop);
void ec_get_size_version(ec_fop_data_t * fop);

View File

@ -76,10 +76,10 @@ struct _ec_fd
struct _ec_inode
{
uintptr_t bad;
struct list_head entry_locks;
struct list_head inode_locks;
ec_heal_t * heal;
uintptr_t bad;
ec_lock_t *entry_lock;
ec_lock_t *inode_lock;
ec_heal_t *heal;
};
typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
@ -141,27 +141,23 @@ union _ec_cbk
struct _ec_lock
{
struct list_head list;
struct list_head waiting;
uintptr_t mask;
uintptr_t good_mask;
int32_t kind;
int32_t refs;
int32_t acquired;
int32_t have_size;
uint64_t size;
uint64_t size_delta;
uint64_t version;
uint64_t version_delta;
ec_fop_data_t * owner;
loc_t loc;
ec_lock_t **plock;
struct list_head waiting;
uintptr_t mask;
uintptr_t good_mask;
int32_t kind;
int32_t refs;
int32_t acquired;
int32_t have_size;
uint64_t size;
uint64_t size_delta;
uint64_t version;
uint64_t version_delta;
ec_fop_data_t *owner;
loc_t loc;
union
{
struct
{
entrylk_type type;
char * basename;
};
entrylk_type type;
struct gf_flock flock;
};
};
@ -193,6 +189,7 @@ struct _ec_fop_data
int32_t lock_count;
int32_t locked;
ec_lock_link_t locks[2];
int32_t locks_update;
int32_t have_size;
uint64_t pre_size;
uint64_t post_size;
@ -215,6 +212,8 @@ struct _ec_fop_data
uint64_t user_size;
uint32_t head;
int32_t use_fd;
dict_t * xdata;
dict_t * dict;
int32_t int32;
@ -273,6 +272,7 @@ struct _ec_heal
struct iatt iatt;
char * symlink;
fd_t * fd;
int32_t partial;
int32_t done;
uintptr_t available;
uintptr_t good;

View File

@ -116,10 +116,38 @@ void ec_wind_opendir(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
{
ec_cbk_data_t * cbk;
ec_fd_t *ctx;
switch (state)
{
case EC_STATE_INIT:
LOCK(&fop->fd->lock);
ctx = __ec_fd_get(fop->fd, fop->xl);
if ((ctx == NULL) || !ec_loc_from_loc(fop->xl, &ctx->loc,
&fop->loc[0])) {
UNLOCK(&fop->fd->lock);
fop->error = EIO;
return EC_STATE_REPORT;
}
UNLOCK(&fop->fd->lock);
/* Fall through */
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0], 0);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
case EC_STATE_GET_SIZE_AND_VERSION:
ec_get_size_version(fop);
return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH:
ec_dispatch_all(fop);
@ -160,8 +188,10 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
cbk->op_errno, cbk->fd, cbk->xdata);
}
return EC_STATE_END;
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION:
case -EC_STATE_DISPATCH:
case -EC_STATE_PREPARE_ANSWER:
case -EC_STATE_REPORT:
@ -173,6 +203,18 @@ int32_t ec_manager_opendir(ec_fop_data_t * fop, int32_t state)
NULL, NULL);
}
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK:
ec_unlock(fop);
return EC_STATE_END;
default:
@ -421,6 +463,8 @@ void ec_readdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->size = size;
fop->offset = offset;
@ -533,6 +577,8 @@ void ec_readdirp(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->size = size;
fop->offset = offset;

View File

@ -210,7 +210,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_prepare_entry(fop, &fop->loc[0], 1);
ec_lock(fop);
return EC_STATE_DISPATCH;
@ -293,7 +293,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -494,7 +494,7 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
// Parent entry of fop->loc[0] should be locked, but I don't
// receive enough information to do it (fop->loc[0].parent is
// NULL).
ec_lock_prepare_entry(fop, &fop->loc[1]);
ec_lock_prepare_entry(fop, &fop->loc[1], 1);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
@ -577,7 +577,7 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -765,7 +765,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_prepare_entry(fop, &fop->loc[0], 1);
ec_lock(fop);
return EC_STATE_DISPATCH;
@ -837,7 +837,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1022,7 +1022,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_prepare_entry(fop, &fop->loc[0], 1);
ec_lock(fop);
return EC_STATE_DISPATCH;
@ -1094,7 +1094,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1277,8 +1277,8 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_prepare_entry(fop, &fop->loc[1]);
ec_lock_prepare_entry(fop, &fop->loc[0], 1);
ec_lock_prepare_entry(fop, &fop->loc[1], 1);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
@ -1359,7 +1359,7 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1533,7 +1533,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_prepare_entry(fop, &fop->loc[0], 1);
ec_lock(fop);
return EC_STATE_DISPATCH;
@ -1597,7 +1597,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1780,7 +1780,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_prepare_entry(fop, &fop->loc[0], 1);
ec_lock(fop);
return EC_STATE_DISPATCH;
@ -1852,7 +1852,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -2031,7 +2031,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_prepare_entry(fop, &fop->loc[0], 1);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
@ -2101,7 +2101,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;

View File

@ -70,11 +70,11 @@ void ec_fgetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_heal_cbk_t func, void *data, loc_t * loc,
dict_t * xdata);
int32_t partial, dict_t *xdata);
void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fheal_cbk_t func, void *data, fd_t * fd,
dict_t * xdata);
int32_t partial, dict_t *xdata);
void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_inodelk_cbk_t func, void *data,

View File

@ -91,7 +91,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 0);
ec_lock(fop);
return EC_STATE_DISPATCH;
@ -159,7 +159,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -199,6 +199,8 @@ void ec_flush(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
if (fd != NULL)
{
fop->fd = fd_ref(fd);
@ -325,7 +327,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 0);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
@ -408,7 +410,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -448,6 +450,8 @@ void ec_fsync(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->int32 = datasync;
if (fd != NULL)
@ -550,7 +554,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
{
case EC_STATE_INIT:
case EC_STATE_LOCK:
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 0);
ec_lock(fop);
return EC_STATE_DISPATCH;
@ -618,7 +622,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -658,6 +662,8 @@ void ec_fsyncdir(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->int32 = datasync;
if (fd != NULL)
@ -720,9 +726,9 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
LOCK(&cbk->inode->lock);
ctx = __ec_inode_get(cbk->inode, fop->xl);
if ((ctx != NULL) && !list_empty(&ctx->inode_locks))
if ((ctx != NULL) && (ctx->inode_lock != NULL))
{
lock = list_entry(ctx->inode_locks.next, ec_lock_t, list);
lock = ctx->inode_lock;
cbk->version = lock->version;
if (lock->have_size)
{
@ -1374,11 +1380,11 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
ec_lock_prepare_inode(fop, &fop->loc[0], 1);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 1);
}
ec_lock(fop);
@ -1468,7 +1474,7 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1652,6 +1658,8 @@ void ec_fxattrop(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->xattrop_flags = optype;
if (fd != NULL)

View File

@ -310,8 +310,7 @@ int32_t ec_heal_reopen_cbk(call_frame_t * frame, void * cookie,
LOCK(&fd->lock);
ctx = __ec_fd_get(fd, fop->xl);
if ((ctx != NULL) && (ctx->loc.inode != NULL))
{
if (ctx != NULL) {
ctx->bad &= ~good;
ctx->open |= good;
}
@ -482,6 +481,7 @@ int32_t ec_heal_init(ec_fop_data_t * fop)
heal->fop = fop;
pool = fop->xl->ctx->iobuf_pool;
heal->size = iobpool_default_pagesize(pool) * ec->fragments;
heal->partial = fop->int32;
LOCK(&inode->lock);
@ -516,10 +516,9 @@ out:
void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd)
{
loc_t loc;
char * name;
int32_t error;
error = ec_loc_parent(heal->xl, &heal->loc, &loc, &name);
error = ec_loc_parent(heal->xl, &heal->loc, &loc);
if (error != 0)
{
ec_fop_set_error(heal->fop, error);
@ -528,10 +527,9 @@ void ec_heal_entrylk(ec_heal_t * heal, entrylk_cmd cmd)
}
ec_entrylk(heal->fop->frame, heal->xl, -1, EC_MINIMUM_ALL, NULL, NULL,
heal->xl->name, &loc, name, cmd, ENTRYLK_WRLCK, NULL);
heal->xl->name, &loc, NULL, cmd, ENTRYLK_WRLCK, NULL);
loc_wipe(&loc);
GF_FREE(name);
}
void ec_heal_inodelk(ec_heal_t * heal, int32_t type, int32_t use_fd,
@ -970,7 +968,8 @@ void ec_heal_reopen_fd(ec_heal_t * heal)
{
inode_t * inode;
fd_t * fd;
ec_fd_t * ctx;
ec_fd_t *ctx_fd;
ec_inode_t *ctx_inode;
uintptr_t mask;
int32_t flags;
@ -978,12 +977,16 @@ void ec_heal_reopen_fd(ec_heal_t * heal)
LOCK(&inode->lock);
ctx_inode = __ec_inode_get(inode, heal->xl);
if (ctx_inode != NULL) {
ctx_inode->bad &= ~(heal->good | heal->bad);
}
list_for_each_entry(fd, &inode->fd_list, inode_list)
{
ctx = ec_fd_get(fd, heal->xl);
if ((ctx != NULL) && (ctx->loc.inode != NULL))
{
mask = heal->bad & ~ctx->open;
ctx_fd = ec_fd_get(fd, heal->xl);
if (ctx_fd != NULL) {
mask = heal->bad & ~ctx_fd->open;
if (mask != 0)
{
UNLOCK(&inode->lock);
@ -996,7 +999,7 @@ void ec_heal_reopen_fd(ec_heal_t * heal)
}
else
{
flags = ctx->flags & ~O_TRUNC;
flags = ctx_fd->flags & ~O_TRUNC;
if ((flags & O_ACCMODE) == O_WRONLY)
{
flags &= ~O_ACCMODE;
@ -1179,7 +1182,13 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state)
return EC_STATE_HEAL_ENTRY_PREPARE;
case EC_STATE_HEAL_ENTRY_PREPARE:
ec_heal_prepare(heal);
if (!heal->partial || (heal->iatt.ia_type == IA_IFDIR)) {
ec_heal_prepare(heal);
}
if (heal->partial) {
return EC_STATE_HEAL_UNLOCK_ENTRY;
}
return EC_STATE_HEAL_PRE_INODELK_LOCK;
@ -1240,6 +1249,8 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_HEAL_ENTRY_PREPARE:
case -EC_STATE_HEAL_PRE_INODELK_LOCK:
case -EC_STATE_HEAL_PRE_INODE_LOOKUP:
case -EC_STATE_HEAL_UNLOCK_ENTRY:
case EC_STATE_HEAL_UNLOCK_ENTRY:
ec_heal_entrylk(heal, ENTRYLK_UNLOCK);
if (ec_heal_needs_data_rebuild(heal))
@ -1395,7 +1406,7 @@ int32_t ec_manager_heal(ec_fop_data_t * fop, int32_t state)
void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_heal_cbk_t func, void * data, loc_t * loc,
dict_t * xdata)
int32_t partial, dict_t *xdata)
{
ec_cbk_t callback = { .heal = func };
ec_fop_data_t * fop = NULL;
@ -1415,6 +1426,8 @@ void ec_heal(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->int32 = partial;
if (loc != NULL)
{
if (loc_copy(&fop->loc[0], loc) != 0)
@ -1474,14 +1487,15 @@ void ec_wind_fheal(ec_t * ec, ec_fop_data_t * fop, int32_t idx)
void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,
int32_t minimum, fop_fheal_cbk_t func, void * data, fd_t * fd,
dict_t * xdata)
int32_t partial, dict_t *xdata)
{
ec_fd_t * ctx = ec_fd_get(fd, this);
if ((ctx != NULL) && (ctx->loc.inode != NULL))
if (ctx != NULL)
{
gf_log("ec", GF_LOG_DEBUG, "FHEAL ctx: flags=%X, open=%lX, bad=%lX",
ctx->flags, ctx->open, ctx->bad);
ec_heal(frame, this, target, minimum, func, data, &ctx->loc, xdata);
ec_heal(frame, this, target, minimum, func, data, &ctx->loc, partial,
xdata);
}
}

View File

@ -334,83 +334,56 @@ int32_t ec_loc_gfid_check(xlator_t * xl, uuid_t dst, uuid_t src)
return 1;
}
int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, char ** name)
int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent)
{
char * str = NULL;
int32_t error = 0;
memset(parent, 0, sizeof(loc_t));
if (loc->path == NULL)
if (loc->inode == NULL)
{
gf_log(xl->name, GF_LOG_ERROR, "inode path missing in loc_t: %p", loc->parent);
gf_log(xl->name, GF_LOG_ERROR, "Invalid loc");
return EINVAL;
error = EINVAL;
goto out;
}
if (loc->parent == NULL)
if (__is_root_gfid(loc->inode->gfid) || __is_root_gfid(loc->gfid) ||
((loc->path != NULL) && (strcmp(loc->path, "/") == 0)))
{
if ((loc->inode == NULL) || !__is_root_gfid(loc->inode->gfid) ||
(strcmp(loc->path, "/") != 0))
{
gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for "
"loc_t (path=%s, name=%s)",
loc->path, loc->name);
parent->path = gf_strdup("/");
if (parent->path == NULL) {
gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path '/'");
return EINVAL;
error = ENOMEM;
goto out;
}
if (loc_copy(parent, loc) != 0)
{
return ENOMEM;
}
parent->gfid[15] = 1;
parent->inode = inode_find(loc->inode->table, parent->gfid);
parent->name = NULL;
if (name != NULL)
{
*name = NULL;
}
return 0;
}
else
{
if (uuid_is_null(loc->parent->gfid) && (uuid_is_null(loc->pargfid)))
{
gf_log(xl->name, GF_LOG_ERROR, "Invalid parent inode "
"(path=%s, name=%s)",
loc->path, loc->name);
return EINVAL;
}
uuid_copy(parent->gfid, loc->pargfid);
if (loc->path != NULL) {
str = gf_strdup(loc->path);
if (str == NULL)
{
gf_log(xl->name, GF_LOG_ERROR, "Unable to duplicate path "
"'%s'", str);
"'%s'", loc->path);
return ENOMEM;
}
if (name != NULL)
{
*name = gf_strdup(basename(str));
if (*name == NULL)
{
gf_log(xl->name, GF_LOG_ERROR, "Unable to get basename "
"of '%s'", str);
error = ENOMEM;
error = ENOMEM;
goto out;
}
strcpy(str, loc->path);
goto out;
}
parent->path = gf_strdup(dirname(str));
if (parent->path == NULL)
{
gf_log(xl->name, GF_LOG_ERROR, "Unable to get dirname of "
"'%s'", str);
"'%s'", loc->path);
error = ENOMEM;
@ -427,19 +400,57 @@ int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent, char ** name)
goto out;
}
parent->name++;
}
if (loc->parent != NULL) {
parent->inode = inode_ref(loc->parent);
uuid_copy(parent->gfid, loc->parent->gfid);
}
if (!uuid_is_null(loc->pargfid) && uuid_is_null(parent->gfid)) {
uuid_copy(parent->gfid, loc->pargfid);
}
if ((loc->inode == NULL) ||
ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid))
if ((parent->inode == NULL) && (parent->path != NULL))
{
parent = NULL;
if (strcmp(parent->path, "/") == 0) {
parent->inode = inode_ref(loc->inode->table->root);
goto out;
}
parent->inode = inode_resolve(loc->inode->table, (char *)parent->path);
if (parent->inode != NULL) {
goto out;
}
gf_log(xl->name, GF_LOG_WARNING, "Unable to resolve parent inode");
}
if ((parent->inode == NULL) && !uuid_is_null(parent->gfid)) {
if (__is_root_gfid(parent->gfid)) {
parent->inode = inode_ref(loc->inode->table->root);
goto out;
}
parent->inode = inode_find(loc->inode->table, parent->gfid);
if (parent->inode != NULL) {
goto out;
}
gf_log(xl->name, GF_LOG_WARNING, "Unable to find parent inode");
}
if ((parent->inode == NULL) && (parent->path == NULL) &&
uuid_is_null(parent->gfid)) {
gf_log(xl->name, GF_LOG_ERROR, "Parent inode missing for loc_t");
error = EINVAL;
goto out;
}
out:
GF_FREE(str);
if (parent != NULL)
if (error != 0)
{
loc_wipe(parent);
}
@ -567,9 +578,6 @@ ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl)
return NULL;
}
INIT_LIST_HEAD(&ctx->entry_locks);
INIT_LIST_HEAD(&ctx->inode_locks);
}
}
else

View File

@ -39,8 +39,7 @@ int32_t ec_dict_del_number(dict_t * dict, char * key, uint64_t * value);
int32_t ec_dict_set_config(dict_t * dict, char * key, ec_config_t * config);
int32_t ec_dict_del_config(dict_t * dict, char * key, ec_config_t * config);
int32_t ec_loc_parent(xlator_t * xl, loc_t * loc, loc_t * parent,
char ** name);
int32_t ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent);
int32_t ec_loc_prepare(xlator_t * xl, loc_t * loc, inode_t * inode,
struct iatt * iatt);

View File

@ -254,11 +254,11 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
ec_lock_prepare_inode(fop, &fop->loc[0], 0);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 0);
}
ec_lock(fop);
@ -337,7 +337,7 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -516,6 +516,8 @@ void ec_fgetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
if (fd != NULL)
{
fop->fd = fd_ref(fd);
@ -1230,7 +1232,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 0);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
@ -1310,7 +1312,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1350,6 +1352,8 @@ void ec_readv(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->size = size;
fop->offset = offset;
fop->uint32 = flags;
@ -1478,11 +1482,11 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
ec_lock_prepare_inode(fop, &fop->loc[0], 0);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 0);
}
ec_lock(fop);
@ -1581,7 +1585,7 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1741,6 +1745,8 @@ void ec_fstat(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
if (fd != NULL)
{
fop->fd = fd_ref(fd);

View File

@ -94,11 +94,11 @@ int32_t ec_manager_removexattr(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
ec_lock_prepare_inode(fop, &fop->loc[0], 1);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 1);
}
ec_lock(fop);
@ -186,7 +186,7 @@ int32_t ec_manager_removexattr(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -351,6 +351,8 @@ void ec_fremovexattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
if (fd != NULL)
{
fop->fd = fd_ref(fd);
@ -490,11 +492,11 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
ec_lock_prepare_inode(fop, &fop->loc[0], 1);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 1);
}
ec_lock(fop);
@ -598,7 +600,7 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -772,6 +774,8 @@ void ec_fsetattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->int32 = valid;
if (fd != NULL)
@ -880,11 +884,11 @@ int32_t ec_manager_setxattr(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
ec_lock_prepare_inode(fop, &fop->loc[0], 1);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 1);
}
ec_lock(fop);
@ -971,7 +975,7 @@ int32_t ec_manager_setxattr(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1138,6 +1142,8 @@ void ec_fsetxattr(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->int32 = flags;
if (fd != NULL)
@ -1380,11 +1386,11 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
case EC_STATE_LOCK:
if (fop->id == GF_FOP_TRUNCATE)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
ec_lock_prepare_inode(fop, &fop->loc[0], 1);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 1);
}
ec_lock(fop);
@ -1497,7 +1503,7 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -1666,6 +1672,8 @@ void ec_ftruncate(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->offset = offset;
if (fd != NULL)
@ -2019,7 +2027,7 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
/* Fall through */
case EC_STATE_LOCK:
ec_lock_prepare_fd(fop, fop->fd);
ec_lock_prepare_fd(fop, fop->fd, 1);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION;
@ -2125,7 +2133,7 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
ec_lock_reuse(fop);
return EC_STATE_UNLOCK;
@ -2171,6 +2179,8 @@ void ec_writev(call_frame_t * frame, xlator_t * this, uintptr_t target,
fop->offset = offset;
fop->uint32 = flags;
fop->use_fd = 1;
if (fd != NULL)
{
fop->fd = fd_ref(fd);

View File

@ -497,6 +497,8 @@ void ec_fentrylk(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->entrylk_cmd = cmd;
fop->entrylk_type = type;
@ -951,6 +953,8 @@ void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->int32 = cmd;
if (volume != NULL)
@ -1245,6 +1249,8 @@ void ec_lk(call_frame_t * frame, xlator_t * this, uintptr_t target,
goto out;
}
fop->use_fd = 1;
fop->int32 = cmd;
if (fd != NULL)