ec: Optimize read/write performance

This patch significantly improves performance of read/write
operations on a dispersed volume by reusing previous inodelk/
entrylk operations on the same inode/entry. This reduces the
latency of each individual operation considerably.

Inode version and size are also updated when needed instead
of on each request. This gives an additional boost.

Change-Id: I4b98d5508c86b53032e16e295f72a3f83fd8fcac
BUG: 1122586
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/8369
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
This commit is contained in:
Xavier Hernandez 2014-07-14 17:34:04 +02:00 committed by Vijay Bellur
parent 2be5458500
commit d97863562b
13 changed files with 712 additions and 274 deletions

View File

@ -735,7 +735,7 @@ void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine)
ec_fop_data_t * fop = cbk->fop; ec_fop_data_t * fop = cbk->fop;
ec_cbk_data_t * ans = NULL, * tmp = NULL; ec_cbk_data_t * ans = NULL, * tmp = NULL;
struct list_head * item = NULL; struct list_head * item = NULL;
int32_t needed = 0, report = 0; int32_t needed = 0, resume = 0;
char str[32]; char str[32];
LOCK(&fop->lock); LOCK(&fop->lock);
@ -776,7 +776,7 @@ void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine)
ec_update_bad(fop, cbk->mask); ec_update_bad(fop, cbk->mask);
report = 1; resume = 1;
} }
ans = list_entry(fop->cbk_list.next, ec_cbk_data_t, list); ans = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
@ -788,8 +788,8 @@ void ec_combine(ec_cbk_data_t * cbk, ec_combine_f combine)
{ {
ec_dispatch_next(fop, cbk->idx); ec_dispatch_next(fop, cbk->idx);
} }
else if (report) else if (resume)
{ {
ec_report(fop, 0); ec_resume(fop, 0);
} }
} }

View File

@ -316,20 +316,10 @@ void ec_resume_parent(ec_fop_data_t * fop, int32_t error)
} }
} }
/*
 * Resumes 'fop' with the given error code. When the fop's lock list is not
 * empty, the owner of the fop's frame is first set from the frame's root.
 */
void ec_report(ec_fop_data_t * fop, int32_t error)
{
    call_frame_t * frame = fop->frame;

    if (list_empty(&fop->lock_list) == 0)
    {
        ec_owner_set(frame, frame->root);
    }

    ec_resume(fop, error);
}
void ec_complete(ec_fop_data_t * fop) void ec_complete(ec_fop_data_t * fop)
{ {
ec_cbk_data_t * cbk = NULL; ec_cbk_data_t * cbk = NULL;
int32_t ready = 0, report = 0; int32_t resume = 0;
LOCK(&fop->lock); LOCK(&fop->lock);
@ -351,21 +341,17 @@ void ec_complete(ec_fop_data_t * fop)
} }
} }
report = 1; resume = 1;
} }
else if ((fop->flags & EC_FLAG_WAITING_WINDS) != 0) else if ((fop->flags & EC_FLAG_WAITING_WINDS) != 0)
{ {
ready = 1; resume = 1;
} }
} }
UNLOCK(&fop->lock); UNLOCK(&fop->lock);
if (report) if (resume)
{
ec_report(fop, 0);
}
if (ready)
{ {
ec_resume(fop, 0); ec_resume(fop, 0);
} }
@ -518,7 +504,7 @@ void ec_dispatch_start(ec_fop_data_t * fop)
INIT_LIST_HEAD(&fop->cbk_list); INIT_LIST_HEAD(&fop->cbk_list);
if (!list_empty(&fop->lock_list)) if (fop->lock_count > 0)
{ {
ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner); ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner);
} }
@ -602,6 +588,7 @@ void ec_dispatch_min(ec_fop_data_t * fop)
ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc) ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc)
{ {
ec_t * ec = xl->private;
ec_lock_t * lock; ec_lock_t * lock;
if ((loc->inode == NULL) || if ((loc->inode == NULL) ||
@ -613,15 +600,15 @@ ec_lock_t * ec_lock_allocate(xlator_t * xl, int32_t kind, loc_t * loc)
return NULL; return NULL;
} }
lock = GF_MALLOC(sizeof(*lock), ec_mt_ec_lock_t); lock = mem_get0(ec->lock_pool);
if (lock != NULL) if (lock != NULL)
{ {
memset(lock, 0, sizeof(*lock));
lock->kind = kind; lock->kind = kind;
lock->good_mask = -1ULL;
INIT_LIST_HEAD(&lock->waiting);
if (!ec_loc_from_loc(xl, &lock->loc, loc)) if (!ec_loc_from_loc(xl, &lock->loc, loc))
{ {
GF_FREE(lock); mem_put(lock);
lock = NULL; lock = NULL;
} }
} }
@ -634,34 +621,55 @@ void ec_lock_destroy(ec_lock_t * lock)
GF_FREE(lock->basename); GF_FREE(lock->basename);
loc_wipe(&lock->loc); loc_wipe(&lock->loc);
GF_FREE(lock); mem_put(lock);
} }
int32_t ec_locked(call_frame_t * frame, void * cookie, xlator_t * this, int32_t ec_lock_compare(ec_lock_t * lock1, ec_lock_t * lock2)
int32_t op_ret, int32_t op_errno, dict_t * xdata)
{ {
ec_fop_data_t * fop = cookie; int32_t res;
ec_lock_t * lock = NULL;
if (op_ret >= 0) res = uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
if (res != 0)
{ {
lock = fop->data; return res;
lock->mask = fop->good;
fop->parent->mask &= fop->good;
ec_trace("LOCKED", fop->parent, "lock=%p", lock);
} }
else if (lock1->basename == NULL)
{ {
gf_log(this->name, GF_LOG_WARNING, "Failed to complete preop lock"); if (lock2->basename == NULL)
{
return 0;
}
return 1;
} }
if (lock2->basename == NULL)
return 0; {
return -1;
}
return strcmp(lock1->basename, lock2->basename);
} }
void ec_lock_entry(ec_fop_data_t * fop, loc_t * loc) void ec_lock_insert(ec_fop_data_t * fop, ec_lock_t * lock)
{
ec_lock_t * tmp;
if ((fop->lock_count > 0) &&
(ec_lock_compare(fop->locks[0].lock, lock) > 0))
{
tmp = fop->locks[0].lock;
fop->locks[0].lock = lock;
lock = tmp;
}
fop->locks[fop->lock_count].lock = lock;
fop->locks[fop->lock_count].fop = fop;
fop->lock_count++;
lock->refs++;
}
void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc)
{ {
ec_lock_t * lock = NULL; ec_lock_t * lock = NULL;
ec_inode_t * ctx = NULL;
char * name = NULL; char * name = NULL;
loc_t tmp; loc_t tmp;
int32_t error; int32_t error;
@ -679,116 +687,106 @@ void ec_lock_entry(ec_fop_data_t * fop, loc_t * loc)
return; return;
} }
LOCK(&fop->lock); LOCK(&tmp.inode->lock);
list_for_each_entry(lock, &fop->lock_list, list) ctx = __ec_inode_get(tmp.inode, fop->xl);
if (ctx == NULL)
{ {
if ((lock->kind == EC_LOCK_ENTRY) && __ec_fop_set_error(fop, EIO);
(lock->loc.inode == tmp.inode) &&
(strcmp(lock->basename, name) == 0)) goto unlock;
}
list_for_each_entry(lock, &ctx->entry_locks, list)
{
if (strcmp(lock->basename, name) == 0)
{ {
ec_trace("LOCK_ENTRYLK", fop, "lock=%p, parent=%p, path=%s, " ec_trace("LOCK_ENTRYLK", fop, "lock=%p, inode=%p, path=%s, "
"name=%s. Lock already acquired", "name=%s. Lock already acquired",
lock, loc->parent, loc->path, name); lock, tmp.inode, tmp.path, name);
lock = NULL; goto insert;
goto unlock;
} }
} }
lock = ec_lock_allocate(fop->xl, EC_LOCK_ENTRY, &tmp); lock = ec_lock_allocate(fop->xl, EC_LOCK_ENTRY, &tmp);
if (lock != NULL) if (lock == NULL)
{
lock->type = ENTRYLK_WRLCK;
lock->basename = name;
if (list_empty(&fop->lock_list))
{
ec_owner_set(fop->frame, fop->frame->root);
}
list_add_tail(&lock->list, &fop->lock_list);
}
else
{ {
__ec_fop_set_error(fop, EIO); __ec_fop_set_error(fop, EIO);
goto unlock;
} }
ec_trace("LOCK_CREATE", fop, "lock=%p", lock);
lock->type = ENTRYLK_WRLCK;
lock->basename = name;
name = NULL;
list_add_tail(&lock->list, &ctx->entry_locks);
insert:
ec_lock_insert(fop, lock);
unlock: unlock:
UNLOCK(&fop->lock); UNLOCK(&tmp.inode->lock);
loc_wipe(&tmp); loc_wipe(&tmp);
GF_FREE(name);
if (lock != NULL)
{
ec_trace("LOCK_ENTRYLK", fop, "lock=%p, parent=%p, path=%s, "
"basename=%s", lock, lock->loc.inode,
lock->loc.path, lock->basename);
ec_entrylk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked, lock,
fop->xl->name, &lock->loc, lock->basename, ENTRYLK_LOCK,
lock->type, NULL);
}
else
{
GF_FREE(name);
}
} }
void ec_lock_inode(ec_fop_data_t * fop, loc_t * loc) void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc)
{ {
ec_lock_t * lock; ec_lock_t * lock;
ec_inode_t * ctx;
if ((fop->parent != NULL) || (fop->error != 0) || (loc->inode == NULL)) if ((fop->parent != NULL) || (fop->error != 0) || (loc->inode == NULL))
{ {
return; return;
} }
LOCK(&fop->lock); LOCK(&loc->inode->lock);
list_for_each_entry(lock, &fop->lock_list, list) ctx = __ec_inode_get(loc->inode, fop->xl);
if (ctx == NULL)
{ {
if ((lock->kind == EC_LOCK_INODE) && (lock->loc.inode == loc->inode)) __ec_fop_set_error(fop, EIO);
{
UNLOCK(&fop->lock);
ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already " goto unlock;
"acquired", lock, loc->inode); }
return; if (!list_empty(&ctx->inode_locks))
} {
lock = list_entry(ctx->inode_locks.next, ec_lock_t, list);
ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p. Lock already "
"acquired", lock, loc->inode);
goto insert;
} }
lock = ec_lock_allocate(fop->xl, EC_LOCK_INODE, loc); lock = ec_lock_allocate(fop->xl, EC_LOCK_INODE, loc);
if (lock != NULL) if (lock == NULL)
{
lock->flock.l_type = F_WRLCK;
lock->flock.l_whence = SEEK_SET;
if (list_empty(&fop->lock_list))
{
ec_owner_set(fop->frame, fop->frame->root);
}
list_add_tail(&lock->list, &fop->lock_list);
}
else
{ {
__ec_fop_set_error(fop, EIO); __ec_fop_set_error(fop, EIO);
goto unlock;
} }
UNLOCK(&fop->lock); ec_trace("LOCK_CREATE", fop, "lock=%p", lock);
if (lock != NULL) lock->flock.l_type = F_WRLCK;
{ lock->flock.l_whence = SEEK_SET;
ec_trace("LOCK_INODELK", fop, "lock=%p, inode=%p, owner=%p", lock,
lock->loc.inode, fop->frame->root);
ec_inodelk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked, lock, list_add_tail(&lock->list, &ctx->inode_locks);
fop->xl->name, &lock->loc, F_SETLKW, &lock->flock, NULL);
} insert:
ec_lock_insert(fop, lock);
unlock:
UNLOCK(&loc->inode->lock);
} }
void ec_lock_fd(ec_fop_data_t * fop, fd_t * fd) void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd)
{ {
loc_t loc; loc_t loc;
@ -799,7 +797,7 @@ void ec_lock_fd(ec_fop_data_t * fop, fd_t * fd)
if (ec_loc_from_fd(fop->xl, &loc, fd)) if (ec_loc_from_fd(fop->xl, &loc, fd))
{ {
ec_lock_inode(fop, &loc); ec_lock_prepare_inode(fop, &loc);
loc_wipe(&loc); loc_wipe(&loc);
} }
@ -809,6 +807,100 @@ void ec_lock_fd(ec_fop_data_t * fop, fd_t * fd)
} }
} }
/*
 * Callback of the entrylk/inodelk requests sent by ec_lock().
 *
 * 'cookie' is the fop that carried the lock request; its 'data' field holds
 * the ec_lock_t being acquired and its 'parent' is the fop that asked for
 * the lock.
 *
 * On success the lock is flagged as acquired, the good subvolumes mask is
 * stored in the lock and applied to the parent, the parent's count of taken
 * locks is incremented and ec_lock() is called again on the parent to
 * process its remaining locks. On failure only a warning is logged.
 *
 * Always returns 0.
 */
int32_t ec_locked(call_frame_t * frame, void * cookie, xlator_t * this,
                  int32_t op_ret, int32_t op_errno, dict_t * xdata)
{
    ec_fop_data_t * fop = cookie;
    ec_fop_data_t * parent;
    ec_lock_t * lock;

    if (op_ret < 0)
    {
        gf_log(this->name, GF_LOG_WARNING, "Failed to complete preop lock");

        return 0;
    }

    parent = fop->parent;

    lock = fop->data;
    lock->mask = fop->good;
    lock->acquired = 1;

    parent->mask &= fop->good;
    parent->locked++;

    ec_trace("LOCKED", parent, "lock=%p", lock);

    // Continue with the next lock of the parent fop, if any.
    ec_lock(parent);

    return 0;
}
/*
 * Walks the locks attached to 'fop', starting at index fop->locked, and
 * tries to take ownership of each one in order.
 *
 * For each lock, while holding the lock of the related inode:
 *   - if another fop currently owns it, this fop is appended to the lock's
 *     waiting list and the loop stops;
 *   - otherwise this fop becomes the owner.
 *
 * An owned lock that has not been acquired yet causes an entrylk/inodelk
 * request to be sent (ec_locked() is the callback) and the loop stops.
 * An owned lock that is already acquired is reused directly and the loop
 * moves on to the next lock.
 */
void ec_lock(ec_fop_data_t * fop)
{
ec_lock_t * lock;
while (fop->locked < fop->lock_count)
{
lock = fop->locks[fop->locked].lock;
LOCK(&lock->loc.inode->lock);
if (lock->owner != NULL)
{
// The lock is being used by another fop: queue this fop on it.
ec_trace("LOCK_WAIT", fop, "lock=%p", lock);
list_add_tail(&fop->locks[fop->locked].wait_list, &lock->waiting);
// NOTE(review): presumably the extra job and reference keep the fop
// pending and alive while it is queued - confirm against ec_resume().
fop->jobs++;
fop->refs++;
UNLOCK(&lock->loc.inode->lock);
break;
}
lock->owner = fop;
UNLOCK(&lock->loc.inode->lock);
if (!lock->acquired)
{
// First use of this lock: send the real lock request. Processing of
// the remaining locks is resumed from ec_locked() on success.
ec_owner_set(fop->frame, lock);
if (lock->kind == EC_LOCK_ENTRY)
{
ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p, path=%s, "
"name=%s", lock, lock->loc.inode, lock->loc.path,
lock->basename);
ec_entrylk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked,
lock, fop->xl->name, &lock->loc, lock->basename,
ENTRYLK_LOCK, lock->type, NULL);
}
else
{
ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p", lock,
lock->loc.inode);
ec_inodelk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked,
lock, fop->xl->name, &lock->loc, F_SETLKW,
&lock->flock, NULL);
}
break;
}
// The lock was already acquired by a previous fop: reuse it.
ec_trace("LOCK_REUSE", fop, "lock=%p", lock);
if (lock->have_size)
{
// Take the size stored in the lock as the initial size of the fop.
fop->pre_size = fop->post_size = lock->size;
fop->have_size = 1;
}
// Restrict the fop to the subvolumes recorded as good in the lock.
fop->mask &= lock->good_mask;
fop->locked++;
}
}
int32_t ec_unlocked(call_frame_t * frame, void * cookie, xlator_t * this, int32_t ec_unlocked(call_frame_t * frame, void * cookie, xlator_t * this,
int32_t op_ret, int32_t op_errno, dict_t * xdata) int32_t op_ret, int32_t op_errno, dict_t * xdata)
{ {
@ -829,50 +921,68 @@ int32_t ec_unlocked(call_frame_t * frame, void * cookie, xlator_t * this,
void ec_unlock(ec_fop_data_t * fop) void ec_unlock(ec_fop_data_t * fop)
{ {
ec_lock_t * lock, * item; ec_lock_t * lock;
int32_t i, refs;
ec_trace("UNLOCK", fop, ""); for (i = 0; i < fop->lock_count; i++)
list_for_each_entry_safe(lock, item, &fop->lock_list, list)
{ {
list_del(&lock->list); lock = fop->locks[i].lock;
if (lock->mask != 0) LOCK(&lock->loc.inode->lock);
ec_trace("UNLOCK", fop, "lock=%p", lock);
refs = --lock->refs;
if (refs == 0)
{ {
switch (lock->kind) list_del_init(&lock->list);
{
case EC_LOCK_ENTRY:
ec_trace("UNLOCK_ENTRYLK", fop, "lock=%p, parent=%p, "
"path=%s, basename=%s",
lock, lock->loc.inode, lock->loc.path,
lock->basename);
ec_entrylk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ALL,
ec_unlocked, lock, fop->xl->name, &lock->loc,
lock->basename, ENTRYLK_UNLOCK, lock->type,
NULL);
break;
case EC_LOCK_INODE:
lock->flock.l_type = F_UNLCK;
ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,
lock->loc.inode);
ec_inodelk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ALL,
ec_unlocked, lock, fop->xl->name, &lock->loc,
F_SETLK, &lock->flock, NULL);
break;
default:
gf_log(fop->xl->name, GF_LOG_ERROR, "Invalid lock type");
}
} }
loc_wipe(&lock->loc); UNLOCK(&lock->loc.inode->lock);
GF_FREE(lock); if (refs == 0)
{
if (lock->mask != 0)
{
ec_owner_set(fop->frame, lock);
switch (lock->kind)
{
case EC_LOCK_ENTRY:
ec_trace("UNLOCK_ENTRYLK", fop, "lock=%p, inode=%p, "
"path=%s, basename=%s",
lock, lock->loc.inode, lock->loc.path,
lock->basename);
ec_entrylk(fop->frame, fop->xl, lock->mask,
EC_MINIMUM_ALL, ec_unlocked, lock,
fop->xl->name, &lock->loc, lock->basename,
ENTRYLK_UNLOCK, lock->type, NULL);
break;
case EC_LOCK_INODE:
lock->flock.l_type = F_UNLCK;
ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p",
lock, lock->loc.inode);
ec_inodelk(fop->frame, fop->xl, lock->mask,
EC_MINIMUM_ALL, ec_unlocked, lock,
fop->xl->name, &lock->loc, F_SETLK,
&lock->flock, NULL);
break;
default:
gf_log(fop->xl->name, GF_LOG_ERROR, "Invalid lock "
"type");
}
}
ec_trace("LOCK_DESTROY", fop, "lock=%p", lock);
ec_lock_destroy(lock);
}
} }
} }
@ -883,11 +993,36 @@ int32_t ec_get_size_version_set(call_frame_t * frame, void * cookie,
struct iatt * postparent) struct iatt * postparent)
{ {
ec_fop_data_t * fop = cookie; ec_fop_data_t * fop = cookie;
ec_inode_t * ctx;
ec_lock_t * lock;
if (op_ret >= 0) if (op_ret >= 0)
{ {
fop->parent->mask &= fop->good; LOCK(&inode->lock);
ctx = __ec_inode_get(inode, this);
if ((ctx != NULL) && !list_empty(&ctx->inode_locks))
{
lock = list_entry(ctx->inode_locks.next, ec_lock_t, list);
lock->have_size = 1;
lock->size = buf->ia_size;
lock->version = fop->answer->version;
}
UNLOCK(&inode->lock);
if (lock != NULL)
{
// Only update parent mask if the lookup has been made with
// inode locked.
fop->parent->mask &= fop->good;
}
fop->parent->pre_size = fop->parent->post_size = buf->ia_size; fop->parent->pre_size = fop->parent->post_size = buf->ia_size;
fop->parent->have_size = 1;
} }
else else
{ {
@ -907,11 +1042,18 @@ void ec_get_size_version(ec_fop_data_t * fop)
gid_t gid; gid_t gid;
int32_t error = ENOMEM; int32_t error = ENOMEM;
if (fop->parent != NULL) if (fop->have_size)
{
return;
}
if ((fop->parent != NULL) && fop->parent->have_size)
{ {
fop->pre_size = fop->parent->pre_size; fop->pre_size = fop->parent->pre_size;
fop->post_size = fop->parent->post_size; fop->post_size = fop->parent->post_size;
fop->have_size = 1;
return; return;
} }
@ -998,10 +1140,10 @@ int32_t ec_update_size_version_done(call_frame_t * frame, void * cookie,
return 0; return 0;
} }
void ec_update_size_version(ec_fop_data_t * fop) void ec_update_size_version(ec_fop_data_t * fop, uint64_t version,
size_t size)
{ {
dict_t * dict; dict_t * dict;
size_t size;
uid_t uid; uid_t uid;
gid_t gid; gid_t gid;
@ -1012,20 +1154,20 @@ void ec_update_size_version(ec_fop_data_t * fop)
return; return;
} }
ec_trace("UPDATE", fop, "version=%ld, size=%ld", version, size);
dict = dict_new(); dict = dict_new();
if (dict == NULL) if (dict == NULL)
{ {
goto out; goto out;
} }
if (ec_dict_set_number(dict, EC_XATTR_VERSION, 1) != 0) if (ec_dict_set_number(dict, EC_XATTR_VERSION, version) != 0)
{ {
goto out; goto out;
} }
size = fop->post_size; if (size != 0)
if (fop->pre_size != size)
{ {
size -= fop->pre_size;
if (ec_dict_set_number(dict, EC_XATTR_SIZE, size) != 0) if (ec_dict_set_number(dict, EC_XATTR_SIZE, size) != 0)
{ {
goto out; goto out;
@ -1069,6 +1211,113 @@ out:
gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to update version and size"); gf_log(fop->xl->name, GF_LOG_ERROR, "Unable to update version and size");
} }
/*
 * Sends the version and size deltas accumulated in the inode lock of 'fop'.
 *
 * The fop must have exactly one lock, of kind EC_LOCK_INODE, and must be
 * its current owner. Both deltas are read and reset to zero while holding
 * the inode's lock; ec_update_size_version() is called afterwards only if
 * the version delta was non-zero.
 */
void ec_flush_size_version(ec_fop_data_t * fop)
{
    ec_lock_t * lock;
    uint64_t pending_version;
    size_t pending_size;

    GF_ASSERT(fop->lock_count == 1);

    lock = fop->locks[0].lock;

    GF_ASSERT(lock->kind == EC_LOCK_INODE);

    LOCK(&lock->loc.inode->lock);

    GF_ASSERT(lock->owner == fop);

    // Take a snapshot of the pending deltas and clear them atomically.
    pending_version = lock->version_delta;
    lock->version_delta = 0;
    pending_size = lock->size_delta;
    lock->size_delta = 0;

    UNLOCK(&lock->loc.inode->lock);

    if (pending_version == 0)
    {
        // Nothing has been accumulated since the last flush.
        return;
    }

    ec_update_size_version(fop, pending_version, pending_size);
}
/*
 * Gives up the ownership of every lock attached to 'fop' so that the locks
 * can be reused by other fops. The locks themselves are not released here.
 *
 * For each lock, while holding the lock of the related inode:
 *   - the owner is cleared;
 *   - for inode locks, if 'update' is non-zero and the fop has no error,
 *     the pending version delta is incremented and the size change made by
 *     the fop is added to the pending size delta;
 *   - the good mask of the lock is restricted by the mask of the fop;
 *   - the first fop of the waiting list, if any, is removed from it.
 *
 * A fop removed from the waiting list is then passed to ec_lock() and
 * resumed.
 *
 * After the loop, if the last inode lock processed had a single reference
 * and a non-zero pending version, the pending version and size deltas are
 * sent with ec_update_size_version().
 */
void ec_lock_reuse(ec_fop_data_t * fop, int32_t update)
{
ec_fop_data_t * wait_fop;
ec_lock_t * lock;
ec_lock_link_t * link;
// These three are only assigned for inode locks. They keep the values of
// the last inode lock processed by the loop (or 0 if there was none).
size_t delta = 0;
uint64_t version = 0;
int32_t refs = 0;
int32_t i;
for (i = 0; i < fop->lock_count; i++)
{
wait_fop = NULL;
lock = fop->locks[i].lock;
LOCK(&lock->loc.inode->lock);
ec_trace("LOCK_DONE", fop, "lock=%p", lock);
GF_ASSERT(lock->owner == fop);
lock->owner = NULL;
if (lock->kind == EC_LOCK_INODE)
{
if (update && (fop->error == 0))
{
// Account for the modification made by this fop.
lock->version_delta++;
lock->size_delta += fop->post_size - fop->pre_size;
}
version = lock->version_delta;
delta = lock->size_delta;
refs = lock->refs;
if (refs == 1)
{
// Only one reference is left on the lock: the deltas captured above
// are sent after the loop, so they are cleared here.
lock->version_delta = 0;
lock->size_delta = 0;
}
if (fop->have_size)
{
// Store the resulting size in the lock for fops that reuse it.
lock->size = fop->post_size;
lock->have_size = 1;
}
}
lock->good_mask &= fop->mask;
if (!list_empty(&lock->waiting))
{
// Hand the lock over to the first fop waiting for it.
link = list_entry(lock->waiting.next, ec_lock_link_t, wait_list);
list_del_init(&link->wait_list);
wait_fop = link->fop;
if (lock->kind == EC_LOCK_INODE)
{
wait_fop->pre_size = wait_fop->post_size = fop->post_size;
wait_fop->have_size = fop->have_size;
}
wait_fop->mask &= fop->mask;
}
UNLOCK(&lock->loc.inode->lock);
if (wait_fop != NULL)
{
// Done after dropping the inode lock: ec_lock() takes it again.
ec_lock(wait_fop);
ec_resume(wait_fop, 0);
}
}
if ((refs == 1) && (version > 0))
{
ec_update_size_version(fop, version, delta);
}
}
void __ec_manager(ec_fop_data_t * fop, int32_t error) void __ec_manager(ec_fop_data_t * fop, int32_t error)
{ {
do do

View File

@ -47,10 +47,10 @@
#define EC_STATE_DISPATCH 4 #define EC_STATE_DISPATCH 4
#define EC_STATE_PREPARE_ANSWER 5 #define EC_STATE_PREPARE_ANSWER 5
#define EC_STATE_REPORT 6 #define EC_STATE_REPORT 6
#define EC_STATE_UPDATE_SIZE_AND_VERSION 7 #define EC_STATE_LOCK_REUSE 7
#define EC_STATE_UNLOCK 8 #define EC_STATE_UNLOCK 8
#define EC_STATE_WRITE_START 100 #define EC_STATE_DELAYED_START 100
#define EC_STATE_HEAL_ENTRY_LOOKUP 200 #define EC_STATE_HEAL_ENTRY_LOOKUP 200
#define EC_STATE_HEAL_ENTRY_PREPARE 201 #define EC_STATE_HEAL_ENTRY_PREPARE 201
@ -81,14 +81,15 @@ void ec_update_bad(ec_fop_data_t * fop, uintptr_t good);
void ec_fop_set_error(ec_fop_data_t * fop, int32_t error); void ec_fop_set_error(ec_fop_data_t * fop, int32_t error);
void ec_lock_inode(ec_fop_data_t * fop, loc_t * loc); void ec_lock_prepare_inode(ec_fop_data_t * fop, loc_t * loc);
void ec_lock_entry(ec_fop_data_t * fop, loc_t * loc); void ec_lock_prepare_entry(ec_fop_data_t * fop, loc_t * loc);
void ec_lock_fd(ec_fop_data_t * fop, fd_t * fd); void ec_lock_prepare_fd(ec_fop_data_t * fop, fd_t * fd);
void ec_lock(ec_fop_data_t * fop);
void ec_lock_reuse(ec_fop_data_t * fop, int32_t update);
void ec_unlock(ec_fop_data_t * fop); void ec_unlock(ec_fop_data_t * fop);
void ec_get_size_version(ec_fop_data_t * fop); void ec_get_size_version(ec_fop_data_t * fop);
void ec_update_size_version(ec_fop_data_t * fop); void ec_flush_size_version(ec_fop_data_t * fop);
void ec_dispatch_all(ec_fop_data_t * fop); void ec_dispatch_all(ec_fop_data_t * fop);
void ec_dispatch_inc(ec_fop_data_t * fop); void ec_dispatch_inc(ec_fop_data_t * fop);
@ -97,8 +98,8 @@ void ec_dispatch_one(ec_fop_data_t * fop);
void ec_wait_winds(ec_fop_data_t * fop); void ec_wait_winds(ec_fop_data_t * fop);
void ec_resume(ec_fop_data_t * fop, int32_t error);
void ec_resume_parent(ec_fop_data_t * fop, int32_t error); void ec_resume_parent(ec_fop_data_t * fop, int32_t error);
void ec_report(ec_fop_data_t * fop, int32_t error);
void ec_manager(ec_fop_data_t * fop, int32_t error); void ec_manager(ec_fop_data_t * fop, int32_t error);

View File

@ -158,7 +158,6 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
fop->minimum = minimum; fop->minimum = minimum;
fop->mask = target; fop->mask = target;
INIT_LIST_HEAD(&fop->lock_list);
INIT_LIST_HEAD(&fop->cbk_list); INIT_LIST_HEAD(&fop->cbk_list);
INIT_LIST_HEAD(&fop->answer_list); INIT_LIST_HEAD(&fop->answer_list);

View File

@ -37,6 +37,9 @@ typedef union _ec_cbk ec_cbk_t;
struct _ec_lock; struct _ec_lock;
typedef struct _ec_lock ec_lock_t; typedef struct _ec_lock ec_lock_t;
struct _ec_lock_link;
typedef struct _ec_lock_link ec_lock_link_t;
struct _ec_fop_data; struct _ec_fop_data;
typedef struct _ec_fop_data ec_fop_data_t; typedef struct _ec_fop_data ec_fop_data_t;
@ -60,8 +63,10 @@ struct _ec_fd
struct _ec_inode struct _ec_inode
{ {
uintptr_t bad; uintptr_t bad;
ec_heal_t * heal; struct list_head entry_locks;
struct list_head inode_locks;
ec_heal_t * heal;
}; };
typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *, typedef int32_t (* fop_heal_cbk_t)(call_frame_t *, void * cookie, xlator_t *,
@ -124,8 +129,18 @@ union _ec_cbk
struct _ec_lock struct _ec_lock
{ {
struct list_head list; struct list_head list;
struct list_head waiting;
uintptr_t mask; uintptr_t mask;
uintptr_t good_mask;
int32_t kind; int32_t kind;
int32_t refs;
int32_t acquired;
int32_t have_size;
size_t size;
size_t size_delta;
uint64_t version;
uint64_t version_delta;
ec_fop_data_t * owner;
loc_t loc; loc_t loc;
union union
{ {
@ -138,6 +153,13 @@ struct _ec_lock
}; };
}; };
// Association between a fop and one of the locks it uses. Each fop embeds
// an array of these links (ec_fop_data_t::locks).
struct _ec_lock_link
{
ec_lock_t * lock; // lock used by the fop
ec_fop_data_t * fop; // fop that owns this link
struct list_head wait_list; // entry in lock->waiting while the fop waits
};
struct _ec_fop_data struct _ec_fop_data
{ {
int32_t id; int32_t id;
@ -152,10 +174,13 @@ struct _ec_fop_data
xlator_t * xl; xlator_t * xl;
call_frame_t * req_frame; // frame of the calling xlator call_frame_t * req_frame; // frame of the calling xlator
call_frame_t * frame; // frame used by this fop call_frame_t * frame; // frame used by this fop
struct list_head lock_list; // list locks held by this fop
struct list_head cbk_list; // sorted list of groups of answers struct list_head cbk_list; // sorted list of groups of answers
struct list_head answer_list; // list of answers struct list_head answer_list; // list of answers
ec_cbk_data_t * answer; // accepted answer ec_cbk_data_t * answer; // accepted answer
int32_t lock_count;
int32_t locked;
ec_lock_link_t locks[2];
int32_t have_size;
size_t pre_size; size_t pre_size;
size_t post_size; size_t post_size;
gf_lock_t lock; gf_lock_t lock;

View File

@ -181,7 +181,8 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
/* Fall through */ /* Fall through */
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_entry(fop, &fop->loc[0]); ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -245,11 +246,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
cbk->xdata); cbk->xdata);
} }
if (cbk->op_ret >= 0) return EC_STATE_LOCK_REUSE;
{
return EC_STATE_UPDATE_SIZE_AND_VERSION;
}
return EC_STATE_UNLOCK;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -263,14 +260,14 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL, NULL, NULL);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case EC_STATE_UPDATE_SIZE_AND_VERSION: case -EC_STATE_LOCK_REUSE:
ec_update_size_version(fop); case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK: case EC_STATE_UNLOCK:
ec_unlock(fop); ec_unlock(fop);
@ -468,7 +465,8 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
// Parent entry of fop->loc[0] should be locked, but I don't // Parent entry of fop->loc[0] should be locked, but I don't
// receive enough information to do it (fop->loc[0].parent is // receive enough information to do it (fop->loc[0].parent is
// NULL). // NULL).
ec_lock_entry(fop, &fop->loc[1]); ec_lock_prepare_entry(fop, &fop->loc[1]);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -531,7 +529,7 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
&cbk->iatt[1], &cbk->iatt[2], cbk->xdata); &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -546,6 +544,12 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -732,7 +736,8 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_entry(fop, &fop->loc[0]); ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -785,7 +790,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
&cbk->iatt[1], &cbk->iatt[2], cbk->xdata); &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -799,6 +804,12 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -982,7 +993,8 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_entry(fop, &fop->loc[0]); ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -1035,7 +1047,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
&cbk->iatt[1], &cbk->iatt[2], cbk->xdata); &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -1049,6 +1061,12 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -1230,8 +1248,9 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_entry(fop, &fop->loc[0]); ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock_entry(fop, &fop->loc[1]); ec_lock_prepare_entry(fop, &fop->loc[1]);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -1292,7 +1311,7 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
cbk->xdata); cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -1307,6 +1326,12 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -1479,7 +1504,8 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_entry(fop, &fop->loc[0]); ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -1524,7 +1550,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
cbk->xdata); cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -1538,6 +1564,12 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL); NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -1719,7 +1751,8 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_entry(fop, &fop->loc[0]); ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -1772,7 +1805,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
&cbk->iatt[1], &cbk->iatt[2], cbk->xdata); &cbk->iatt[1], &cbk->iatt[2], cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -1786,6 +1819,12 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -1963,7 +2002,8 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_entry(fop, &fop->loc[0]); ec_lock_prepare_entry(fop, &fop->loc[0]);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -2013,7 +2053,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
cbk->xdata); cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -2028,6 +2068,12 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL); NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:

View File

@ -91,11 +91,17 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_fd(fop, fop->fd); ec_lock_prepare_fd(fop, fop->fd);
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH: case EC_STATE_DISPATCH:
ec_flush_size_version(fop);
return EC_STATE_DELAYED_START;
case EC_STATE_DELAYED_START:
ec_dispatch_all(fop); ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER; return EC_STATE_PREPARE_ANSWER;
@ -135,7 +141,7 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
cbk->op_errno, cbk->xdata); cbk->op_errno, cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -149,6 +155,12 @@ int32_t ec_manager_flush(ec_fop_data_t * fop, int32_t state)
NULL); NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -313,7 +325,8 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_fd(fop, fop->fd); ec_lock_prepare_fd(fop, fop->fd);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -323,6 +336,11 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH: case EC_STATE_DISPATCH:
ec_flush_size_version(fop);
return EC_STATE_DELAYED_START;
case EC_STATE_DELAYED_START:
ec_dispatch_all(fop); ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER; return EC_STATE_PREPARE_ANSWER;
@ -371,7 +389,7 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
cbk->xdata); cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -386,6 +404,12 @@ int32_t ec_manager_fsync(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL); NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -526,11 +550,17 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_fd(fop, fop->fd); ec_lock_prepare_fd(fop, fop->fd);
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
case EC_STATE_DISPATCH: case EC_STATE_DISPATCH:
ec_flush_size_version(fop);
return EC_STATE_DELAYED_START;
case EC_STATE_DELAYED_START:
ec_dispatch_all(fop); ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER; return EC_STATE_PREPARE_ANSWER;
@ -570,7 +600,7 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
cbk->op_errno, cbk->xdata); cbk->op_errno, cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -584,6 +614,12 @@ int32_t ec_manager_fsyncdir(ec_fop_data_t * fop, int32_t state)
fop->error, NULL); fop->error, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -665,10 +701,12 @@ out:
void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk) void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
{ {
ec_cbk_data_t * ans = NULL; ec_cbk_data_t * ans = NULL;
ec_inode_t * ctx = NULL;
ec_lock_t * lock = NULL;
data_t * data = NULL; data_t * data = NULL;
uint8_t * buff = NULL; uint8_t * buff = NULL;
size_t size = 0; size_t size = 0;
int32_t i = 0; int32_t i = 0, have_size = 0;
if (cbk->op_ret < 0) if (cbk->op_ret < 0)
{ {
@ -679,6 +717,22 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]); ec_loc_prepare(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]);
LOCK(&cbk->inode->lock);
ctx = __ec_inode_get(cbk->inode, fop->xl);
if ((ctx != NULL) && !list_empty(&ctx->inode_locks))
{
lock = list_entry(ctx->inode_locks.next, ec_lock_t, list);
cbk->version = lock->version;
if (lock->have_size)
{
size = lock->size;
have_size = 1;
}
}
UNLOCK(&cbk->inode->lock);
if (cbk->iatt[0].ia_type == IA_IFREG) if (cbk->iatt[0].ia_type == IA_IFREG)
{ {
uint8_t * blocks[cbk->count]; uint8_t * blocks[cbk->count];
@ -686,6 +740,10 @@ void ec_lookup_rebuild(ec_t * ec, ec_fop_data_t * fop, ec_cbk_data_t * cbk)
cbk->size = cbk->iatt[0].ia_size; cbk->size = cbk->iatt[0].ia_size;
ec_dict_del_number(cbk->xdata, EC_XATTR_SIZE, &cbk->iatt[0].ia_size); ec_dict_del_number(cbk->xdata, EC_XATTR_SIZE, &cbk->iatt[0].ia_size);
if (have_size)
{
cbk->iatt[0].ia_size = size;
}
size = SIZE_MAX; size = SIZE_MAX;
for (i = 0, ans = cbk; (ans != NULL) && (i < ec->fragments); for (i = 0, ans = cbk; (ans != NULL) && (i < ec->fragments);
@ -1314,7 +1372,15 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_inode(fop, &fop->loc[0]); if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
}
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -1373,11 +1439,7 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
} }
} }
if (cbk->op_ret >= 0) return EC_STATE_LOCK_REUSE;
{
return EC_STATE_UPDATE_SIZE_AND_VERSION;
}
return EC_STATE_UNLOCK;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -1402,14 +1464,14 @@ int32_t ec_manager_xattrop(ec_fop_data_t * fop, int32_t state)
} }
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case EC_STATE_UPDATE_SIZE_AND_VERSION: case -EC_STATE_LOCK_REUSE:
ec_update_size_version(fop); case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK: case EC_STATE_UNLOCK:
ec_unlock(fop); ec_unlock(fop);

View File

@ -503,6 +503,9 @@ ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl)
return NULL; return NULL;
} }
INIT_LIST_HEAD(&ctx->entry_locks);
INIT_LIST_HEAD(&ctx->inode_locks);
} }
} }
else else

View File

@ -252,7 +252,15 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_inode(fop, &fop->loc[0]); if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
}
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -311,7 +319,7 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
cbk->op_errno, cbk->dict, cbk->xdata); cbk->op_errno, cbk->dict, cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -325,6 +333,12 @@ int32_t ec_manager_getxattr(ec_fop_data_t * fop, int32_t state)
fop->error, NULL, NULL); fop->error, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -1216,7 +1230,8 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
/* Fall through */ /* Fall through */
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_fd(fop, fop->fd); ec_lock_prepare_fd(fop, fop->fd);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -1276,7 +1291,7 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
&cbk->iatt[0], cbk->buffers, cbk->xdata); &cbk->iatt[0], cbk->buffers, cbk->xdata);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -1291,6 +1306,12 @@ int32_t ec_manager_readv(ec_fop_data_t * fop, int32_t state)
NULL, 0, NULL, NULL, NULL); NULL, 0, NULL, NULL, NULL);
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
@ -1455,7 +1476,15 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_inode(fop, &fop->loc[0]); if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
}
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -1522,7 +1551,7 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
} }
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -1548,6 +1577,12 @@ int32_t ec_manager_stat(ec_fop_data_t * fop, int32_t state)
} }
} }
return EC_STATE_LOCK_REUSE;
case -EC_STATE_LOCK_REUSE:
case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 0);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:

View File

@ -92,7 +92,15 @@ int32_t ec_manager_removexattr(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_inode(fop, &fop->loc[0]); if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
}
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -149,11 +157,7 @@ int32_t ec_manager_removexattr(ec_fop_data_t * fop, int32_t state)
} }
} }
if (cbk->op_ret >= 0) return EC_STATE_LOCK_REUSE;
{
return EC_STATE_UPDATE_SIZE_AND_VERSION;
}
return EC_STATE_UNLOCK;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -178,14 +182,14 @@ int32_t ec_manager_removexattr(ec_fop_data_t * fop, int32_t state)
} }
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case EC_STATE_UPDATE_SIZE_AND_VERSION: case -EC_STATE_LOCK_REUSE:
ec_update_size_version(fop); case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK: case EC_STATE_UNLOCK:
ec_unlock(fop); ec_unlock(fop);
@ -484,7 +488,15 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_inode(fop, &fop->loc[0]); if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
}
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -556,11 +568,7 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
} }
} }
if (cbk->op_ret >= 0) return EC_STATE_LOCK_REUSE;
{
return EC_STATE_UPDATE_SIZE_AND_VERSION;
}
return EC_STATE_UNLOCK;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -586,14 +594,14 @@ int32_t ec_manager_setattr(ec_fop_data_t * fop, int32_t state)
} }
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case EC_STATE_UPDATE_SIZE_AND_VERSION: case -EC_STATE_LOCK_REUSE:
ec_update_size_version(fop); case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK: case EC_STATE_UNLOCK:
ec_unlock(fop); ec_unlock(fop);
@ -870,7 +878,15 @@ int32_t ec_manager_setxattr(ec_fop_data_t * fop, int32_t state)
{ {
case EC_STATE_INIT: case EC_STATE_INIT:
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_inode(fop, &fop->loc[0]); if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
}
ec_lock(fop);
return EC_STATE_DISPATCH; return EC_STATE_DISPATCH;
@ -926,11 +942,7 @@ int32_t ec_manager_setxattr(ec_fop_data_t * fop, int32_t state)
} }
} }
if (cbk->op_ret >= 0) return EC_STATE_LOCK_REUSE;
{
return EC_STATE_UPDATE_SIZE_AND_VERSION;
}
return EC_STATE_UNLOCK;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_DISPATCH: case -EC_STATE_DISPATCH:
@ -955,14 +967,14 @@ int32_t ec_manager_setxattr(ec_fop_data_t * fop, int32_t state)
} }
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case EC_STATE_UPDATE_SIZE_AND_VERSION: case -EC_STATE_LOCK_REUSE:
ec_update_size_version(fop); case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK: case EC_STATE_UNLOCK:
ec_unlock(fop); ec_unlock(fop);
@ -1366,7 +1378,15 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
/* Fall through */ /* Fall through */
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_inode(fop, &fop->loc[0]); if (fop->fd == NULL)
{
ec_lock_prepare_inode(fop, &fop->loc[0]);
}
else
{
ec_lock_prepare_fd(fop, fop->fd);
}
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -1447,11 +1467,7 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
} }
} }
if (cbk->op_ret >= 0) return EC_STATE_LOCK_REUSE;
{
return EC_STATE_UPDATE_SIZE_AND_VERSION;
}
return EC_STATE_UNLOCK;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -1477,14 +1493,14 @@ int32_t ec_manager_truncate(ec_fop_data_t * fop, int32_t state)
} }
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case EC_STATE_UPDATE_SIZE_AND_VERSION: case -EC_STATE_LOCK_REUSE:
ec_update_size_version(fop); case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK: case EC_STATE_UNLOCK:
ec_unlock(fop); ec_unlock(fop);
@ -2003,7 +2019,8 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
/* Fall through */ /* Fall through */
case EC_STATE_LOCK: case EC_STATE_LOCK:
ec_lock_fd(fop, fop->fd); ec_lock_prepare_fd(fop, fop->fd);
ec_lock(fop);
return EC_STATE_GET_SIZE_AND_VERSION; return EC_STATE_GET_SIZE_AND_VERSION;
@ -2015,9 +2032,9 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
case EC_STATE_DISPATCH: case EC_STATE_DISPATCH:
ec_writev_start(fop); ec_writev_start(fop);
return EC_STATE_WRITE_START; return EC_STATE_DELAYED_START;
case EC_STATE_WRITE_START: case EC_STATE_DELAYED_START:
ec_dispatch_all(fop); ec_dispatch_all(fop);
return EC_STATE_PREPARE_ANSWER; return EC_STATE_PREPARE_ANSWER;
@ -2089,11 +2106,7 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
cbk->xdata); cbk->xdata);
} }
if (cbk->op_ret >= 0) return EC_STATE_LOCK_REUSE;
{
return EC_STATE_UPDATE_SIZE_AND_VERSION;
}
return EC_STATE_UNLOCK;
case -EC_STATE_LOCK: case -EC_STATE_LOCK:
case -EC_STATE_GET_SIZE_AND_VERSION: case -EC_STATE_GET_SIZE_AND_VERSION:
@ -2108,14 +2121,14 @@ int32_t ec_manager_writev(ec_fop_data_t * fop, int32_t state)
NULL, NULL, NULL); NULL, NULL, NULL);
} }
return EC_STATE_UNLOCK; return EC_STATE_LOCK_REUSE;
case EC_STATE_UPDATE_SIZE_AND_VERSION: case -EC_STATE_LOCK_REUSE:
ec_update_size_version(fop); case EC_STATE_LOCK_REUSE:
ec_lock_reuse(fop, 1);
return EC_STATE_UNLOCK; return EC_STATE_UNLOCK;
case -EC_STATE_UPDATE_SIZE_AND_VERSION:
case -EC_STATE_UNLOCK: case -EC_STATE_UNLOCK:
case EC_STATE_UNLOCK: case EC_STATE_UNLOCK:
ec_unlock(fop); ec_unlock(fop);

View File

@ -27,11 +27,8 @@ enum gf_ec_mem_types_
{ {
ec_mt_ec_t = gf_common_mt_end + 1, ec_mt_ec_t = gf_common_mt_end + 1,
ec_mt_xlator_t, ec_mt_xlator_t,
ec_mt_ec_fop_data_t,
ec_mt_ec_cbk_data_t,
ec_mt_ec_inode_t, ec_mt_ec_inode_t,
ec_mt_ec_fd_t, ec_mt_ec_fd_t,
ec_mt_ec_lock_t,
ec_mt_ec_heal_t, ec_mt_ec_heal_t,
ec_mt_end ec_mt_end
}; };

View File

@ -151,6 +151,11 @@ void __ec_destroy_private(xlator_t * this)
mem_pool_destroy(ec->cbk_pool); mem_pool_destroy(ec->cbk_pool);
} }
if (ec->lock_pool != NULL)
{
mem_pool_destroy(ec->lock_pool);
}
LOCK_DESTROY(&ec->lock); LOCK_DESTROY(&ec->lock);
GF_FREE(ec); GF_FREE(ec);
@ -350,7 +355,9 @@ int32_t init(xlator_t * this)
ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024); ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024);
ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096); ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096);
if ((ec->fop_pool == NULL) || (ec->cbk_pool == NULL)) ec->lock_pool = mem_pool_new(ec_lock_t, 1024);
if ((ec->fop_pool == NULL) || (ec->cbk_pool == NULL) ||
(ec->lock_pool == NULL))
{ {
gf_log(this->name, GF_LOG_ERROR, "Failed to create memory pools."); gf_log(this->name, GF_LOG_ERROR, "Failed to create memory pools.");

View File

@ -49,6 +49,7 @@ struct _ec
gf_timer_t * timer; gf_timer_t * timer;
struct mem_pool * fop_pool; struct mem_pool * fop_pool;
struct mem_pool * cbk_pool; struct mem_pool * cbk_pool;
struct mem_pool * lock_pool;
}; };
#endif /* __EC_H__ */ #endif /* __EC_H__ */