performance/write-behind: better invalidation in readdirp
Current invalidation of stats in wb_readdirp_cbk is prone to races. As the deleted comment explains, <snip> We cannot guarantee integrity of entry->d_stat as there are cached writes. The stat is most likely stale as it doesn't account the cached writes. However, checking for non-empty liability list here is not a fool-proof solution as there can be races like, 1. readdirp is successful on posix 2. sync of cached write is successful on posix 3. write-behind received sync response and removed the request from liability queue 4. readdirp response is processed at write-behind. In the above scenario, stat for the file is sent back in readdirp response but it is stale. </snip> Change-Id: I6ce170985cc6ce3df2382ec038dd5415beefded5 Signed-off-by: Raghavendra G <rgowdapp@redhat.com> Updates: bz#1512691
This commit is contained in:
parent
379d427960
commit
4d3c62e71f
@ -132,6 +132,7 @@ __dentry_unset (dentry_t *dentry)
|
||||
__dentry_unhash (dentry);
|
||||
|
||||
list_del_init (&dentry->inode_list);
|
||||
list_del_init (&dentry->parent_list);
|
||||
|
||||
GF_FREE (dentry->name);
|
||||
dentry->name = NULL;
|
||||
@ -141,6 +142,7 @@ __dentry_unset (dentry_t *dentry)
|
||||
dentry->parent = NULL;
|
||||
}
|
||||
|
||||
dentry->inode = NULL;
|
||||
mem_put (dentry);
|
||||
}
|
||||
|
||||
@ -604,6 +606,7 @@ __dentry_create (inode_t *inode, inode_t *parent, const char *name)
|
||||
|
||||
INIT_LIST_HEAD (&newd->inode_list);
|
||||
INIT_LIST_HEAD (&newd->hash);
|
||||
INIT_LIST_HEAD (&newd->parent_list);
|
||||
|
||||
newd->name = gf_strdup (name);
|
||||
if (newd->name == NULL) {
|
||||
@ -616,8 +619,9 @@ __dentry_create (inode_t *inode, inode_t *parent, const char *name)
|
||||
newd->parent = __inode_ref (parent);
|
||||
|
||||
list_add (&newd->inode_list, &inode->dentry_list);
|
||||
newd->inode = inode;
|
||||
list_add (&newd->parent_list, &parent->children);
|
||||
|
||||
newd->inode = inode;
|
||||
out:
|
||||
return newd;
|
||||
}
|
||||
@ -648,6 +652,7 @@ __inode_create (inode_table_t *table)
|
||||
INIT_LIST_HEAD (&newi->list);
|
||||
INIT_LIST_HEAD (&newi->hash);
|
||||
INIT_LIST_HEAD (&newi->dentry_list);
|
||||
INIT_LIST_HEAD (&newi->children);
|
||||
|
||||
newi->_ctx = GF_CALLOC (1,
|
||||
(sizeof (struct _inode_ctx) * table->ctxcount),
|
||||
|
@ -60,6 +60,7 @@ struct _inode_table {
|
||||
struct _dentry {
|
||||
struct list_head inode_list; /* list of dentries of inode */
|
||||
struct list_head hash; /* hash table pointers */
|
||||
struct list_head parent_list; /* list of parent's children */
|
||||
inode_t *inode; /* inode of this directory entry */
|
||||
char *name; /* name of the directory entry */
|
||||
inode_t *parent; /* directory of the entry */
|
||||
@ -99,6 +100,7 @@ struct _inode {
|
||||
struct list_head dentry_list; /* list of directory entries for this inode */
|
||||
struct list_head hash; /* hash table pointers */
|
||||
struct list_head list; /* active/lru/purge */
|
||||
struct list_head children; /* list of children */
|
||||
|
||||
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
|
||||
};
|
||||
|
@ -115,6 +115,8 @@ typedef struct wb_inode {
|
||||
* error during fulfill.
|
||||
*/
|
||||
|
||||
int invalidate_stat;
|
||||
|
||||
} wb_inode_t;
|
||||
|
||||
|
||||
@ -2461,29 +2463,7 @@ wb_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
|
||||
LOCK (&wb_inode->lock);
|
||||
{
|
||||
if (!list_empty (&wb_inode->liability)) {
|
||||
/* We cannot guarantee integrity of
|
||||
entry->d_stat as there are cached writes.
|
||||
The stat is most likely stale as it doesn't
|
||||
account the cached writes. However, checking
|
||||
for non-empty liability list here is not a
|
||||
fool-proof solution as there can be races
|
||||
like,
|
||||
1. readdirp is successful on posix
|
||||
2. sync of cached write is successful on
|
||||
posix
|
||||
3. write-behind received sync response and
|
||||
removed the request from liability queue
|
||||
4. readdirp response is processed at
|
||||
write-behind
|
||||
|
||||
In the above scenario, stat for the file is
|
||||
sent back in readdirp response but it is
|
||||
stale.
|
||||
|
||||
For lack of better solutions I am sticking
|
||||
with current solution.
|
||||
*/
|
||||
if (wb_inode->invalidate_stat) {
|
||||
inode = entry->inode;
|
||||
|
||||
entry->inode = NULL;
|
||||
@ -2491,6 +2471,7 @@ wb_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
||||
sizeof (entry->d_stat));
|
||||
|
||||
inode_unref (inode);
|
||||
wb_inode->invalidate_stat = 0;
|
||||
}
|
||||
}
|
||||
UNLOCK (&wb_inode->lock);
|
||||
@ -2507,6 +2488,30 @@ int32_t
|
||||
wb_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
|
||||
off_t off, dict_t *xdata)
|
||||
{
|
||||
dentry_t *child = NULL;
|
||||
wb_inode_t *wb_inode = NULL;
|
||||
wb_request_t *each = NULL;
|
||||
|
||||
pthread_mutex_lock (&fd->inode->table->lock);
|
||||
{
|
||||
list_for_each_entry (child, &fd->inode->children, parent_list) {
|
||||
wb_inode = wb_inode_ctx_get (this, child->inode);
|
||||
if (!wb_inode)
|
||||
continue;
|
||||
|
||||
LOCK (&wb_inode->lock);
|
||||
{
|
||||
list_for_each_entry (each, &wb_inode->liability,
|
||||
lie) {
|
||||
if (each->gen < wb_inode->gen)
|
||||
wb_inode->invalidate_stat = 1;
|
||||
}
|
||||
}
|
||||
UNLOCK (&wb_inode->lock);
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock (&fd->inode->table->lock);
|
||||
|
||||
STACK_WIND (frame, wb_readdirp_cbk, FIRST_CHILD(this),
|
||||
FIRST_CHILD(this)->fops->readdirp,
|
||||
fd, size, off, xdata);
|
||||
|
Loading…
x
Reference in New Issue
Block a user