5db11c21a9
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
395 lines
11 KiB
C
395 lines
11 KiB
C
/*
|
|
* (C) 2001 Clemson University and The University of Chicago
|
|
*
|
|
* See COPYING in top-level directory.
|
|
*/
|
|
|
|
#include "protocol.h"
|
|
#include "pvfs2-kernel.h"
|
|
#include "pvfs2-bufmap.h"
|
|
|
|
struct readdir_handle_s {
|
|
int buffer_index;
|
|
struct pvfs2_readdir_response_s readdir_response;
|
|
void *dents_buf;
|
|
};
|
|
|
|
/*
|
|
* decode routine needed by kmod to make sense of the shared page for readdirs.
|
|
*/
|
|
static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir)
|
|
{
|
|
int i;
|
|
struct pvfs2_readdir_response_s *rd =
|
|
(struct pvfs2_readdir_response_s *) ptr;
|
|
char *buf = ptr;
|
|
char **pptr = &buf;
|
|
|
|
readdir->token = rd->token;
|
|
readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount;
|
|
readdir->dirent_array = kmalloc(readdir->pvfs_dirent_outcount *
|
|
sizeof(*readdir->dirent_array),
|
|
GFP_KERNEL);
|
|
if (readdir->dirent_array == NULL)
|
|
return -ENOMEM;
|
|
*pptr += offsetof(struct pvfs2_readdir_response_s, dirent_array);
|
|
for (i = 0; i < readdir->pvfs_dirent_outcount; i++) {
|
|
dec_string(pptr, &readdir->dirent_array[i].d_name,
|
|
&readdir->dirent_array[i].d_length);
|
|
readdir->dirent_array[i].khandle =
|
|
*(struct pvfs2_khandle *) *pptr;
|
|
*pptr += 16;
|
|
}
|
|
return (unsigned long)*pptr - (unsigned long)ptr;
|
|
}
|
|
|
|
static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf,
|
|
int buffer_index)
|
|
{
|
|
long ret;
|
|
|
|
if (buf == NULL) {
|
|
gossip_err
|
|
("Invalid NULL buffer specified in readdir_handle_ctor\n");
|
|
return -ENOMEM;
|
|
}
|
|
if (buffer_index < 0) {
|
|
gossip_err
|
|
("Invalid buffer index specified in readdir_handle_ctor\n");
|
|
return -EINVAL;
|
|
}
|
|
rhandle->buffer_index = buffer_index;
|
|
rhandle->dents_buf = buf;
|
|
ret = decode_dirents(buf, &rhandle->readdir_response);
|
|
if (ret < 0) {
|
|
gossip_err("Could not decode readdir from buffer %ld\n", ret);
|
|
rhandle->buffer_index = -1;
|
|
gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf);
|
|
vfree(buf);
|
|
rhandle->dents_buf = NULL;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap,
|
|
struct readdir_handle_s *rhandle)
|
|
{
|
|
if (rhandle == NULL)
|
|
return;
|
|
|
|
/* kfree(NULL) is safe */
|
|
kfree(rhandle->readdir_response.dirent_array);
|
|
rhandle->readdir_response.dirent_array = NULL;
|
|
|
|
if (rhandle->buffer_index >= 0) {
|
|
readdir_index_put(bufmap, rhandle->buffer_index);
|
|
rhandle->buffer_index = -1;
|
|
}
|
|
if (rhandle->dents_buf) {
|
|
gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n",
|
|
rhandle->dents_buf);
|
|
vfree(rhandle->dents_buf);
|
|
rhandle->dents_buf = NULL;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Read directory entries from an instance of an open directory.
|
|
*
|
|
* \note This routine was converted for the readdir to iterate change
|
|
* in "struct file_operations". "converted" mostly amounts to
|
|
* changing occurrences of "readdir" and "filldir" in the
|
|
* comments to "iterate" and "dir_emit". Also filldir calls
|
|
* were changed to dir_emit calls.
|
|
*
|
|
* \param dir_emit callback function called for each entry read.
|
|
*
|
|
* \retval <0 on error
|
|
* \retval 0 when directory has been completely traversed
|
|
* \retval >0 if we don't call dir_emit for all entries
|
|
*
|
|
* \note If the dir_emit call-back returns non-zero, then iterate should
|
|
* assume that it has had enough, and should return as well.
|
|
*/
|
|
static int pvfs2_readdir(struct file *file, struct dir_context *ctx)
|
|
{
|
|
struct pvfs2_bufmap *bufmap = NULL;
|
|
int ret = 0;
|
|
int buffer_index;
|
|
__u64 *ptoken = file->private_data;
|
|
__u64 pos = 0;
|
|
ino_t ino = 0;
|
|
struct dentry *dentry = file->f_path.dentry;
|
|
struct pvfs2_kernel_op_s *new_op = NULL;
|
|
struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode);
|
|
int buffer_full = 0;
|
|
struct readdir_handle_s rhandle;
|
|
int i = 0;
|
|
int len = 0;
|
|
ino_t current_ino = 0;
|
|
char *current_entry = NULL;
|
|
long bytes_decoded;
|
|
|
|
gossip_ldebug(GOSSIP_DIR_DEBUG,
|
|
"%s: ctx->pos:%lld, token = %llu\n",
|
|
__func__,
|
|
lld(ctx->pos),
|
|
llu(*ptoken));
|
|
|
|
pos = (__u64) ctx->pos;
|
|
|
|
/* are we done? */
|
|
if (pos == PVFS_READDIR_END) {
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"Skipping to termination path\n");
|
|
return 0;
|
|
}
|
|
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"pvfs2_readdir called on %s (pos=%llu)\n",
|
|
dentry->d_name.name, llu(pos));
|
|
|
|
rhandle.buffer_index = -1;
|
|
rhandle.dents_buf = NULL;
|
|
memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response));
|
|
|
|
new_op = op_alloc(PVFS2_VFS_OP_READDIR);
|
|
if (!new_op)
|
|
return -ENOMEM;
|
|
|
|
new_op->uses_shared_memory = 1;
|
|
new_op->upcall.req.readdir.refn = pvfs2_inode->refn;
|
|
new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR;
|
|
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"%s: upcall.req.readdir.refn.khandle: %pU\n",
|
|
__func__,
|
|
&new_op->upcall.req.readdir.refn.khandle);
|
|
|
|
/*
|
|
* NOTE: the position we send to the readdir upcall is out of
|
|
* sync with ctx->pos since:
|
|
* 1. pvfs2 doesn't include the "." and ".." entries that are
|
|
* added below.
|
|
* 2. the introduction of distributed directory logic makes token no
|
|
* longer be related to f_pos and pos. Instead an independent
|
|
* variable is used inside the function and stored in the
|
|
* private_data of the file structure.
|
|
*/
|
|
new_op->upcall.req.readdir.token = *ptoken;
|
|
|
|
get_new_buffer_index:
|
|
ret = readdir_index_get(&bufmap, &buffer_index);
|
|
if (ret < 0) {
|
|
gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n",
|
|
ret);
|
|
goto out_free_op;
|
|
}
|
|
new_op->upcall.req.readdir.buf_index = buffer_index;
|
|
|
|
ret = service_operation(new_op,
|
|
"pvfs2_readdir",
|
|
get_interruptible_flag(dentry->d_inode));
|
|
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"Readdir downcall status is %d. ret:%d\n",
|
|
new_op->downcall.status,
|
|
ret);
|
|
|
|
if (ret == -EAGAIN && op_state_purged(new_op)) {
|
|
/*
|
|
* readdir shared memory aread has been wiped due to
|
|
* pvfs2-client-core restarting, so we must get a new
|
|
* index into the shared memory.
|
|
*/
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"%s: Getting new buffer_index for retry of readdir..\n",
|
|
__func__);
|
|
readdir_index_put(bufmap, buffer_index);
|
|
goto get_new_buffer_index;
|
|
}
|
|
|
|
if (ret == -EIO && op_state_purged(new_op)) {
|
|
gossip_err("%s: Client is down. Aborting readdir call.\n",
|
|
__func__);
|
|
readdir_index_put(bufmap, buffer_index);
|
|
goto out_free_op;
|
|
}
|
|
|
|
if (ret < 0 || new_op->downcall.status != 0) {
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"Readdir request failed. Status:%d\n",
|
|
new_op->downcall.status);
|
|
readdir_index_put(bufmap, buffer_index);
|
|
if (ret >= 0)
|
|
ret = new_op->downcall.status;
|
|
goto out_free_op;
|
|
}
|
|
|
|
bytes_decoded =
|
|
readdir_handle_ctor(&rhandle,
|
|
new_op->downcall.trailer_buf,
|
|
buffer_index);
|
|
if (bytes_decoded < 0) {
|
|
gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n",
|
|
ret);
|
|
ret = bytes_decoded;
|
|
readdir_index_put(bufmap, buffer_index);
|
|
goto out_free_op;
|
|
}
|
|
|
|
if (bytes_decoded != new_op->downcall.trailer_size) {
|
|
gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n",
|
|
bytes_decoded,
|
|
(long)new_op->downcall.trailer_size);
|
|
ret = -EINVAL;
|
|
goto out_destroy_handle;
|
|
}
|
|
|
|
if (pos == 0) {
|
|
ino = get_ino_from_khandle(dentry->d_inode);
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"%s: calling dir_emit of \".\" with pos = %llu\n",
|
|
__func__,
|
|
llu(pos));
|
|
ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
|
|
if (ret < 0)
|
|
goto out_destroy_handle;
|
|
ctx->pos++;
|
|
gossip_ldebug(GOSSIP_DIR_DEBUG,
|
|
"%s: ctx->pos:%lld\n",
|
|
__func__,
|
|
lld(ctx->pos));
|
|
pos++;
|
|
}
|
|
|
|
if (pos == 1) {
|
|
ino = get_parent_ino_from_dentry(dentry);
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"%s: calling dir_emit of \"..\" with pos = %llu\n",
|
|
__func__,
|
|
llu(pos));
|
|
ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
|
|
if (ret < 0)
|
|
goto out_destroy_handle;
|
|
ctx->pos++;
|
|
gossip_ldebug(GOSSIP_DIR_DEBUG,
|
|
"%s: ctx->pos:%lld\n",
|
|
__func__,
|
|
lld(ctx->pos));
|
|
pos++;
|
|
}
|
|
|
|
for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) {
|
|
len = rhandle.readdir_response.dirent_array[i].d_length;
|
|
current_entry = rhandle.readdir_response.dirent_array[i].d_name;
|
|
current_ino = pvfs2_khandle_to_ino(
|
|
&(rhandle.readdir_response.dirent_array[i].khandle));
|
|
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"calling dir_emit for %s with len %d, pos %ld\n",
|
|
current_entry,
|
|
len,
|
|
(unsigned long)pos);
|
|
ret =
|
|
dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
|
|
if (ret < 0) {
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"dir_emit() failed. ret:%d\n",
|
|
ret);
|
|
if (i < 2) {
|
|
gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n");
|
|
gossip_err("Duplicate entries may appear.\n");
|
|
}
|
|
buffer_full = 1;
|
|
break;
|
|
}
|
|
ctx->pos++;
|
|
gossip_ldebug(GOSSIP_DIR_DEBUG,
|
|
"%s: ctx->pos:%lld\n",
|
|
__func__,
|
|
lld(ctx->pos));
|
|
|
|
pos++;
|
|
}
|
|
|
|
/* this means that all of the dir_emit calls succeeded */
|
|
if (i == rhandle.readdir_response.pvfs_dirent_outcount) {
|
|
/* update token */
|
|
*ptoken = rhandle.readdir_response.token;
|
|
} else {
|
|
/* this means a dir_emit call failed */
|
|
if (rhandle.readdir_response.token == PVFS_READDIR_END) {
|
|
/*
|
|
* If PVFS hit end of directory, then there
|
|
* is no way to do math on the token that it
|
|
* returned. Instead we go by ctx->pos but
|
|
* back up to account for the artificial .
|
|
* and .. entries.
|
|
*/
|
|
ctx->pos -= 3;
|
|
} else {
|
|
/*
|
|
* this means a dir_emit call failed. !!! need to set
|
|
* back to previous ctx->pos, no middle value allowed
|
|
*/
|
|
pos -= (i - 1);
|
|
ctx->pos -= (i - 1);
|
|
}
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"at least one dir_emit call failed. Setting ctx->pos to: %lld\n",
|
|
lld(ctx->pos));
|
|
}
|
|
|
|
/*
|
|
* Did we hit the end of the directory?
|
|
*/
|
|
if (rhandle.readdir_response.token == PVFS_READDIR_END &&
|
|
!buffer_full) {
|
|
gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n");
|
|
ctx->pos = PVFS_READDIR_END;
|
|
}
|
|
|
|
gossip_debug(GOSSIP_DIR_DEBUG,
|
|
"pos = %llu, token = %llu"
|
|
", ctx->pos should have been %lld\n",
|
|
llu(pos),
|
|
llu(*ptoken),
|
|
lld(ctx->pos));
|
|
|
|
out_destroy_handle:
|
|
readdir_handle_dtor(bufmap, &rhandle);
|
|
out_free_op:
|
|
op_release(new_op);
|
|
gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
static int pvfs2_dir_open(struct inode *inode, struct file *file)
|
|
{
|
|
__u64 *ptoken;
|
|
|
|
file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
|
|
if (!file->private_data)
|
|
return -ENOMEM;
|
|
|
|
ptoken = file->private_data;
|
|
*ptoken = PVFS_READDIR_START;
|
|
return 0;
|
|
}
|
|
|
|
static int pvfs2_dir_release(struct inode *inode, struct file *file)
|
|
{
|
|
pvfs2_flush_inode(inode);
|
|
kfree(file->private_data);
|
|
return 0;
|
|
}
|
|
|
|
/** PVFS2 implementation of VFS directory operations */
|
|
const struct file_operations pvfs2_dir_operations = {
|
|
.read = generic_read_dir,
|
|
.iterate = pvfs2_readdir,
|
|
.open = pvfs2_dir_open,
|
|
.release = pvfs2_dir_release,
|
|
};
|