gfapi/handleops: Introducing glfs_xreaddirplus_r() fop for handleops

It is known that a readdirplus operation fetches stat as well for each of the
dirents. But applications often need extra information; for example,
NFS-Ganesha, which operates on handles, needs a handle for each of the
dirents returned. This requires extra calls to the backend, in this
case LOOKUP (a very expensive operation), resulting in very low
readdir performance.

To address that, this change introduces a new API with which applications can
request extra information to be returned as part of the
readdirplus response.

Currently this new api returns stat and handles as demanded by application.
The synopsis of the API is noted in glfs.h.

@todo:
* Enhance test script using this new API

Below were the perf results on single brick volume with and without
these changes -

Dataset used -
10*100 directories and each directory containing 100 empty files.

I used NFS-Ganesha application to test these changes -
>for i in {1..5}; do systemctl restart nfs-ganesha; sleep 10; mount -t nfs -o vers=4 localhost:/brick_vol /mnt; cd /mnt; echo "ITERATION$i"; date; find . > tmp-nfs.log; date; cd /; umount /mnt; sleep 2; done;

Without these changes -
ITERATION1
Mon Mar 20 17:22:26 IST 2017
Mon Mar 20 17:23:18 IST 2017
ITERATION2
Mon Mar 20 17:23:39 IST 2017
Mon Mar 20 17:24:28 IST 2017
ITERATION3
Mon Mar 20 17:24:49 IST 2017
Mon Mar 20 17:25:36 IST 2017
ITERATION4
Mon Mar 20 17:30:57 IST 2017
Mon Mar 20 17:31:37 IST 2017
ITERATION5
Mon Mar 20 17:31:57 IST 2017
Mon Mar 20 17:32:40 IST 2017
[root@dhcp35-197 /]#

On an average ~46.2 sec

With these changes applied -
ITERATION1
Mon Mar 20 17:35:03 IST 2017
Mon Mar 20 17:35:15 IST 2017
ITERATION2
Mon Mar 20 17:35:36 IST 2017
Mon Mar 20 17:35:46 IST 2017
ITERATION3
Mon Mar 20 17:36:06 IST 2017
Mon Mar 20 17:36:17 IST 2017
ITERATION4
Mon Mar 20 17:41:38 IST 2017
Mon Mar 20 17:41:49 IST 2017
ITERATION5
Mon Mar 20 17:42:10 IST 2017
Mon Mar 20 17:42:20 IST 2017

On an average ~10.8 sec

Updates #174
BUG: 1442950
Change-Id: I0f74f74dc62085ca4c4a23c38e3edc84bd850876
Signed-off-by: Soumya Koduri <skoduri@redhat.com>
Reviewed-on: https://review.gluster.org/15663
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
This commit is contained in:
Soumya Koduri 2017-04-21 16:30:20 +05:30 committed by Jeff Darcy
parent 316e3300cf
commit 41000cd0b5
12 changed files with 437 additions and 4 deletions

View File

@ -40,7 +40,7 @@
*/
#define GLFS_GFAPI_BASE GLFS_MSGID_COMP_API
#define GLFS_NUM_MESSAGES 49
#define GLFS_NUM_MESSAGES 50
#define GLFS_MSGID_END (GLFS_GFAPI_BASE + GLFS_NUM_MESSAGES + 1)
/* Messages with message IDs */
#define glfs_msg_start_x GLFS_GFAPI_BASE, "Invalid: Start of messages"
@ -95,6 +95,7 @@
#define API_MSG_CREATE_HANDLE_FAILED (GLFS_GFAPI_BASE + 47)
#define API_MSG_INODE_LINK_FAILED (GLFS_GFAPI_BASE + 48)
#define API_MSG_STATEDUMP_FAILED (GLFS_GFAPI_BASE + 49)
#define API_MSG_XREADDIRP_R_FAILED (GLFS_GFAPI_BASE + 50)
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"

View File

@ -158,4 +158,9 @@ _pub_glfs_realpath _glfs_realpath$GFAPI_3.7.17
_pub_glfs_sysrq _glfs_sysrq$GFAPI_3.10.0
_pub_glfs_xreaddirplus_r _glfs_xreaddirplus_r$GFAPI_3.11.0
_pub_glfs_xreaddirplus_r_get_stat _glfs_xreaddirplus_r_get_stat$GFAPI_3.11.0
_pub_glfs_xreaddirplus_r_get_object _glfs_xreaddirplus_r_get_object$GFAPI_3.11.0
_pub_glfs_object_copy _glfs_object_copy$GFAPI_3.11.0
_pub_glfs_ipc _glfs_ipc$GFAPI_4.0.0

View File

@ -199,7 +199,14 @@ GFAPI_3.10.0 {
glfs_sysrq;
} GFAPI_3.7.17;
GFAPI_3.11.0 {
glfs_xreaddirplus_r;
glfs_xreaddirplus_r_get_stat;
glfs_xreaddirplus_r_get_object;
glfs_object_copy;
} GFAPI_3.10.0;
GFAPI_4.0.0 {
global:
glfs_ipc;
} GFAPI_3.10.0;
} GFAPI_3.11.0;

View File

@ -2792,7 +2792,7 @@ glfd_entry_next (struct glfs_fd *glfd, int plus)
}
static struct dirent *
struct dirent *
glfs_readdirbuf_get (struct glfs_fd *glfd)
{
struct dirent *buf = NULL;
@ -4608,3 +4608,138 @@ out:
invalid_fs:
return ret;
}
/*
 * Given glfd of a directory, this function does readdirp and returns
 * xstat along with dirents.
 *
 * @glfd:    directory fd to read the next entry from
 * @flags:   GFAPI_XREADDIRP_* bits selecting what to fill into the xstat
 * @xstat_p: out; set to a newly allocated glfs_xreaddirp_stat only when an
 *           entry is returned, and to NULL at end of directory or on failure
 * @ext:     optional caller-provided dirent buffer; when NULL the buffer
 *           returned by glfs_readdirbuf_get() is used instead
 * @res:     out; set to the dirent buffer holding the entry, or to NULL at
 *           end of directory
 *
 * Returns the set of flags actually handled (>= 0) on success, 0 with
 * *res == NULL at end of directory, and -1 on failure.
 */
int
pub_glfs_xreaddirplus_r (struct glfs_fd *glfd, uint32_t flags,
                         struct glfs_xreaddirp_stat **xstat_p,
                         struct dirent *ext,
                         struct dirent **res)
{
        int                         ret   = -1;
        gf_dirent_t                *entry = NULL;
        struct dirent              *buf   = NULL;
        struct glfs_xreaddirp_stat *xstat = NULL;

        DECLARE_OLD_THIS;
        __GLFS_ENTRY_VALIDATE_FD (glfd, invalid_fs);

        GF_REF_GET (glfd);

        GF_VALIDATE_OR_GOTO (THIS->name, xstat_p, out);
        GF_VALIDATE_OR_GOTO (THIS->name, res, out);

        /* Never leave the caller with a stale pointer: on EOD and on
         * failure nothing is handed back, so a caller that frees
         * '*xstat_p' unconditionally must see NULL here. */
        *xstat_p = NULL;

        errno = 0;

        if (ext)
                buf = ext;
        else
                buf = glfs_readdirbuf_get (glfd);

        if (!buf)
                goto out;

        xstat = GF_CALLOC (1, sizeof (struct glfs_xreaddirp_stat),
                           glfs_mt_xreaddirp_stat_t);
        if (!xstat) {
                errno = ENOMEM;
                goto out;
        }

        /* this is readdirplus operation */
        entry = glfd_entry_next (glfd, 1);

        /* XXX: Ideally when we reach EOD, errno should have been
         * set to ENOENT. But that doesn't seem to be the case.
         *
         * The only way to confirm if its EOD at this point is that
         * errno == 0 and entry == NULL
         */
        if (errno)
                goto out;

        if (!entry) {
                /* reached EOD, ret = 0 */
                ret = 0;
                *res = NULL;

                /* Nothing is returned to the application at EOD, so the
                 * xstat allocated above must be released here. */
                glfs_free (xstat);
                xstat = NULL;

                goto out;
        }

        *res = buf;
        gf_dirent_to_dirent (entry, buf);

        if (flags & GFAPI_XREADDIRP_STAT) {
                glfs_iatt_to_stat (glfd->fs, &entry->d_stat, &xstat->st);
                xstat->flags_handled |= GFAPI_XREADDIRP_STAT;
        }

        if ((flags & GFAPI_XREADDIRP_HANDLE) &&
                /* skip . and .. */
                strcmp (buf->d_name, ".")
                && strcmp (buf->d_name, "..")) {

                /* Now create object.
                 * We can use "glfs_h_find_handle" as well as inodes would have
                 * already got linked as part of 'gf_link_inodes_from_dirent' */
                xstat->object = glfs_h_create_from_handle (glfd->fs,
                                                entry->d_stat.ia_gfid,
                                                GFAPI_HANDLE_LENGTH,
                                                NULL);

                if (xstat->object) { /* success */
                        /* note: xstat->object->inode->ref is taken
                         * This shall be unref'ed when application does
                         * glfs_free(xstat) */
                        xstat->flags_handled |= GFAPI_XREADDIRP_HANDLE;
                }
        }

        ret = xstat->flags_handled;
        *xstat_p = xstat;

        /* Logged only on the success path: 'xstat' is NULL on the early
         * failure paths that jump to 'out', so it must not be
         * dereferenced there. */
        gf_msg_debug (THIS->name, 0,
                      "xreaddirp- requested_flags (%x) , processed_flags (%x)",
                      flags, xstat->flags_handled);

out:
        GF_REF_PUT (glfd);

        if (ret < 0) {
                gf_msg (THIS->name, GF_LOG_WARNING, errno,
                        API_MSG_XREADDIRP_R_FAILED,
                        "glfs_x_readdirp_r failed - reason (%s)",
                        strerror (errno));

                if (xstat)
                        glfs_free (xstat);
        }

        __GLFS_EXIT_FS;

        return ret;

invalid_fs:
        return -1;
}
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0);
/*
 * Accessor for the stat stored inside an xreaddirplus result.
 *
 * Returns a pointer to the 'st' member of 'xstat' (so it is only valid as
 * long as 'xstat' itself is), or NULL when 'xstat' is NULL. When
 * GFAPI_XREADDIRP_STAT was not handled for this entry an error is logged,
 * but the (unfilled) stat is still returned.
 *
 * NOTE(review): the exported symbol lists name this accessor
 * glfs_xreaddirplus_r_get_stat while this definition uses
 * glfs_xreaddirplus_get_stat - confirm which name is intended.
 */
struct stat*
pub_glfs_xreaddirplus_get_stat (struct glfs_xreaddirp_stat *xstat)
{
        GF_VALIDATE_OR_GOTO ("glfs_xreaddirplus_get_stat", xstat, out);

        /* Parenthesised on purpose: '!' binds tighter than '&', so the
         * unparenthesised form only fired when no flag at all was set. */
        if (!(xstat->flags_handled & GFAPI_XREADDIRP_STAT)) {
                gf_msg (THIS->name, GF_LOG_ERROR, errno,
                        LG_MSG_INVALID_ARG,
                        "GFAPI_XREADDIRP_STAT is not set. Flags "
                        "handled for xstat(%p) are (%x)",
                        xstat, xstat->flags_handled);
        }

        return &xstat->st;

out:
        return NULL;
}
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0);

View File

@ -2379,3 +2379,48 @@ pub_glfs_h_anonymous_write (struct glfs *fs, struct glfs_object *object,
}
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0);
/*
 * Make a new glfs_object that refers to the same inode as 'src'.
 *
 * A reference is taken on src->inode for the copy and the inode's gfid is
 * copied into it. Returns the new object, or NULL when 'src' is NULL or the
 * allocation fails (errno is set to ENOMEM in the latter case).
 */
struct glfs_object*
pub_glfs_object_copy (struct glfs_object *src)
{
        struct glfs_object *copy = NULL;

        GF_VALIDATE_OR_GOTO ("glfs_dup_object", src, out);

        copy = GF_CALLOC (1, sizeof (*copy), glfs_mt_glfs_object_t);
        if (!copy) {
                errno = ENOMEM;
                gf_msg (THIS->name, GF_LOG_WARNING, errno,
                        API_MSG_CREATE_HANDLE_FAILED,
                        "glfs_dup_object for gfid-%s failed",
                        uuid_utoa (src->inode->gfid));
                goto out;
        }

        /* The copy holds its own inode reference, independent of 'src'. */
        gf_uuid_copy (copy->gfid, src->inode->gfid);
        copy->inode = inode_ref (src->inode);

out:
        return copy;
}
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0);
/*
 * Accessor for the object handle stored inside an xreaddirplus result.
 *
 * Returns xstat->object (which may be NULL), or NULL when 'xstat' is NULL.
 * When GFAPI_XREADDIRP_HANDLE was not handled for this entry an error is
 * logged before returning.
 *
 * NOTE(review): the exported symbol lists name this accessor
 * glfs_xreaddirplus_r_get_object while this definition uses
 * glfs_xreaddirplus_get_object - confirm which name is intended.
 */
struct glfs_object*
pub_glfs_xreaddirplus_get_object (struct glfs_xreaddirp_stat *xstat)
{
        GF_VALIDATE_OR_GOTO ("glfs_xreaddirplus_get_object", xstat, out);

        if (!(xstat->flags_handled & GFAPI_XREADDIRP_HANDLE)) {
                gf_msg (THIS->name, GF_LOG_ERROR, errno,
                        LG_MSG_INVALID_ARG,
                        "GFAPI_XREADDIRP_HANDLE is not set. Flags "
                        "handled for xstat(%p) are (%x)",
                        xstat, xstat->flags_handled);
        }

        return xstat->object;

out:
        return NULL;
}
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0);

View File

@ -355,6 +355,22 @@ glfs_h_anonymous_read (struct glfs *fs, struct glfs_object *object,
const void *buf, size_t count, off_t offset) __THROW
GFAPI_PUBLIC(glfs_h_anonymous_read, 3.7.0);
/*
 * Returns the object handle stored in 'xstat', or NULL if 'xstat' is NULL.
 *
 * Caution: The object returned by this function gets freed as part
 * of 'glfs_free(xstat)'. Make a copy using 'glfs_object_copy()' if the
 * object has to be used after that.
 */
struct glfs_object*
glfs_xreaddirplus_get_object (struct glfs_xreaddirp_stat *xstat) __THROW
GFAPI_PUBLIC(glfs_xreaddirplus_get_object, 3.11.0);
/* Applications should close the object returned by this routine
* explicitly using 'glfs_h_close()'
*/
struct glfs_object*
glfs_object_copy (struct glfs_object *src);
GFAPI_PUBLIC(glfs_object_copy, 3.11.0);
__END_DECLS
#endif /* !_GLFS_HANDLES_H */

View File

@ -245,6 +245,12 @@ struct glfs_upcall_inode {
struct stat oldp_buf; /* Latest stat of old parent dir handle */
};
/*
 * Per-dirent result of glfs_xreaddirplus_r(). Allocated by that call and
 * released by the application with glfs_free(), which also closes 'object'
 * when it is set.
 */
struct glfs_xreaddirp_stat {
struct stat st; /* Stat for that dirent - filled when GFAPI_XREADDIRP_STAT is handled */
struct glfs_object *object; /* Object handle - filled when GFAPI_XREADDIRP_HANDLE is handled */
uint32_t flags_handled; /* final set of flags successfully handled */
};
#define DEFAULT_EVENT_POOL_SIZE 16384
#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
@ -445,7 +451,6 @@ glfs_anonymous_pwritev (struct glfs *fs, struct glfs_object *object,
struct glfs_object *
glfs_h_resolve_symlink (struct glfs *fs, struct glfs_object *object);
/* Deprecated structures that were passed to client applications, replaced by
* accessor functions. Do not use these in new applications, and update older
* usage.
@ -475,5 +480,13 @@ struct glfs_callback_inode_arg {
struct stat oldp_buf; /* Latest stat of old parent
* dir handle */
};
/*
 * Readdir helpers shared with glfs_xreaddirplus_r().
 */

/* Returns the dirent buffer to use for 'glfd' when the caller supplied none;
 * callers check the result for NULL. */
struct dirent *
glfs_readdirbuf_get (struct glfs_fd *glfd);
/* Returns the next directory entry of 'glfd'; a non-zero 'plus' requests a
 * readdirplus. A NULL return with errno == 0 is treated by callers as end of
 * directory. */
gf_dirent_t *
glfd_entry_next (struct glfs_fd *glfd, int plus);
/* Fills 'dirent' from 'gf_dirent'. */
void
gf_dirent_to_dirent (gf_dirent_t *gf_dirent, struct dirent *dirent);
#endif /* !_GLFS_INTERNAL_H */

View File

@ -29,6 +29,7 @@ enum glfs_mem_types_ {
glfs_mt_acl_t,
glfs_mt_upcall_inode_t,
glfs_mt_realpath_t,
glfs_mt_xreaddirp_stat_t,
glfs_mt_end
};
#endif

View File

@ -1375,6 +1375,16 @@ pub_glfs_free (void *ptr)
GF_FREE (ptr);
break;
}
case glfs_mt_xreaddirp_stat_t:
{
struct glfs_xreaddirp_stat *to_free = ptr;
if (to_free->object)
glfs_h_close (to_free->object);
GF_FREE (ptr);
break;
}
default:
GF_FREE (ptr);
}

View File

@ -41,6 +41,7 @@
#include <sys/cdefs.h>
#include <dirent.h>
#include <sys/statvfs.h>
#include <inttypes.h>
#if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS)
#include <sys/acl.h>
@ -789,6 +790,71 @@ int glfs_sysrq (glfs_t *fs, char sysrq) __THROW
#define GLFS_SYSRQ_STATEDUMP 's' /* create a statedump */
/*
 * Structure returned as part of xreaddirplus. It is only forward-declared
 * here; applications read it through the glfs_xreaddirplus_get_*()
 * accessors.
 */
struct glfs_xreaddirp_stat;
/* Request flags to be used in XREADDIRP operation. These are bit flags and
 * may be OR'ed together. */
#define GFAPI_XREADDIRP_NULL 0x00000000 /* by default, no stat will be fetched */
#define GFAPI_XREADDIRP_STAT 0x00000001 /* Get stat */
#define GFAPI_XREADDIRP_HANDLE 0x00000002 /* Get object handle */
/*
 * Returns the stat stored in 'xstat', or NULL if 'xstat' is NULL.
 *
 * The returned pointer refers to memory inside 'xstat', so it gets freed as
 * part of glfs_free(xstat) and must not be used after that.
 */
struct stat*
glfs_xreaddirplus_get_stat (struct glfs_xreaddirp_stat *xstat) __THROW
GFAPI_PUBLIC(glfs_xreaddirplus_get_stat, 3.11.0);
/*
* SYNOPSIS
*
* glfs_xreaddirplus_r: Extended Readirplus operation
*
* DESCRIPTION
*
* This API does readdirplus operation, but along with stat it can fetch other
* extra information like object handles etc for each of the dirents returned
* based on requested flags. On success it returns the set of flags successfully
* processed.
*
* Note that there are chances that some of the requested information may not be
* available or returned (for example if reached EOD). Ensure to validate the
* returned value to determine what flags have been successfully processed
* & set.
*
* PARAMETERS
*
* INPUT:
* @glfd: GFAPI file descriptor of the directory
* @flags: Flags determining xreaddirp_stat requested
* Current available values are:
* GFAPI_XREADDIRP_NULL
* GFAPI_XREADDIRP_STAT
* GFAPI_XREADDIRP_HANDLE
* @ext: Dirent struture to copy the values to
* (though optional recommended to be allocated by application
* esp., in multi-threaded environement)
*
* OUTPUT:
* @res: to store the next dirent value. If NULL and return value is '0',
* it means it reached end of the directory.
* @xstat_p: Pointer to contain all the requested data returned
* for that dirent. Application should make use of glfs_free() API
* to free this pointer and the variables returned by
* glfs_xreaddirplus_get_*() APIs.
*
* RETURN VALUE:
* >=0: SUCCESS (value contains the flags successfully processed)
* -1: FAILURE
*/
int
glfs_xreaddirplus_r (struct glfs_fd *glfd, uint32_t flags,
struct glfs_xreaddirp_stat **xstat_p,
struct dirent *ext, struct dirent **res);
GFAPI_PUBLIC(glfs_xreaddirplus_r, 3.11.0);
/*
* Nobody needs this call at all yet except for the test script.
*/

View File

@ -0,0 +1,107 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <glusterfs/api/glfs.h>
#include <glusterfs/api/glfs-handles.h>
/*
 * If 'ret' is negative, report the failure of 'func' (a string naming the
 * call) on stderr together with strerror(errno), then jump to 'label'.
 *
 * The arguments are parenthesised so that expression arguments compare
 * correctly. 'ret' is evaluated more than once; pass a plain value.
 */
#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) do { \
        if ((ret) < 0) { \
                fprintf (stderr, "%s : returned error %d (%s)\n", \
                         (func), (ret), strerror (errno)); \
                goto label; \
        } \
} while (0)
#define MAX_FILES_CREATE 10
#define MAXPATHNAME 512
/*
 * Test driver for glfs_xreaddirplus_r().
 *
 * Usage: <prog> <hostname> <volname> <logfile>
 *
 * Connects to the volume, creates MAX_FILES_CREATE files in its root, then
 * walks the root directory with glfs_xreaddirplus_r() requesting both stat
 * and handle for every entry. Returns 0 on success and non-zero when any
 * step (including glfs_fini) fails.
 */
int
main (int argc, char *argv[])
{
        int                         ret = -1;
        int                         fini_ret = 0;
        glfs_t                     *fs = NULL;
        char                       *volname = NULL;
        char                       *logfile = NULL;
        char                       *hostname = NULL;
        char                       *my_file = "file_";
        char                        my_file_name[MAXPATHNAME];
        struct dirent               de;
        struct dirent              *pde = NULL;
        struct glfs_xreaddirp_stat *xstat = NULL;
        uint32_t                    rflags = (GFAPI_XREADDIRP_STAT |
                                              GFAPI_XREADDIRP_HANDLE);
        uint32_t                    flags = O_RDWR|O_SYNC;
        struct glfs_fd             *fd = NULL;
        int                         i = 0;

        if (argc != 4) {
                fprintf (stderr, "Invalid argument\n");
                return 1;
        }

        hostname = argv[1];
        volname = argv[2];
        logfile = argv[3];

        fs = glfs_new (volname);
        if (!fs) {
                ret = -1;
                VALIDATE_AND_GOTO_LABEL_ON_ERROR ("glfs_new", ret, out);
        }

        ret = glfs_set_volfile_server (fs, "tcp", hostname, 24007);
        VALIDATE_AND_GOTO_LABEL_ON_ERROR ("glfs_set_volfile_server", ret, out);

        ret = glfs_set_logging (fs, logfile, 7);
        VALIDATE_AND_GOTO_LABEL_ON_ERROR ("glfs_set_logging", ret, out);

        ret = glfs_init (fs);
        VALIDATE_AND_GOTO_LABEL_ON_ERROR ("glfs_init", ret, out);

        for (i = 0; i < MAX_FILES_CREATE; i++) {
                sprintf (my_file_name, "%s%d", my_file, i);

                fd = glfs_creat (fs, my_file_name, flags, 0644);
                if (fd == NULL) {
                        ret = -1;
                        VALIDATE_AND_GOTO_LABEL_ON_ERROR ("glfs_creat", ret,
                                                          out);
                }

                glfs_close (fd);
                /* Do not leave a closed fd around for the cleanup path. */
                fd = NULL;
        }

        /* XXX: measure performance and memory usage of this readdirp call */

        fd = glfs_opendir (fs, "/");
        if (fd == NULL) {
                ret = -1;
                VALIDATE_AND_GOTO_LABEL_ON_ERROR ("glfs_opendir", ret, out);
        }

        ret = glfs_xreaddirplus_r (fd, rflags, &xstat, &de, &pde);
        while (ret > 0 && pde != NULL) {
                /* glfs_telldir() returns a signed long */
                fprintf (stderr, "%s: %ld\n", de.d_name, glfs_telldir (fd));

                if (xstat) {
                        glfs_free (xstat);
                        /* Reset so that a call which does not hand back a
                         * new xstat (EOD or failure) cannot lead to a
                         * second free of the same pointer below. */
                        xstat = NULL;
                }

                ret = glfs_xreaddirplus_r (fd, rflags, &xstat, &de, &pde);

                /* XXX: Use other APIs to fetch stat and handles */
        }

        if (xstat) {
                glfs_free (xstat);
                xstat = NULL;
        }

        VALIDATE_AND_GOTO_LABEL_ON_ERROR ("glfs_xreaddirp_r", ret, out);

out:
        if (fd != NULL)
                glfs_close (fd);

        if (fs) {
                /* Keep the glfs_fini() status separate so that it cannot
                 * hide an earlier failure recorded in 'ret'. */
                fini_ret = glfs_fini (fs);
                if (fini_ret)
                        fprintf (stderr, "glfs_fini(fs) returned %d\n",
                                 fini_ret);
        }

        return (ret < 0) ? 1 : fini_ret;
}

View File

@ -0,0 +1,27 @@
#!/bin/bash
# Regression test for glfs_xreaddirplus_r(): creates and starts a
# single-brick volume, builds the C test program against libgfapi, runs it
# against the volume, then tears everything down.
# TEST, EXPECT, cleanup, build_tester and cleanup_tester are not defined in
# this script; presumably they come from the sourced .rc files.
. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc
cleanup;
TEST glusterd
# Single-brick volume on the local host
TEST $CLI volume create $V0 $H0:$B0/brick1;
EXPECT 'Created' volinfo_field $V0 'Status';
TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';
# The test program writes its gfapi log into the gluster log directory
logdir=`gluster --print-logdir`
TEST build_tester $(dirname $0)/glfs_xreaddirplus_r.c -lgfapi
# Arguments: <hostname> <volname> <logfile>
TEST $(dirname $0)/glfs_xreaddirplus_r $H0 $V0 $logdir/glfs_xreaddirplus_r.log
cleanup_tester $(dirname $0)/glfs_xreaddirplus_r
TEST $CLI volume stop $V0
TEST $CLI volume delete $V0
cleanup;