debug/io-stats: Add FOP sampling feature

Summary:
- Using sampling feature you can record details about every Nth FOP.
  The fields in each sample are: FOP type, hostname, uid, gid, FOP priority,
  port and time taken (latency) to fufill the request.
- Implemented using a ring buffer which is not (m/c) allocated in the IO path,
  this should make the sampling process pretty cheap.
- DNS resolution done @ dump time not @ sample time for performance w/
  cache
- Metrics can be used for both diagnostics, traffic/IO profiling as well
  as P95/P99 calculations
- To control this feature there are two new volume options:
  diagnostics.fop-sample-interval - The sampling interval, e.g. 1 means
  sample every FOP, 100 means sample every 100th FOP
  diagnostics.fop-sample-buf-size - The size (in bytes) of the ring
  buffer used to store the samples.  In the even more samples
  are collected in the stats dump interval than can be held in this buffer,
  the oldest samples shall be discarded.  Samples are stored in the log
  directory under /var/log/glusterfs/samples.
- Uses DNS cache written by sshreyas@fb.com (Thank-you!), the DNS cache
  TTL is controlled by the diagnostics.stats-dnscache-ttl-sec option
  and defaults to 24hrs.

Test Plan:
- Valgrind'd to ensure it's leak free
- Run prove test(s)
- Shadow testing on 100+ brick cluster

Change-Id: I9ee14c2fa18486b7efb38e59f70687249d3f96d8
BUG: 1271310
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/12210
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
This commit is contained in:
Richard Wareing 2015-06-23 17:03:11 -07:00 committed by Vijay Bellur
parent 846697d91e
commit d3e496cbcd
14 changed files with 863 additions and 34 deletions

View File

@ -26,3 +26,12 @@
/usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true
endscript
}
/var/log/glusterfs/samples/*.samp {
daily
rotate 3
sharedscripts
missingok
compress
delaycompress
}

View File

@ -34,6 +34,7 @@
#if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS)
#include <sys/sysctl.h>
#endif
#include <libgen.h>
#include "compat-errno.h"
#include "logging.h"
@ -210,7 +211,7 @@ out:
}
/**
* gf_resolve_parent_path -- Given a path, returns an allocated string
* gf_resolve_path_parent -- Given a path, returns an allocated string
* containing the parent's path.
* @path: Path to parse
* @return: The parent path if found, NULL otherwise
@ -359,6 +360,135 @@ err:
return -1;
}
/**
* gf_dnscache_init -- Initializes a dnscache struct and sets the ttl
* to the specified value in the parameter.
*
* @ttl: the TTL in seconds
* @return: SUCCESS: Pointer to an allocated dnscache struct
* FAILURE: NULL
*/
struct dnscache *
gf_dnscache_init (time_t ttl)
{
struct dnscache *cache = GF_MALLOC (sizeof (*cache),
gf_common_mt_dnscache);
cache->cache_dict = NULL;
cache->ttl = ttl;
return cache;
}
/**
* gf_dnscache_entry_init -- Initialize a dnscache entry
*
* @return: SUCCESS: Pointer to an allocated dnscache entry struct
* FAILURE: NULL
*/
struct dnscache_entry *
gf_dnscache_entry_init ()
{
struct dnscache_entry *entry = GF_CALLOC (1, sizeof (*entry),
gf_common_mt_dnscache_entry);
return entry;
}
/**
* gf_dnscache_entry_deinit -- Free memory used by a dnscache entry
*
* @entry: Pointer to deallocate
*/
void
gf_dnscache_entry_deinit (struct dnscache_entry *entry)
{
GF_FREE (entry->ip);
GF_FREE (entry->fqdn);
GF_FREE (entry);
}
/**
* gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address.
*
* @ip: The IP address to perform a reverse lookup on
*
* @return: success: Allocated string containing the hostname
* failure: NULL
*/
char *
gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache)
{
char *fqdn = NULL;
int ret = 0;
dict_t *cache = NULL;
data_t *entrydata = NULL;
struct dnscache_entry *dnsentry = NULL;
gf_boolean_t from_cache = _gf_false;
if (!dnscache)
goto out;
if (!dnscache->cache_dict) {
dnscache->cache_dict = dict_new ();
if (!dnscache->cache_dict) {
goto out;
}
}
cache = dnscache->cache_dict;
/* Quick cache lookup to see if we already hold it */
entrydata = dict_get (cache, (char *)ip);
if (entrydata) {
dnsentry = (struct dnscache_entry *)entrydata->data;
/* First check the TTL & timestamp */
if (time (NULL) - dnsentry->timestamp > dnscache->ttl) {
gf_dnscache_entry_deinit (dnsentry);
entrydata->data = NULL; /* Mark this as 'null' so
* dict_del () doesn't try free
* this after we've already
* freed it.
*/
dict_del (cache, (char *)ip); /* Remove this entry */
} else {
/* Cache entry is valid, get the FQDN and return */
fqdn = dnsentry->fqdn;
from_cache = _gf_true; /* Mark this as from cache */
goto out;
}
}
/* Get the FQDN */
ret = gf_get_hostname_from_ip ((char *)ip, &fqdn);
if (ret != 0)
goto out;
if (!fqdn) {
gf_log_callingfn ("resolver", GF_LOG_CRITICAL,
"Allocation failed for the host address");
goto out;
}
from_cache = _gf_false;
out:
/* Insert into the cache */
if (fqdn && !from_cache) {
struct dnscache_entry *entry = gf_dnscache_entry_init ();
if (!entry) {
goto out;
}
entry->fqdn = fqdn;
entry->ip = gf_strdup (ip);
if (!ip) {
gf_dnscache_entry_deinit (entry);
goto out;
}
entry->timestamp = time (NULL);
entrydata = bin_to_data (entry, sizeof (*entry));
dict_set (cache, (char *)ip, entrydata);
}
return fqdn;
}
struct xldump {
int lineno;
@ -4012,3 +4142,131 @@ _unmask_cancellation (void)
{
(void) pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
}
const char *
fop_enum_to_pri_string (glusterfs_fop_t fop)
{
switch (fop) {
case GF_FOP_OPEN:
case GF_FOP_STAT:
case GF_FOP_FSTAT:
case GF_FOP_LOOKUP:
case GF_FOP_ACCESS:
case GF_FOP_READLINK:
case GF_FOP_OPENDIR:
case GF_FOP_STATFS:
case GF_FOP_READDIR:
case GF_FOP_READDIRP:
return "HIGH";
case GF_FOP_CREATE:
case GF_FOP_FLUSH:
case GF_FOP_LK:
case GF_FOP_INODELK:
case GF_FOP_FINODELK:
case GF_FOP_ENTRYLK:
case GF_FOP_FENTRYLK:
case GF_FOP_UNLINK:
case GF_FOP_SETATTR:
case GF_FOP_FSETATTR:
case GF_FOP_MKNOD:
case GF_FOP_MKDIR:
case GF_FOP_RMDIR:
case GF_FOP_SYMLINK:
case GF_FOP_RENAME:
case GF_FOP_LINK:
case GF_FOP_SETXATTR:
case GF_FOP_GETXATTR:
case GF_FOP_FGETXATTR:
case GF_FOP_FSETXATTR:
case GF_FOP_REMOVEXATTR:
case GF_FOP_FREMOVEXATTR:
case GF_FOP_IPC:
return "NORMAL";
case GF_FOP_READ:
case GF_FOP_WRITE:
case GF_FOP_FSYNC:
case GF_FOP_TRUNCATE:
case GF_FOP_FTRUNCATE:
case GF_FOP_FSYNCDIR:
case GF_FOP_XATTROP:
case GF_FOP_FXATTROP:
case GF_FOP_RCHECKSUM:
case GF_FOP_ZEROFILL:
case GF_FOP_FALLOCATE:
return "LOW";
case GF_FOP_NULL:
case GF_FOP_FORGET:
case GF_FOP_RELEASE:
case GF_FOP_RELEASEDIR:
case GF_FOP_GETSPEC:
case GF_FOP_MAXVALUE:
case GF_FOP_DISCARD:
return "LEAST";
}
return "UNKNOWN";
}
const char *
fop_enum_to_string (glusterfs_fop_t fop)
{
static const char *const str_map[] = {
"NULL",
"STAT",
"READLINK",
"MKNOD",
"MKDIR",
"UNLINK",
"RMDIR",
"SYMLINK",
"RENAME",
"LINK",
"TRUNCATE",
"OPEN",
"READ",
"WRITE",
"STATFS",
"FLUSH",
"FSYNC",
"SETXATTR",
"GETXATTR",
"REMOVEXATTR",
"OPENDIR",
"FSYNCDIR",
"ACCESS",
"CREATE",
"FTRUNCATE",
"FSTAT",
"LK",
"LOOKUP",
"READDIR",
"INODELK",
"FINODELK",
"ENTRYLK",
"FENTRYLK",
"XATTROP",
"FXATTROP",
"FGETXATTR",
"FSETXATTR",
"RCHECKSUM",
"SETATTR",
"FSETATTR",
"READDIRP",
"FORGET",
"RELEASE",
"RELEASEDIR",
"GETSPEC",
"FREMOVEXATTR",
"FALLOCATE",
"DISCARD",
"ZEROFILL",
"IPC",
"MAXVALUE"};
if (fop <= GF_FOP_MAXVALUE)
return str_map[fop];
return "UNKNOWNFOP";
}

View File

@ -159,6 +159,27 @@ typedef struct dht_changelog_rename_info {
typedef int (*gf_cmp) (void *, void *);
struct _dict;
struct dnscache {
struct _dict *cache_dict;
time_t ttl;
};
struct dnscache_entry {
char *ip;
char *fqdn;
time_t timestamp;
};
struct dnscache *gf_dnscache_init (time_t ttl);
struct dnscache_entry *gf_dnscache_entry_init ();
void gf_dnscache_entry_deinit (struct dnscache_entry *entry);
char *gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache);
char *gf_resolve_path_parent (const char *path);
void gf_global_variable_init(void);
int32_t gf_resolve_ip6 (const char *hostname, uint16_t port, int family,

View File

@ -258,6 +258,10 @@
/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
/*
* OTHER NOTE: fop_enum_to_str and fop_enum_to_pri_str (in common-utils.h) also
* contain lists of fops, so if you update this list UPDATE THOSE TOO.
*/
typedef enum {
GF_FOP_NULL = 0,
GF_FOP_STAT,
@ -312,6 +316,8 @@ typedef enum {
GF_FOP_MAXVALUE,
} glusterfs_fop_t;
const char *fop_enum_to_pri_string (glusterfs_fop_t fop);
const char *fop_enum_to_string (glusterfs_fop_t fop);
typedef enum {
GF_MGMT_NULL = 0,

View File

@ -128,6 +128,8 @@ enum gf_common_mem_types_ {
gf_common_mt_ereg,
gf_common_mt_wr,
gf_common_mt_rdma_arena_mr,
gf_common_mt_dnscache = 115,
gf_common_mt_dnscache_entry = 116,
gf_common_mt_parser_t,
gf_common_quota_meta_t,
/*related to gfdb library*/

View File

@ -96,6 +96,7 @@ struct _call_stack_t {
uid_t uid;
gid_t gid;
pid_t pid;
char identifier[UNIX_PATH_MAX];
uint16_t ngrps;
uint32_t groups_small[SMALL_GROUP_COUNT];
uint32_t *groups_large;

View File

@ -0,0 +1,61 @@
#!/bin/bash
#
. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc
SAMPLE_FILE="$(gluster --print-logdir)/samples/glusterfs_${V0}.samp"
function print_cnt() {
local FOP_TYPE=$1
local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMPLE_FILE} | wc -l)
echo $FOP_CNT
}
# Verify we got non-zero counts for stats/lookup/readdir
check_samples() {
STAT_CNT=$(print_cnt STAT)
if [ "$STAT_CNT" -le "0" ]; then
echo "STAT count is zero"
return
fi
LOOKUP_CNT=$(print_cnt LOOKUP)
if [ "$LOOKUP_CNT" -le "0" ]; then
echo "LOOKUP count is zero"
return
fi
READDIR_CNT=$(print_cnt READDIR)
if [ "$READDIR_CNT" -le "0" ]; then
echo "READDIR count is zero"
return
fi
echo "OK"
}
cleanup;
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
TEST $CLI volume set $V0 nfs.disable off
TEST $CLI volume set $V0 diagnostics.latency-measurement on
TEST $CLI volume set $V0 diagnostics.count-fop-hits on
TEST $CLI volume set $V0 diagnostics.stats-dump-interval 2
TEST $CLI volume set $V0 diagnostics.fop-sample-buf-size 65535
TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1
TEST $CLI volume set $V0 diagnostics.stats-dnscache-ttl-sec 3600
TEST $CLI volume start $V0
TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
for i in {1..5}
do
dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1
done
TEST ls -l $M0
EXPECT_WITHIN 6 "OK" check_samples
cleanup

View File

@ -20,6 +20,8 @@ enum gf_io_stats_mem_types_ {
gf_io_stats_mt_ios_fd,
gf_io_stats_mt_ios_stat,
gf_io_stats_mt_ios_stat_list,
gf_io_stats_mt_ios_sample_buf,
gf_io_stats_mt_ios_sample,
gf_io_stats_mt_end
};
#endif

View File

@ -34,8 +34,12 @@
#include "logging.h"
#include "cli1-xdr.h"
#include "statedump.h"
#include <pwd.h>
#include <grp.h>
#define MAX_LIST_MEMBERS 100
#define DEFAULT_PWD_BUF_SZ 16384
#define DEFAULT_GRP_BUF_SZ 16384
typedef enum {
IOS_STATS_TYPE_NONE,
@ -82,6 +86,25 @@ struct ios_stat_head {
struct ios_stat_list *iosstats;
};
typedef struct _ios_sample_t {
uid_t uid;
gid_t gid;
char identifier[UNIX_PATH_MAX];
glusterfs_fop_t fop_type;
struct timeval timestamp;
double elapsed;
} ios_sample_t;
typedef struct _ios_sample_buf_t {
uint64_t pos; /* Position in write buffer */
uint64_t size; /* Size of ring buffer */
uint64_t collected; /* Number of samples we've collected */
uint64_t observed; /* Number of FOPs we've observed */
ios_sample_t *ios_samples; /* Our list of samples */
} ios_sample_buf_t;
struct ios_lat {
double min;
double max;
@ -102,7 +125,6 @@ struct ios_global_stats {
struct timeval max_openfd_time;
};
struct ios_conf {
gf_lock_t lock;
struct ios_global_stats cumulative;
@ -116,6 +138,12 @@ struct ios_conf {
int32_t ios_dump_interval;
pthread_t dump_thread;
gf_boolean_t dump_thread_should_die;
gf_lock_t ios_sampling_lock;
int32_t ios_sample_interval;
int32_t ios_sample_buf_size;
ios_sample_buf_t *ios_sample_buf;
struct dnscache *dnscache;
int32_t ios_dnscache_ttl_sec;
};
@ -133,7 +161,8 @@ typedef enum {
IOS_DUMP_TYPE_FILE = 1,
IOS_DUMP_TYPE_DICT = 2,
IOS_DUMP_TYPE_JSON_FILE = 3,
IOS_DUMP_TYPE_MAX = 4
IOS_DUMP_TYPE_SAMPLES = 4,
IOS_DUMP_TYPE_MAX = 5
} ios_dump_type_t;
struct ios_dump_args {
@ -163,6 +192,7 @@ is_fop_latency_started (call_frame_t *frame)
}
#ifdef GF_LINUX_HOST_OS
const char *_IOS_SAMP_DIR = "/var/log/glusterfs/samples";
#define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats"
#else
#define _IOS_DUMP_DIR DATADIR "/db/glusterd/stats"
@ -429,6 +459,72 @@ ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)
}
/*
* So why goto all this trouble? Why not just queue up some samples in
* a big list and malloc away? Well malloc is expensive relative
* to what we are measuring, so cannot have any malloc's (or worse
* callocs) in our measurement code paths. Instead, we are going to
* pre-allocate a circular buffer and collect a maximum number of samples.
* Prior to dumping them all we'll create a new buffer and swap the
* old buffer with the new, and then proceed to dump the statistics
* in our dump thread.
*
*/
ios_sample_buf_t *
ios_create_sample_buf (size_t buf_size)
{
ios_sample_buf_t *ios_sample_buf = NULL;
ios_sample_t *ios_samples = NULL;
ios_sample_buf = GF_CALLOC (1,
sizeof (*ios_sample_buf),
gf_io_stats_mt_ios_sample_buf);
if (!ios_sample_buf)
goto err;
ios_samples = GF_CALLOC (buf_size,
sizeof (*ios_samples),
gf_io_stats_mt_ios_sample);
if (!ios_samples)
goto err;
ios_sample_buf->ios_samples = ios_samples;
ios_sample_buf->size = buf_size;
ios_sample_buf->pos = 0;
ios_sample_buf->observed = 0;
ios_sample_buf->collected = 0;
return ios_sample_buf;
err:
GF_FREE (ios_sample_buf);
return NULL;
}
void
ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf)
{
GF_FREE (ios_sample_buf->ios_samples);
GF_FREE (ios_sample_buf);
}
static int
ios_init_sample_buf (struct ios_conf *conf)
{
int32_t ret = -1;
GF_ASSERT (conf);
LOCK (&conf->lock);
conf->ios_sample_buf = ios_create_sample_buf (
conf->ios_sample_buf_size);
if (!conf->ios_sample_buf)
goto out;
ret = 0;
out:
UNLOCK (&conf->lock);
return ret;
}
int
ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
struct ios_stat *iosstat)
@ -815,6 +911,230 @@ out:
return ret;
}
char *
_resolve_username (xlator_t *this, uid_t uid)
{
struct passwd pwd;
struct passwd *pwd_result = NULL;
size_t pwd_buf_len;
char *pwd_buf = NULL;
char *ret = NULL;
/* Prepare our buffer for the uid->username translation */
#ifdef _SC_GETGR_R_SIZE_MAX
pwd_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX);
#else
pwd_buf_len = -1;
#endif
if (pwd_buf_len == -1) {
pwd_buf_len = DEFAULT_PWD_BUF_SZ; /* per the man page */
}
pwd_buf = alloca (pwd_buf_len);
if (!pwd_buf)
goto err;
getpwuid_r (uid, &pwd, pwd_buf, pwd_buf_len,
&pwd_result);
if (!pwd_result)
goto err;
ret = gf_strdup (pwd.pw_name);
if (ret)
return ret;
else
gf_log (this->name, GF_LOG_ERROR,
"gf_strdup failed, failing username "
"resolution.");
err:
return ret;
}
char *
_resolve_group_name (xlator_t *this, gid_t gid)
{
struct group grp;
struct group *grp_result = NULL;
size_t grp_buf_len;
char *grp_buf = NULL;
char *ret = NULL;
/* Prepare our buffer for the gid->group name translation */
#ifdef _SC_GETGR_R_SIZE_MAX
grp_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX);
#else
grp_buf_len = -1;
#endif
if (grp_buf_len == -1) {
grp_buf_len = DEFAULT_GRP_BUF_SZ; /* per the man page */
}
grp_buf = alloca (grp_buf_len);
if (!grp_buf) {
goto err;
}
getgrgid_r (gid, &grp, grp_buf, grp_buf_len,
&grp_result);
if (!grp_result)
goto err;
ret = gf_strdup (grp.gr_name);
if (ret)
return ret;
else
gf_log (this->name, GF_LOG_ERROR,
"gf_strdup failed, failing username "
"resolution.");
err:
return ret;
}
/*
* This function writes out a latency sample to a given file descriptor
* and beautifies the output in the process.
*/
void
_io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample,
FILE *logfp)
{
double epoch_time = 0.00;
char *xlator_name = NULL;
char *instance_name = NULL;
char *hostname = NULL;
char *identifier = NULL;
char *port = NULL;
char *port_pos = NULL;
char *group_name = NULL;
char *username = NULL;
struct ios_conf *conf = NULL;
conf = this->private;
epoch_time = (sample->timestamp).tv_sec +
((sample->timestamp).tv_usec / 1000000.0);
if (!sample->identifier || (strlen (sample->identifier) == 0)) {
hostname = "Unknown";
port = "Unknown";
} else {
identifier = strdupa (sample->identifier);
port_pos = strrchr (identifier, ':');
if (!port_pos || strlen(port_pos) < 2)
goto err;
port = strdupa (port_pos + 1);
if (!port)
goto err;
*port_pos = '\0';
hostname = gf_rev_dns_lookup_cached (identifier,
conf->dnscache);
if (!hostname)
hostname = "Unknown";
}
xlator_name = this->name;
if (!xlator_name || strlen (xlator_name) == 0)
xlator_name = "Unknown";
instance_name = this->instance_name;
if (!instance_name || strlen (instance_name) == 0)
instance_name = "N/A";
/* Resolve the UID to a string username */
username = _resolve_username (this, sample->uid);
if (!username) {
username = GF_MALLOC (30, gf_common_mt_char);
sprintf (username, "%d", (int32_t)sample->uid);
}
/* Resolve the GID to a string group name */
group_name = _resolve_group_name (this, sample->gid);
if (!group_name) {
group_name = GF_MALLOC (30, gf_common_mt_char);
sprintf (group_name, "%d", (int32_t)sample->gid);
}
ios_log (this, logfp,
"%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s",
epoch_time, fop_enum_to_pri_string (sample->fop_type),
fop_enum_to_string (sample->fop_type),
sample->elapsed, xlator_name, instance_name, username,
group_name, hostname, port);
goto out;
err:
gf_log (this->name, GF_LOG_ERROR,
"Error parsing socket identifier");
out:
GF_FREE (group_name);
GF_FREE (username);
}
/*
* Takes our current sample buffer in conf->io_sample_buf, and saves
* a reference to this, init's a new buffer, and then dumps out the
* contents of the saved reference.
*/
int
io_stats_dump_latency_samples_logfp (xlator_t *this, FILE *logfp)
{
uint64_t i = 0;
struct ios_conf *conf = NULL;
ios_sample_buf_t *sample_buf = NULL;
int ret = 1; /* Default to error */
conf = this->private;
/* Save pointer to old buffer; the CS equivalent of
* Indiana Jones: https://www.youtube.com/watch?v=Pr-8AP0To4k,
* though ours will end better I hope!
*/
sample_buf = conf->ios_sample_buf;
if (!sample_buf) {
gf_log (this->name, GF_LOG_WARNING,
"Sampling buffer is null, bailing!");
goto out;
}
/* Empty case, nothing to do, exit. */
if (sample_buf->collected == 0) {
gf_log (this->name, GF_LOG_DEBUG,
"No samples, dump not required.");
ret = 0;
goto out;
}
/* Init a new buffer, so we are free to work on the one we saved a
* reference to above.
*/
if (ios_init_sample_buf (conf) != 0) {
gf_log (this->name, GF_LOG_WARNING,
"Failed to init new sampling buffer, out of memory?");
goto out;
}
/* Wrap-around case, dump from pos to sample_buf->size -1
* and then from 0 to sample_buf->pos (covered off by
* "simple case")
*/
if (sample_buf->collected > sample_buf->pos + 1) {
for (i = sample_buf->pos; i < sample_buf->size; i++) {
_io_stats_write_latency_sample (this,
&(sample_buf->ios_samples[i]), logfp);
}
}
/* Simple case: Dump from 0 to sample_buf->pos */
for (i = 0; i < sample_buf->pos; i++) {
_io_stats_write_latency_sample (this,
&(sample_buf->ios_samples[i]), logfp);
}
ios_destroy_sample_buf (sample_buf);
out:
return ret;
}
int
io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
struct timeval *now, int interval, FILE* logfp)
@ -1284,6 +1604,48 @@ io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)
return 0;
}
void collect_ios_latency_sample (struct ios_conf *conf,
glusterfs_fop_t fop_type, double elapsed,
call_frame_t *frame)
{
ios_sample_buf_t *ios_sample_buf = NULL;
ios_sample_t *ios_sample = NULL;
struct timeval *timestamp = NULL;
call_stack_t *root = NULL;
ios_sample_buf = conf->ios_sample_buf;
LOCK (&conf->ios_sampling_lock);
if (conf->ios_sample_interval == 0 ||
ios_sample_buf->observed % conf->ios_sample_interval != 0)
goto out;
timestamp = &frame->begin;
root = frame->root;
ios_sample = &(ios_sample_buf->ios_samples[ios_sample_buf->pos]);
ios_sample->elapsed = elapsed;
ios_sample->fop_type = fop_type;
ios_sample->uid = root->uid;
ios_sample->gid = root->gid;
(ios_sample->timestamp).tv_sec = timestamp->tv_sec;
(ios_sample->timestamp).tv_usec = timestamp->tv_usec;
memcpy (&ios_sample->identifier, &root->identifier,
sizeof (root->identifier));
/* We've reached the end of the circular buffer, start from the
* beginning. */
if (ios_sample_buf->pos == (ios_sample_buf->size - 1))
ios_sample_buf->pos = 0;
else
ios_sample_buf->pos++;
ios_sample_buf->collected++;
out:
ios_sample_buf->observed++;
UNLOCK (&conf->ios_sampling_lock);
return;
}
static void
update_ios_latency_stats (struct ios_global_stats *stats, double elapsed,
glusterfs_fop_t op)
@ -1321,6 +1683,7 @@ update_ios_latency (struct ios_conf *conf, call_frame_t *frame,
update_ios_latency_stats (&conf->cumulative, elapsed, op);
update_ios_latency_stats (&conf->incremental, elapsed, op);
collect_ios_latency_sample (conf, op, elapsed, frame);
return 0;
}
@ -1660,7 +2023,7 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (iosstat) {
BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP);
iosstat = NULL;
iosstat = NULL;
}
STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
@ -2512,14 +2875,18 @@ _ios_destroy_dump_thread (struct ios_conf *conf) {
void *
_ios_dump_thread (xlator_t *this) {
struct ios_conf *conf = NULL;
FILE *logfp = NULL;
FILE *stats_logfp = NULL;
FILE *samples_logfp = NULL;
struct ios_dump_args args = {0};
int i;
int bytes_written = 0;
char filename[PATH_MAX];
int stats_bytes_written = 0;
int samples_bytes_written = 0;
char stats_filename[PATH_MAX];
char samples_filename[PATH_MAX];
char *xlator_name;
char *instance_name;
gf_boolean_t log_fopen_failure = _gf_true;
gf_boolean_t log_stats_fopen_failure = _gf_true;
gf_boolean_t log_samples_fopen_failure = _gf_true;
int old_cancel_type;
conf = this->private;
@ -2546,17 +2913,34 @@ _ios_dump_thread (xlator_t *this) {
goto out;
}
}
if (mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) {
if (errno != EEXIST) {
gf_log (this->name, GF_LOG_ERROR,
"could not create stats-sample directory %s",
_IOS_SAMP_DIR);
goto out;
}
}
if (instance_name) {
bytes_written = snprintf (filename, PATH_MAX,
stats_bytes_written = snprintf (stats_filename, PATH_MAX,
"%s/%s_%s_%s.dump", _IOS_DUMP_DIR,
__progname, xlator_name, instance_name);
samples_bytes_written = snprintf (samples_filename, PATH_MAX,
"%s/%s_%s_%s.samp", _IOS_SAMP_DIR,
__progname, xlator_name, instance_name);
} else {
bytes_written = snprintf (filename, PATH_MAX, "%s/%s_%s.dump",
_IOS_DUMP_DIR, __progname, xlator_name);
stats_bytes_written = snprintf (stats_filename, PATH_MAX,
"%s/%s_%s.dump", _IOS_DUMP_DIR, __progname,
xlator_name);
samples_bytes_written = snprintf (samples_filename, PATH_MAX,
"%s/%s_%s.samp", _IOS_SAMP_DIR, __progname,
xlator_name);
}
if (bytes_written >= PATH_MAX) {
if ((stats_bytes_written >= PATH_MAX) ||
(samples_bytes_written >= PATH_MAX)) {
gf_log (this->name, GF_LOG_ERROR,
"Invalid path for IO Stats dump: %s", filename);
"Invalid path for stats dump (%s) and/or latency "
"samples (%s)", stats_filename, samples_filename);
goto out;
}
while (1) {
@ -2572,22 +2956,32 @@ _ios_dump_thread (xlator_t *this) {
* just hold it open and rewind/truncate on each iteration.
* Leaving it alone for now.
*/
logfp = fopen (filename, "w+");
if (!logfp) {
if (log_fopen_failure) {
gf_log (this->name, GF_LOG_ERROR,
"could not open stats-dump file %s",
filename);
log_fopen_failure = _gf_false;
}
continue;
stats_logfp = fopen (stats_filename, "w+");
if (stats_logfp) {
(void) ios_dump_args_init (&args,
IOS_DUMP_TYPE_JSON_FILE,
stats_logfp);
io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false);
fclose (stats_logfp);
log_stats_fopen_failure = _gf_true;
} else if (log_stats_fopen_failure) {
gf_log (this->name, GF_LOG_ERROR,
"could not open stats-dump file %s (%s)",
stats_filename, strerror(errno));
log_stats_fopen_failure = _gf_false;
}
samples_logfp = fopen (samples_filename, "w+");
if (samples_logfp) {
io_stats_dump_latency_samples_logfp (this,
samples_logfp);
fclose (samples_logfp);
log_samples_fopen_failure = _gf_true;
} else if (log_samples_fopen_failure) {
gf_log (this->name, GF_LOG_ERROR,
"could not open samples-dump file %s (%s)",
samples_filename, strerror(errno));
log_samples_fopen_failure = _gf_false;
}
(void) ios_dump_args_init (
&args, IOS_DUMP_TYPE_JSON_FILE,
logfp);
io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false);
fclose (logfp);
log_fopen_failure = _gf_true;
}
out:
gf_log (this->name, GF_LOG_INFO, "IO stats dump thread terminated");
@ -3116,6 +3510,10 @@ reconfigure (xlator_t *this, dict_t *options)
(void *) &_ios_dump_thread, this);
}
GF_OPTION_RECONF ("ios-sample-interval", conf->ios_sample_interval,
options, int32, out);
GF_OPTION_RECONF ("ios-sample-buf-size", conf->ios_sample_buf_size,
options, int32, out);
GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);
if (sys_log_str) {
sys_log_level = glusterd_check_log_level (sys_log_str);
@ -3229,6 +3627,7 @@ init (xlator_t *this)
* in case of error paths.
*/
LOCK_INIT (&conf->lock);
LOCK_INIT (&conf->ios_sampling_lock);
gettimeofday (&conf->cumulative.started_at, NULL);
gettimeofday (&conf->incremental.started_at, NULL);
@ -3247,6 +3646,22 @@ init (xlator_t *this)
GF_OPTION_INIT ("ios-dump-interval", conf->ios_dump_interval,
int32, out);
GF_OPTION_INIT ("ios-sample-interval", conf->ios_sample_interval,
int32, out);
GF_OPTION_INIT ("ios-sample-buf-size", conf->ios_sample_buf_size,
int32, out);
if (ios_init_sample_buf (conf) != 0) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory.");
return -1;
}
GF_OPTION_INIT ("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec,
int32, out);
conf->dnscache = gf_dnscache_init (conf->ios_dnscache_ttl_sec);
GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);
if (sys_log_str) {
sys_log_level = glusterd_check_log_level (sys_log_str);
@ -3497,6 +3912,29 @@ struct volume_options options[] = {
.description = "Interval (in seconds) at which to auto-dump "
"statistics. Zero disables automatic dumping."
},
{ .key = { "ios-sample-interval" },
.type = GF_OPTION_TYPE_INT,
.min = 0,
.max = 65535,
.default_value = "0",
.description = "Interval in which we want to collect FOP latency "
"samples. 2 means collect a sample every 2nd FOP."
},
{ .key = { "ios-sample-buf-size" },
.type = GF_OPTION_TYPE_INT,
.min = 1024,
.max = 1024*1024,
.default_value = "65535",
.description = "The maximum size of our FOP sampling ring buffer."
},
{ .key = { "ios-dnscache-ttl-sec" },
.type = GF_OPTION_TYPE_INT,
.min = 1,
.max = 3600 * 72,
.default_value = "86400",
.description = "The interval after wish a cached DNS entry will be "
"re-validated. Default: 24 hrs"
},
{ .key = { "latency-measurement" },
.type = GF_OPTION_TYPE_BOOL,
.default_value = "off",

View File

@ -849,6 +849,21 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.option = "ios-dump-interval",
.op_version = 1
},
{ .key = "diagnostics.fop-sample-interval",
.voltype = "debug/io-stats",
.option = "ios-sample-interval",
.op_version = 1
},
{ .key = "diagnostics.fop-sample-buf-size",
.voltype = "debug/io-stats",
.option = "ios-sample-buf-size",
.op_version = 1
},
{ .key = "diagnostics.stats-dnscache-ttl-sec",
.voltype = "debug/io-stats",
.option = "ios-dnscache-ttl-sec",
.op_version = 1
},
/* IO-cache xlator options */
{ .key = "performance.cache-max-file-size",

View File

@ -219,6 +219,7 @@ nfs_create_frame (xlator_t *xl, nfs_user_t *nfu)
frame->root->pid = NFS_PID;
frame->root->uid = nfu->uid;
frame->root->gid = nfu->gids[NFS_PRIMGID_IDX];
memcpy (&frame->root->identifier, &nfu->identifier, UNIX_PATH_MAX);
frame->root->lk_owner = nfu->lk_owner;
if (nfu->ngrps != 1) {

View File

@ -639,8 +639,8 @@ nfs_user_root_create (nfs_user_t *newnfu)
int
nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
int auxcount)
nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid,
rpc_transport_t *trans, gid_t *auxgids, int auxcount)
{
int x = 1;
int y = 0;
@ -655,6 +655,10 @@ nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
newnfu->uid = uid;
newnfu->gids[0] = gid;
newnfu->ngrps = 1;
if (trans) {
memcpy (&newnfu->identifier, trans->peerinfo.identifier,
UNIX_PATH_MAX);
}
gf_msg_trace (GF_NFS, 0, "uid: %d, gid %d, gids: %d", uid, gid,
auxcount);
@ -683,7 +687,9 @@ nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req)
gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
nfs_user_create (nfu, rpcsvc_request_uid (req),
rpcsvc_request_gid (req), gidarr, gids);
rpcsvc_request_gid (req),
rpcsvc_request_transport (req),
gidarr, gids);
return;
}
@ -699,7 +705,8 @@ nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,
return;
gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
nfs_user_create (nfu, uid, gid, gidarr, gids);
nfs_user_create (nfu, uid, gid, rpcsvc_request_transport (req),
gidarr, gids);
return;
}

View File

@ -125,14 +125,15 @@ typedef struct nfs_user_info {
gid_t gids[NFS_NGROUPS];
int ngrps;
gf_lkowner_t lk_owner;
char identifier[UNIX_PATH_MAX]; /* ip of user */
} nfs_user_t;
extern int
nfs_user_root_create (nfs_user_t *newnfu);
extern int
nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
int auxcount);
nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid,
rpc_transport_t *trans, gid_t *auxgids, int auxcount);
extern void
nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req);

View File

@ -423,6 +423,7 @@ get_frame_from_request (rpcsvc_request_t *req)
server_conf_t *priv = NULL;
clienttable_t *clienttable = NULL;
unsigned int i = 0;
rpc_transport_t *trans = NULL;
GF_VALIDATE_OR_GOTO ("server", req, out);
@ -500,6 +501,12 @@ get_frame_from_request (rpcsvc_request_t *req)
server_resolve_groups (frame, req);
else
server_decode_groups (frame, req);
trans = req->trans;
if (trans) {
memcpy (&frame->root->identifier, trans->peerinfo.identifier,
sizeof (trans->peerinfo.identifier));
}
frame->local = req;
out: