glusterd: statedump support

glusterd already has statedump support, but it doesn't dump its own
context information. Implementing a glusterd_dump_priv function to export
per-node glusterd information would be useful for debugging. Once implemented,
we could enhance sos-report to fetch this information. This would potentially
reduce our time to root cause, and the data needed for debuggability can be
added to the dump gradually.

The following are the main items of the dump list targeted in this patch:

    * Supported max/min op-version and current op-version
    * Information about peer list
    * Information about peer list involved while a transaction is going on
      (xaction_peers)
    * option dictionary in glusterd_conf_t
    * mgmt_v3_lock in glusterd_conf_t
    * List of connected clients
    * uuid of glusterd
    * A section of rpc related information like live connections and their
      statistics

A couple of issues were found during the implementation and testing
phase:
 - xaction_peers of glusterd_conf_t was not initialized in init because of which
   traversing through this list head was crashing when there was no active
   transaction
 - gf_free was not setting typestr to NULL when the alloc count dropped to 0
   for a previously allocated mem-type.

Change-Id: Ic9bce2d57682fc1771cd2bc6af0b7316ecbc761f
BUG: 1139682
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: http://review.gluster.org/8665
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Kaushal M <kaushal@redhat.com>
This commit is contained in:
Atin Mukherjee 2014-09-04 21:47:50 +05:30 committed by Kaushal M
parent 7e8eefca2c
commit 6c13daed1d
9 changed files with 334 additions and 34 deletions

View File

@ -29,6 +29,7 @@
#include "compat.h"
#include "byte-order.h"
#include "globals.h"
#include "statedump.h"
data_t *
get_new_data ()
@ -2807,39 +2808,67 @@ out:
return ret;
}
void
dict_dump (dict_t *this)
/**
 * dict_dump_to_str - render every key/value pair of a dict into a buffer.
 *
 * @dict:     dictionary whose members_list is walked in order
 * @dump:     caller-provided output buffer
 * @dumpsize: size of @dump in bytes
 * @format:   printf-style format consuming exactly two "%s" arguments
 *            (the key, then the value data)
 *
 * Values are printed with "%s"; binary values are therefore cut at their
 * first NUL byte. This is accepted because the output is a debugging aid.
 *
 * If the buffer fills up, the pair that did not fit is left truncated (but
 * NUL-terminated by snprintf), the walk stops and 0 is returned so that the
 * caller can still emit the partial dump.
 *
 * Returns 0 on success (including a truncated dump), -1 on invalid
 * arguments or on an snprintf failure.
 */
int
dict_dump_to_str (dict_t *dict, char *dump, int dumpsize, char *format)
{
        int          ret     = 0;
        int          dumplen = 0;
        int          room    = 0;
        data_pair_t *trav    = NULL;

        if (!dict || !dump || !format || dumpsize <= 0)
                return -1;

        /* Make sure an empty dict still yields a valid (empty) string */
        dump[0] = '\0';

        for (trav = dict->members_list; trav; trav = trav->next) {
                room = dumpsize - dumplen;

                ret = snprintf (&dump[dumplen], room, format, trav->key,
                                trav->value->data);
                if (ret < 0)
                        return -1;
                if (!ret)
                        return 0;

                /* snprintf returns the length it *wanted* to write; when
                 * that does not fit, advancing dumplen by it would push the
                 * next write past the end of the buffer. */
                if (ret >= room)
                        break;

                dumplen += ret;
        }

        return 0;
}
/**
 * dict_dump_to_log - write the contents of a dict to the log.
 *
 * @dict: dictionary to dump; a NULL dict is reported with a warning.
 *
 * The pairs are formatted as "(key:value)" into a 64 KB stack buffer by
 * dict_dump_to_str() and logged at INFO level together with the dict
 * address. If formatting fails a warning is logged instead.
 */
void
dict_dump_to_log (dict_t *dict)
{
        int   ret           = -1;
        char  dump[64*1024] = {0,};
        char *format        = "(%s:%s)";

        if (!dict) {
                gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
                return;
        }

        ret = dict_dump_to_str (dict, dump, sizeof(dump), format);
        if (ret) {
                gf_log ("dict", GF_LOG_WARNING, "Failed to log dictionary");
                return;
        }

        gf_log_callingfn ("dict", GF_LOG_INFO, "dict=%p (%s)", dict, dump);
}
/**
 * dict_dump_to_statedump - write the contents of a dict to the statedump.
 *
 * @dict:      dictionary to dump; a NULL dict is reported with a warning
 * @dict_name: name used as the second component of the statedump key and
 *             in the failure message
 * @domain:    log domain, also used as the statedump key prefix
 *
 * Each pair is formatted as "\n\t<key>:<value>" into a 64 KB stack buffer
 * by dict_dump_to_str() and then written under the key built from
 * @domain and @dict_name.
 */
void
dict_dump_to_statedump (dict_t *dict, char *dict_name, char *domain)
{
        int   ret           = -1;
        char  dump[64*1024] = {0,};
        char  key[4096]     = {0,};
        char *format        = "\n\t%s:%s";

        if (!dict) {
                gf_log_callingfn (domain, GF_LOG_WARNING, "dict is NULL");
                return;
        }

        ret = dict_dump_to_str (dict, dump, sizeof(dump), format);
        if (ret) {
                gf_log (domain, GF_LOG_WARNING, "Failed to log dictionary %s",
                        dict_name);
                return;
        }

        gf_proc_dump_build_key (key, domain, dict_name);
        gf_proc_dump_write (key, "%s", dump);
}

View File

@ -234,6 +234,12 @@ GF_MUST_CHECK int dict_get_str (dict_t *this, char *key, char **str);
GF_MUST_CHECK int dict_get_str_boolean (dict_t *this, char *key, int default_val);
GF_MUST_CHECK int dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
char delimiter);
void
dict_dump_to_statedump (dict_t *dict, char *dict_name, char *domain);
void dict_dump (dict_t *dict);
void
dict_dump_to_log (dict_t *dict);
int
dict_dump_to_str (dict_t *dict, char *dump, int dumpsize, char *format);
#endif

View File

@ -279,6 +279,10 @@ __gf_free (void *free_ptr)
{
xl->mem_acct.rec[type].size -= req_size;
xl->mem_acct.rec[type].num_allocs--;
/* If all the instances are freed up then ensure typestr is
* set to NULL */
if (!xl->mem_acct.rec[type].num_allocs)
xl->mem_acct.rec[type].typestr = NULL;
}
UNLOCK (&xl->mem_acct.rec[type].lock);
free:

View File

@ -31,8 +31,12 @@ build_tester $(dirname $0)/bug-834465.c
TEST $(dirname $0)/bug-834465 $M0/testfile
sdump2=$(generate_mount_statedump $V0);
nalloc2=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2 | grep num_allocs | cut -d '=' -f2`
# With __gf_free now setting typestr to NULL when num_allocs becomes 0, it is
# expected that there won't be any entry for gf_common_mt_fd_lk_ctx_node_t
# in the statedump file
nalloc2=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2 | wc -l`
TEST [ $nalloc1 -eq $nalloc2 ];
TEST rm -rf $MOUNTDIR/*

View File

@ -661,6 +661,18 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type)
/* Removing the mgmt_v3 lock from the global list */
dict_del (priv->mgmt_v3_lock, key);
/* Remove the backtrace key as well */
ret = snprintf (key, sizeof(key), "debug.last-success-bt-%s-%s", name,
type);
if (ret != strlen ("debug.last-success-bt-") + strlen (name) +
strlen (type) + 1) {
gf_log (this->name, GF_LOG_ERROR, "Unable to create backtrace "
"key");
ret = -1;
goto out;
}
dict_del (priv->mgmt_v3_lock, key);
gf_log (this->name, GF_LOG_DEBUG,
"Lock for %s %s successfully released",
type, name);

View File

@ -277,13 +277,227 @@ glusterd_fetchsnap_notify (xlator_t *this)
return ret;
}
int
glusterd_priv (xlator_t *this)
/**
 * glusterd_dump_peer - write one peer's details to the statedump.
 *
 * @peerinfo:  peer whose fields are dumped
 * @input_key: key prefix; the peer's keys are built from
 *             "<input_key><index>"
 * @index:     ordinal of the peer, appended to @input_key
 * @xpeers:    not used by this function
 *
 * Dumps uuid, hostname, port, state, quorum-action, quorum-contrib,
 * detaching and locked.
 */
void
glusterd_dump_peer (glusterd_peerinfo_t *peerinfo, char *input_key, int index,
                    gf_boolean_t xpeers)
{
        char subkey[50]               = {0,};
        char key[GF_DUMP_MAX_BUF_LEN] = {0,};

        /* Bounded copy: never write past key, whatever the prefix length */
        snprintf (key, sizeof (key), "%s", input_key);
        snprintf (subkey, sizeof (subkey), "%s%d", key, index);

        gf_proc_dump_build_key (key, subkey, "uuid");
        gf_proc_dump_write (key, "%s",
                            uuid_utoa (peerinfo->uuid));

        /* hostname is a string; printing it with "%d" is a format/argument
         * mismatch */
        gf_proc_dump_build_key (key, subkey, "hostname");
        gf_proc_dump_write (key, "%s", peerinfo->hostname);

        gf_proc_dump_build_key (key, subkey, "port");
        gf_proc_dump_write (key, "%d", peerinfo->port);

        gf_proc_dump_build_key (key, subkey, "state");
        gf_proc_dump_write (key, "%d", peerinfo->state.state);

        gf_proc_dump_build_key (key, subkey, "quorum-action");
        gf_proc_dump_write (key, "%d", peerinfo->quorum_action);

        gf_proc_dump_build_key (key, subkey, "quorum-contrib");
        gf_proc_dump_write (key, "%d",
                            peerinfo->quorum_contrib);

        gf_proc_dump_build_key (key, subkey, "detaching");
        gf_proc_dump_write (key, "%d", peerinfo->detaching);

        gf_proc_dump_build_key (key, subkey, "locked");
        gf_proc_dump_write (key, "%d", peerinfo->locked);
}
/**
 * glusterd_dump_peer_rpcstat - write a peer's rpc connection statistics to
 * the statedump.
 *
 * @peerinfo:  peer whose rpc client connection is inspected
 * @input_key: key prefix; the keys are built from "<input_key><index>"
 * @index:     ordinal of the peer, appended to @input_key
 *
 * Nothing is dumped when the peer has no rpc object. The peer name is
 * dumped only if rpcsvc_transport_peername() succeeds; connected state,
 * byte counters and message counters are always dumped.
 */
void
glusterd_dump_peer_rpcstat (glusterd_peerinfo_t *peerinfo, char *input_key,
                            int index)
{
        rpc_clnt_connection_t *conn                                = NULL;
        int                    ret                                 = -1;
        rpc_clnt_t            *rpc                                 = NULL;
        char                   rpcsvc_peername[RPCSVC_PEER_STRLEN] = {0,};
        char                   subkey[50]                          = {0,};
        char                   key[GF_DUMP_MAX_BUF_LEN]            = {0,};

        rpc = peerinfo->rpc;
        if (!rpc)
                return;

        /* Bounded copy: never write past key, whatever the prefix length */
        snprintf (key, sizeof (key), "%s", input_key);
        snprintf (subkey, sizeof (subkey), "%s%d", key, index);

        /* Dump the rpc connection statistics */
        conn = &rpc->conn;

        ret = rpcsvc_transport_peername (conn->trans, rpcsvc_peername,
                                         sizeof (rpcsvc_peername));
        if (!ret) {
                gf_proc_dump_build_key (key, subkey, "rpc.peername");
                gf_proc_dump_write (key, "%s", rpcsvc_peername);
        }

        gf_proc_dump_build_key (key, subkey, "rpc.connected");
        gf_proc_dump_write (key, "%d", conn->connected);

        gf_proc_dump_build_key (key, subkey, "rpc.total-bytes-read");
        gf_proc_dump_write (key, "%"PRIu64,
                            conn->trans->total_bytes_read);

        gf_proc_dump_build_key (key, subkey, "rpc.total-bytes-written");
        gf_proc_dump_write (key, "%"PRIu64,
                            conn->trans->total_bytes_write);

        gf_proc_dump_build_key (key, subkey, "rpc.ping_msgs_sent");
        gf_proc_dump_write (key, "%"PRIu64, conn->pingcnt);

        gf_proc_dump_build_key (key, subkey, "rpc.msgs_sent");
        gf_proc_dump_write (key, "%"PRIu64, conn->msgcnt);
}
/**
 * glusterd_dump_client_details - write the connected clients to the
 * statedump.
 *
 * @conf: glusterd configuration holding xprt_list and its xprt_lock
 *
 * Walks conf->xprt_list with conf->xprt_lock held and, for every transport,
 * dumps identifier, volname, max-op-version and min-op-version under the
 * prefix "glusterd.client<N>", with N counting from 1.
 */
static void
glusterd_dump_client_details (glusterd_conf_t *conf)
{
        char             client_prefix[50]             = {0,};
        char             dump_key[GF_DUMP_MAX_BUF_LEN] = {0,};
        rpc_transport_t *client                        = NULL;
        int              client_no                     = 0;

        pthread_mutex_lock (&conf->xprt_lock);

        list_for_each_entry (client, &conf->xprt_list, list) {
                client_no++;
                snprintf (client_prefix, sizeof (client_prefix),
                          "glusterd.client%d", client_no);

                gf_proc_dump_build_key (dump_key, client_prefix,
                                        "identifier");
                gf_proc_dump_write (dump_key, "%s",
                                    client->peerinfo.identifier);

                gf_proc_dump_build_key (dump_key, client_prefix, "volname");
                gf_proc_dump_write (dump_key, "%s",
                                    client->peerinfo.volname);

                gf_proc_dump_build_key (dump_key, client_prefix,
                                        "max-op-version");
                gf_proc_dump_write (dump_key, "%u",
                                    client->peerinfo.max_op_version);

                gf_proc_dump_build_key (dump_key, client_prefix,
                                        "min-op-version");
                gf_proc_dump_write (dump_key, "%u",
                                    client->peerinfo.min_op_version);
        }

        pthread_mutex_unlock (&conf->xprt_lock);
}
/**
 * glusterd_dict_mgmt_v3_lock_statedump - write the mgmt_v3_lock dict to the
 * statedump.
 *
 * @dict: the mgmt_v3_lock dictionary; a NULL dict is reported with a warning
 *
 * This is only meant for the mgmt_v3_lock dictionary, any other dict passed
 * to it will not work: values whose key contains "debug.last-success-bt" are
 * printed as strings, every other value is interpreted as a
 * glusterd_mgmt_v3_lock_obj and its lock_owner uuid is printed.
 *
 * Pairs are accumulated as "\n\t<key>:<value>" in a 64 KB stack buffer. If
 * the buffer fills up, the dump is cut at that point and whatever fitted is
 * still written. Nothing is written if snprintf fails or produces no output.
 */
static void
glusterd_dict_mgmt_v3_lock_statedump (dict_t *dict)
{
        int          ret                      = 0;
        int          dumplen                  = 0;
        int          room                     = 0;
        data_pair_t *trav                     = NULL;
        char         key[GF_DUMP_MAX_BUF_LEN] = {0,};
        char         dump[64*1024]            = {0,};

        if (!dict) {
                gf_log_callingfn ("glusterd", GF_LOG_WARNING, "dict NULL");
                return;
        }

        for (trav = dict->members_list; trav; trav = trav->next) {
                room = (int) sizeof (dump) - dumplen;

                if (strstr (trav->key, "debug.last-success-bt") != NULL) {
                        ret = snprintf (&dump[dumplen], room,
                                        "\n\t%s:%s", trav->key,
                                        trav->value->data);
                } else {
                        ret = snprintf (&dump[dumplen], room,
                                        "\n\t%s:%s", trav->key,
                                        uuid_utoa (((glusterd_mgmt_v3_lock_obj *)
                                        (trav->value->data))->lock_owner));
                }
                if (ret <= 0)
                        return;

                /* snprintf returns the length it *wanted* to write. On
                 * truncation, adding that to dumplen would move the next
                 * write past the end of dump and make the remaining-size
                 * computation wrap, so stop here instead. */
                if (ret >= room) {
                        dumplen = (int) sizeof (dump) - 1;
                        break;
                }

                dumplen += ret;
        }

        if (dumplen) {
                gf_proc_dump_build_key (key, "glusterd", "mgmt_v3_lock");
                gf_proc_dump_write (key, "%s", dump);
        }
}
/**
 * glusterd_dump_priv - statedump handler for glusterd's private context.
 *
 * @this: the glusterd xlator; its ->private is the glusterd_conf_t dumped
 *
 * Adds the "xlator.glusterd.priv" section and, with priv->mutex held, dumps
 * the node uuid, working directory, max/min/current op-version,
 * ping-timeout, the online state of shd, nfs and quotad, the peer list, the
 * transaction peer list, the connected clients, the mgmt_v3_lock dict and
 * the options dict.
 *
 * Always returns 0, including when @this or its private data is NULL.
 */
int
glusterd_dump_priv (xlator_t *this)
{
        glusterd_conf_t *priv                     = NULL;
        char             key[GF_DUMP_MAX_BUF_LEN] = {0,};

        GF_VALIDATE_OR_GOTO ("glusterd", this, out);

        priv = this->private;
        if (!priv)
                return 0;

        gf_proc_dump_build_key (key, "xlator.glusterd", "priv");
        gf_proc_dump_add_section (key);

        pthread_mutex_lock (&priv->mutex);
        {
                gf_proc_dump_build_key (key, "glusterd", "my-uuid");
                gf_proc_dump_write (key, "%s", uuid_utoa (priv->uuid));

                gf_proc_dump_build_key (key, "glusterd", "working-directory");
                gf_proc_dump_write (key, "%s", priv->workdir);

                gf_proc_dump_build_key (key, "glusterd", "max-op-version");
                gf_proc_dump_write (key, "%d", GD_OP_VERSION_MAX);

                gf_proc_dump_build_key (key, "glusterd", "min-op-version");
                gf_proc_dump_write (key, "%d", GD_OP_VERSION_MIN);

                gf_proc_dump_build_key (key, "glusterd", "current-op-version");
                gf_proc_dump_write (key, "%d", priv->op_version);

                gf_proc_dump_build_key (key, "glusterd", "ping-timeout");
                gf_proc_dump_write (key, "%d", priv->ping_timeout);

                gf_proc_dump_build_key (key, "glusterd", "shd.online");
                gf_proc_dump_write (key, "%d", priv->shd->online);

                gf_proc_dump_build_key (key, "glusterd", "nfs.online");
                gf_proc_dump_write (key, "%d", priv->nfs->online);

                gf_proc_dump_build_key (key, "glusterd", "quotad.online");
                gf_proc_dump_write (key, "%d", priv->quotad->online);

                /* The macro declares its own iteration state, so no peer
                 * cursor or index is needed here */
                GLUSTERD_DUMP_PEERS (&priv->peers, uuid_list, _gf_false);
                GLUSTERD_DUMP_PEERS (&priv->xaction_peers, op_peers_list,
                                     _gf_true);

                glusterd_dump_client_details (priv);

                glusterd_dict_mgmt_v3_lock_statedump(priv->mgmt_v3_lock);

                dict_dump_to_statedump (priv->opts, "options", "glusterd");
        }
        pthread_mutex_unlock (&priv->mutex);

out:
        return 0;
}
int32_t
mem_acct_init (xlator_t *this)
@ -1446,6 +1660,7 @@ init (xlator_t *this)
GF_VALIDATE_OR_GOTO(this->name, conf->quotad, out);
INIT_LIST_HEAD (&conf->peers);
INIT_LIST_HEAD (&conf->xaction_peers);
INIT_LIST_HEAD (&conf->volumes);
INIT_LIST_HEAD (&conf->snapshots);
INIT_LIST_HEAD (&conf->missed_snaps_list);
@ -1630,7 +1845,7 @@ struct xlator_fops fops;
struct xlator_cbks cbks;
struct xlator_dumpops dumpops = {
.priv = glusterd_priv,
.priv = glusterd_dump_priv,
};

View File

@ -624,6 +624,28 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);
*snap_volname_ptr = '\0'; \
} while (0)
/* Dump every peer linked on a list to the statedump.
 *
 *   head:   pointer to the list head to walk
 *   member: name of the list member inside glusterd_peerinfo_t that links
 *           the entries of this list
 *   xpeers: when false, entries are keyed "glusterd.peer<N>" and their rpc
 *           statistics are dumped as well; when true, entries are keyed
 *           "glusterd.xaction_peer<N>" and rpc statistics are skipped
 *
 * N counts from 1. The macro's locals carry a leading underscore so they
 * cannot shadow variables used in the caller's arguments.
 */
#define GLUSTERD_DUMP_PEERS(head, member, xpeers) do {                       \
                glusterd_peerinfo_t *_peerinfo                = NULL;        \
                int                  _index                   = 1;           \
                char                 _key[GF_DUMP_MAX_BUF_LEN] = {0,};       \
                                                                             \
                if (!(xpeers))                                               \
                        snprintf (_key, sizeof (_key), "glusterd.peer");     \
                else                                                         \
                        snprintf (_key, sizeof (_key),                       \
                                  "glusterd.xaction_peer");                  \
                                                                             \
                list_for_each_entry (_peerinfo, (head), member) {            \
                        glusterd_dump_peer (_peerinfo, _key, _index,         \
                                            (xpeers));                       \
                        if (!(xpeers))                                       \
                                glusterd_dump_peer_rpcstat (_peerinfo, _key, \
                                                            _index);         \
                        _index++;                                            \
                }                                                            \
                                                                             \
        } while (0)
int glusterd_uuid_init();
int glusterd_uuid_generate_save ();
@ -1057,4 +1079,12 @@ glusterd_add_brick_status_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo,
int32_t
glusterd_handle_snap_limit (dict_t *dict, dict_t *rsp_dict);
void
glusterd_dump_peer (glusterd_peerinfo_t *peerinfo, char *key, int index,
gf_boolean_t xpeers);
void
glusterd_dump_peer_rpcstat (glusterd_peerinfo_t *peerinfo, char *key,
int index);
#endif

View File

@ -2832,7 +2832,7 @@ client3_3_readv_cbk (struct rpc_req *req, struct iovec *iov, int count,
rsp.op_errno, out);
#ifdef GF_TESTING_IO_XDATA
dict_dump (xdata);
dict_dump_to_log (xdata);
#endif
out:

View File

@ -3787,7 +3787,7 @@ server3_3_writev (rpcsvc_request_t *req)
op_errno, out);
#ifdef GF_TESTING_IO_XDATA
dict_dump (state->xdata);
dict_dump_to_log (state->xdata);
#endif
ret = 0;