glusterd: statedump support
Although glusterd currently has statedump support, it doesn't dump its context information. Implementing a glusterd_dump_priv function to export per-node glusterd information would be useful for debugging. Once implemented, we could enhance sos-report to fetch this information. This would potentially reduce our time to root cause, and the data needed for debuggability can be added to the dump gradually. The following are the main items of the dump list targeted in this patch: * Supported max/min op-version and current op-version * Information about peer list * Information about peer list involved while a transaction is going on (xaction_peers) * option dictionary in glusterd_conf_t * mgmt_v3_lock in glusterd_conf_t * List of connected clients * uuid of glusterd * A section of rpc related information like live connections and their statistics There are a couple of issues which were found during the implementation and testing phase: - xaction_peers of glusterd_conf_t was not initialized in init, because of which traversing this list head crashed when there was no active transaction - gf_free was not setting typestr to NULL when the alloc count dropped to 0 for a previously allocated mem-type. Change-Id: Ic9bce2d57682fc1771cd2bc6af0b7316ecbc761f BUG: 1139682 Signed-off-by: Atin Mukherjee <amukherj@redhat.com> Reviewed-on: http://review.gluster.org/8665 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaushal M <kaushal@redhat.com>
This commit is contained in:
parent
7e8eefca2c
commit
6c13daed1d
@ -29,6 +29,7 @@
|
||||
#include "compat.h"
|
||||
#include "byte-order.h"
|
||||
#include "globals.h"
|
||||
#include "statedump.h"
|
||||
|
||||
data_t *
|
||||
get_new_data ()
|
||||
@ -2807,39 +2808,67 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
dict_dump (dict_t *this)
|
||||
int
|
||||
dict_dump_to_str (dict_t *dict, char *dump, int dumpsize, char *format)
|
||||
{
|
||||
int ret = 0;
|
||||
int dumplen = 0;
|
||||
data_pair_t *trav = NULL;
|
||||
char dump[64*1024]; /* This is debug only, hence
|
||||
performance should not matter */
|
||||
int ret = 0;
|
||||
int dumplen = 0;
|
||||
data_pair_t *trav = NULL;
|
||||
|
||||
if (!this) {
|
||||
gf_log_callingfn ("dict", GF_LOG_WARNING, "dict NULL");
|
||||
goto out;
|
||||
}
|
||||
|
||||
dump[0] = '\0'; /* the array is not initialized to '\0' */
|
||||
|
||||
/* There is a possibility of issues if data is binary, ignore it
|
||||
for now as debugging is more important */
|
||||
for (trav = this->members_list; trav; trav = trav->next) {
|
||||
ret = snprintf (&dump[dumplen], ((64*1024) - dumplen - 1),
|
||||
"(%s:%s)", trav->key, trav->value->data);
|
||||
for (trav = dict->members_list; trav; trav = trav->next) {
|
||||
ret = snprintf (&dump[dumplen], dumpsize - dumplen,
|
||||
format, trav->key, trav->value->data);
|
||||
if ((ret == -1) || !ret)
|
||||
break;
|
||||
return ret;
|
||||
|
||||
dumplen += ret;
|
||||
/* snprintf doesn't append a trailing '\0', add it here */
|
||||
dump[dumplen] = '\0';
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
dict_dump_to_log (dict_t *dict)
|
||||
{
|
||||
int ret = -1;
|
||||
char dump[64*1024] = {0,};
|
||||
char *format = "(%s:%s)";
|
||||
|
||||
if (!dict) {
|
||||
gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
|
||||
return;
|
||||
}
|
||||
|
||||
if (dumplen)
|
||||
gf_log_callingfn ("dict", GF_LOG_INFO,
|
||||
"dict=%p (%s)", this, dump);
|
||||
ret = dict_dump_to_str (dict, dump, sizeof(dump), format);
|
||||
if (ret) {
|
||||
gf_log ("dict", GF_LOG_WARNING, "Failed to log dictionary");
|
||||
return;
|
||||
}
|
||||
gf_log_callingfn ("dict", GF_LOG_INFO, "dict=%p (%s)", dict, dump);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
dict_dump_to_statedump (dict_t *dict, char *dict_name, char *domain)
|
||||
{
|
||||
int ret = -1;
|
||||
char dump[64*1024] = {0,};
|
||||
char key[4096] = {0,};
|
||||
char *format = "\n\t%s:%s";
|
||||
|
||||
if (!dict) {
|
||||
gf_log_callingfn (domain, GF_LOG_WARNING, "dict is NULL");
|
||||
return;
|
||||
}
|
||||
|
||||
ret = dict_dump_to_str (dict, dump, sizeof(dump), format);
|
||||
if (ret) {
|
||||
gf_log (domain, GF_LOG_WARNING, "Failed to log dictionary %s",
|
||||
dict_name);
|
||||
return;
|
||||
}
|
||||
gf_proc_dump_build_key (key, domain, dict_name);
|
||||
gf_proc_dump_write (key, "%s", dump);
|
||||
|
||||
out:
|
||||
return;
|
||||
}
|
||||
|
@ -234,6 +234,12 @@ GF_MUST_CHECK int dict_get_str (dict_t *this, char *key, char **str);
|
||||
GF_MUST_CHECK int dict_get_str_boolean (dict_t *this, char *key, int default_val);
|
||||
GF_MUST_CHECK int dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
|
||||
char delimiter);
|
||||
void
|
||||
dict_dump_to_statedump (dict_t *dict, char *dict_name, char *domain);
|
||||
|
||||
void dict_dump (dict_t *dict);
|
||||
void
|
||||
dict_dump_to_log (dict_t *dict);
|
||||
|
||||
int
|
||||
dict_dump_to_str (dict_t *dict, char *dump, int dumpsize, char *format);
|
||||
#endif
|
||||
|
@ -279,6 +279,10 @@ __gf_free (void *free_ptr)
|
||||
{
|
||||
xl->mem_acct.rec[type].size -= req_size;
|
||||
xl->mem_acct.rec[type].num_allocs--;
|
||||
/* If all the instances are freed up then ensure typestr is
|
||||
* set to NULL */
|
||||
if (!xl->mem_acct.rec[type].num_allocs)
|
||||
xl->mem_acct.rec[type].typestr = NULL;
|
||||
}
|
||||
UNLOCK (&xl->mem_acct.rec[type].lock);
|
||||
free:
|
||||
|
@ -31,8 +31,12 @@ build_tester $(dirname $0)/bug-834465.c
|
||||
TEST $(dirname $0)/bug-834465 $M0/testfile
|
||||
|
||||
sdump2=$(generate_mount_statedump $V0);
|
||||
nalloc2=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2 | grep num_allocs | cut -d '=' -f2`
|
||||
|
||||
# With __gf_free now setting typestr to NULL when num_allocs becomes 0, it is
|
||||
# expected that there wouldn't be any entry for gf_common_mt_fd_lk_ctx_node_t
|
||||
# in the statedump file now
|
||||
|
||||
nalloc2=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2 | wc -l`
|
||||
TEST [ $nalloc1 -eq $nalloc2 ];
|
||||
|
||||
TEST rm -rf $MOUNTDIR/*
|
||||
|
@ -661,6 +661,18 @@ glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type)
|
||||
/* Removing the mgmt_v3 lock from the global list */
|
||||
dict_del (priv->mgmt_v3_lock, key);
|
||||
|
||||
/* Remove the backtrace key as well */
|
||||
ret = snprintf (key, sizeof(key), "debug.last-success-bt-%s-%s", name,
|
||||
type);
|
||||
if (ret != strlen ("debug.last-success-bt-") + strlen (name) +
|
||||
strlen (type) + 1) {
|
||||
gf_log (this->name, GF_LOG_ERROR, "Unable to create backtrace "
|
||||
"key");
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
dict_del (priv->mgmt_v3_lock, key);
|
||||
|
||||
gf_log (this->name, GF_LOG_DEBUG,
|
||||
"Lock for %s %s successfully released",
|
||||
type, name);
|
||||
|
@ -277,13 +277,227 @@ glusterd_fetchsnap_notify (xlator_t *this)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
glusterd_priv (xlator_t *this)
|
||||
void
|
||||
glusterd_dump_peer (glusterd_peerinfo_t *peerinfo, char *input_key, int index,
|
||||
gf_boolean_t xpeers)
|
||||
{
|
||||
return 0;
|
||||
char subkey[50] = {0,};
|
||||
char key[GF_DUMP_MAX_BUF_LEN] = {0,};
|
||||
|
||||
strcpy (key, input_key);
|
||||
|
||||
snprintf (subkey, sizeof (subkey), "%s%d", key, index);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "uuid");
|
||||
gf_proc_dump_write (key, "%s",
|
||||
uuid_utoa (peerinfo->uuid));
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "hostname");
|
||||
gf_proc_dump_write (key, "%d", peerinfo->hostname);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "port");
|
||||
gf_proc_dump_write (key, "%d", peerinfo->port);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "state");
|
||||
gf_proc_dump_write (key, "%d", peerinfo->state.state);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "quorum-action");
|
||||
gf_proc_dump_write (key, "%d", peerinfo->quorum_action);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "quorum-contrib");
|
||||
gf_proc_dump_write (key, "%d",
|
||||
peerinfo->quorum_contrib);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "detaching");
|
||||
gf_proc_dump_write (key, "%d", peerinfo->detaching);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "locked");
|
||||
gf_proc_dump_write (key, "%d", peerinfo->locked);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
glusterd_dump_peer_rpcstat (glusterd_peerinfo_t *peerinfo, char *input_key,
|
||||
int index)
|
||||
{
|
||||
rpc_clnt_connection_t *conn = NULL;
|
||||
int ret = -1;
|
||||
rpc_clnt_t *rpc = NULL;
|
||||
char rpcsvc_peername[RPCSVC_PEER_STRLEN] = {0,};
|
||||
char subkey[50] = {0,};
|
||||
char key[GF_DUMP_MAX_BUF_LEN] = {0,};
|
||||
|
||||
strcpy (key, input_key);
|
||||
|
||||
/* Dump the rpc connection statistics */
|
||||
rpc = peerinfo->rpc;
|
||||
if (rpc) {
|
||||
conn = &rpc->conn;
|
||||
snprintf (subkey, sizeof (subkey), "%s%d", key, index);
|
||||
ret = rpcsvc_transport_peername (conn->trans,
|
||||
(char *)&rpcsvc_peername,
|
||||
sizeof (rpcsvc_peername));
|
||||
if (!ret) {
|
||||
gf_proc_dump_build_key (key, subkey, "rpc.peername");
|
||||
gf_proc_dump_write (key, "%s", rpcsvc_peername);
|
||||
}
|
||||
gf_proc_dump_build_key (key, subkey, "rpc.connected");
|
||||
gf_proc_dump_write (key, "%d", conn->connected);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "rpc.total-bytes-read");
|
||||
gf_proc_dump_write (key, "%"PRIu64,
|
||||
conn->trans->total_bytes_read);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "rpc.total-bytes-written");
|
||||
gf_proc_dump_write (key, "%"PRIu64,
|
||||
conn->trans->total_bytes_write);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "rpc.ping_msgs_sent");
|
||||
gf_proc_dump_write (key, "%"PRIu64, conn->pingcnt);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "rpc.msgs_sent");
|
||||
gf_proc_dump_write (key, "%"PRIu64, conn->msgcnt);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
glusterd_dump_client_details (glusterd_conf_t *conf)
|
||||
{
|
||||
rpc_transport_t *xprt = NULL;
|
||||
char key[GF_DUMP_MAX_BUF_LEN] = {0,};
|
||||
char subkey[50] = {0,};
|
||||
int index = 1;
|
||||
|
||||
pthread_mutex_lock (&conf->xprt_lock);
|
||||
{
|
||||
list_for_each_entry (xprt, &conf->xprt_list, list) {
|
||||
snprintf (subkey, sizeof (subkey), "glusterd.client%d",
|
||||
index);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "identifier");
|
||||
gf_proc_dump_write (key, "%s",
|
||||
xprt->peerinfo.identifier);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "volname");
|
||||
gf_proc_dump_write (key, "%s",
|
||||
xprt->peerinfo.volname);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "max-op-version");
|
||||
gf_proc_dump_write (key, "%u",
|
||||
xprt->peerinfo.max_op_version);
|
||||
|
||||
gf_proc_dump_build_key (key, subkey, "min-op-version");
|
||||
gf_proc_dump_write (key, "%u",
|
||||
xprt->peerinfo.min_op_version);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock (&conf->xprt_lock);
|
||||
}
|
||||
|
||||
|
||||
/* The following function is just for dumping mgmt_v3_lock dictionary, any other
|
||||
* dict passed to this API will not work */
|
||||
|
||||
static void
|
||||
glusterd_dict_mgmt_v3_lock_statedump (dict_t *dict)
|
||||
{
|
||||
int ret = 0;
|
||||
int dumplen = 0;
|
||||
data_pair_t *trav = NULL;
|
||||
char key[GF_DUMP_MAX_BUF_LEN] = {0,};
|
||||
char dump[64*1024] = {0,};
|
||||
|
||||
if (!dict) {
|
||||
gf_log_callingfn ("glusterd", GF_LOG_WARNING, "dict NULL");
|
||||
goto out;
|
||||
}
|
||||
for (trav = dict->members_list; trav; trav = trav->next) {
|
||||
if (strstr (trav->key, "debug.last-success-bt") != NULL) {
|
||||
ret = snprintf (&dump[dumplen], sizeof(dump) - dumplen,
|
||||
"\n\t%s:%s", trav->key,
|
||||
trav->value->data);
|
||||
} else {
|
||||
ret = snprintf (&dump[dumplen], sizeof(dump) - dumplen,
|
||||
"\n\t%s:%s", trav->key,
|
||||
uuid_utoa (((glusterd_mgmt_v3_lock_obj *)
|
||||
(trav->value->data))->lock_owner));
|
||||
}
|
||||
if ((ret == -1) || !ret)
|
||||
return;
|
||||
dumplen += ret;
|
||||
}
|
||||
|
||||
if (dumplen) {
|
||||
gf_proc_dump_build_key (key, "glusterd", "mgmt_v3_lock");
|
||||
gf_proc_dump_write (key, "%s", dump);
|
||||
}
|
||||
|
||||
out:
|
||||
return;
|
||||
}
|
||||
|
||||
int
|
||||
glusterd_dump_priv (xlator_t *this)
|
||||
{
|
||||
int index = 1;
|
||||
glusterd_conf_t *priv = NULL;
|
||||
char key[GF_DUMP_MAX_BUF_LEN] = {0,};
|
||||
char subkey[50] = {0,};
|
||||
glusterd_peerinfo_t *peerinfo = NULL;
|
||||
glusterd_volinfo_t *volinfo = NULL;
|
||||
|
||||
GF_VALIDATE_OR_GOTO ("glusterd", this, out);
|
||||
|
||||
priv = this->private;
|
||||
if (!priv)
|
||||
return 0;
|
||||
|
||||
gf_proc_dump_build_key (key, "xlator.glusterd", "priv");
|
||||
gf_proc_dump_add_section (key);
|
||||
|
||||
pthread_mutex_lock (&priv->mutex);
|
||||
{
|
||||
gf_proc_dump_build_key (key, "glusterd", "my-uuid");
|
||||
gf_proc_dump_write (key, "%s", uuid_utoa (priv->uuid));
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "working-directory");
|
||||
gf_proc_dump_write (key, "%s", priv->workdir);
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "max-op-version");
|
||||
gf_proc_dump_write (key, "%d", GD_OP_VERSION_MAX);
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "min-op-version");
|
||||
gf_proc_dump_write (key, "%d", GD_OP_VERSION_MIN);
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "current-op-version");
|
||||
gf_proc_dump_write (key, "%d", priv->op_version);
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "ping-timeout");
|
||||
gf_proc_dump_write (key, "%d", priv->ping_timeout);
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "shd.online");
|
||||
gf_proc_dump_write (key, "%d", priv->shd->online);
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "nfs.online");
|
||||
gf_proc_dump_write (key, "%d", priv->nfs->online);
|
||||
|
||||
gf_proc_dump_build_key (key, "glusterd", "quotad.online");
|
||||
gf_proc_dump_write (key, "%d", priv->quotad->online);
|
||||
|
||||
GLUSTERD_DUMP_PEERS (&priv->peers, uuid_list, _gf_false);
|
||||
GLUSTERD_DUMP_PEERS (&priv->xaction_peers, op_peers_list,
|
||||
_gf_true);
|
||||
glusterd_dump_client_details (priv);
|
||||
glusterd_dict_mgmt_v3_lock_statedump(priv->mgmt_v3_lock);
|
||||
dict_dump_to_statedump (priv->opts, "options", "glusterd");
|
||||
}
|
||||
pthread_mutex_unlock (&priv->mutex);
|
||||
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t
|
||||
mem_acct_init (xlator_t *this)
|
||||
@ -1446,6 +1660,7 @@ init (xlator_t *this)
|
||||
GF_VALIDATE_OR_GOTO(this->name, conf->quotad, out);
|
||||
|
||||
INIT_LIST_HEAD (&conf->peers);
|
||||
INIT_LIST_HEAD (&conf->xaction_peers);
|
||||
INIT_LIST_HEAD (&conf->volumes);
|
||||
INIT_LIST_HEAD (&conf->snapshots);
|
||||
INIT_LIST_HEAD (&conf->missed_snaps_list);
|
||||
@ -1630,7 +1845,7 @@ struct xlator_fops fops;
|
||||
struct xlator_cbks cbks;
|
||||
|
||||
struct xlator_dumpops dumpops = {
|
||||
.priv = glusterd_priv,
|
||||
.priv = glusterd_dump_priv,
|
||||
};
|
||||
|
||||
|
||||
|
@ -624,6 +624,28 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);
|
||||
*snap_volname_ptr = '\0'; \
|
||||
} while (0)
|
||||
|
||||
/*
 * GLUSTERD_DUMP_PEERS - dump every peer on a list into the statedump.
 *
 * head:   pointer to the list head to walk.
 * member: name of the list_head member inside glusterd_peerinfo_t that
 *         links the entries of this list.
 * xpeers: true when the list is the transaction peer list. It selects the
 *         key prefix ("glusterd.xaction_peer" instead of "glusterd.peer")
 *         and suppresses the rpc statistics, which are only dumped for
 *         regular peers.
 *
 * Peers are numbered from 1 in list order. The macro's own variables carry a
 * leading underscore so they cannot shadow the caller's locals.
 */
#define GLUSTERD_DUMP_PEERS(head, member, xpeers) do {                       \
                glusterd_peerinfo_t  *_peerinfo                 = NULL;      \
                int                   _index                    = 1;         \
                char                  _key[GF_DUMP_MAX_BUF_LEN] = {0,};      \
                                                                             \
                snprintf (_key, sizeof (_key), "%s",                         \
                          (xpeers) ? "glusterd.xaction_peer"                 \
                                   : "glusterd.peer");                       \
                                                                             \
                list_for_each_entry (_peerinfo, (head), member) {            \
                        glusterd_dump_peer (_peerinfo, _key, _index,         \
                                            (xpeers));                       \
                        if (!(xpeers))                                       \
                                glusterd_dump_peer_rpcstat (_peerinfo, _key, \
                                                            _index);         \
                        _index++;                                            \
                }                                                            \
                                                                             \
        } while (0)
|
||||
|
||||
int glusterd_uuid_init();
|
||||
|
||||
int glusterd_uuid_generate_save ();
|
||||
@ -1057,4 +1079,12 @@ glusterd_add_brick_status_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo,
|
||||
int32_t
|
||||
glusterd_handle_snap_limit (dict_t *dict, dict_t *rsp_dict);
|
||||
|
||||
void
|
||||
glusterd_dump_peer (glusterd_peerinfo_t *peerinfo, char *key, int index,
|
||||
gf_boolean_t xpeers);
|
||||
|
||||
void
|
||||
glusterd_dump_peer_rpcstat (glusterd_peerinfo_t *peerinfo, char *key,
|
||||
int index);
|
||||
|
||||
#endif
|
||||
|
@ -2832,7 +2832,7 @@ client3_3_readv_cbk (struct rpc_req *req, struct iovec *iov, int count,
|
||||
rsp.op_errno, out);
|
||||
|
||||
#ifdef GF_TESTING_IO_XDATA
|
||||
dict_dump (xdata);
|
||||
dict_dump_to_log (xdata);
|
||||
#endif
|
||||
|
||||
out:
|
||||
|
@ -3787,7 +3787,7 @@ server3_3_writev (rpcsvc_request_t *req)
|
||||
op_errno, out);
|
||||
|
||||
#ifdef GF_TESTING_IO_XDATA
|
||||
dict_dump (state->xdata);
|
||||
dict_dump_to_log (state->xdata);
|
||||
#endif
|
||||
|
||||
ret = 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user