1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-11 05:18:09 +03:00

Add rolling statistics that are collected across 10 second intervals.

Add a new command "ctdb stats [num]" that prints the [num] most recent statistics intervals collected.

(This used to be ctdb commit e6e16fcd5a45ebd3739a8160c8fb5f44494edb9e)
This commit is contained in:
Ronnie Sahlberg 2010-09-29 12:13:05 +10:00
parent 41b6e09fb1
commit 9f66a93f12
9 changed files with 213 additions and 56 deletions

View File

@ -63,7 +63,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
server/ctdb_traverse.o server/eventscript.o server/ctdb_takeover.o \
server/ctdb_serverids.o server/ctdb_persistent.o \
server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.o \
server/ctdb_vacuum.o server/ctdb_banning.o \
server/ctdb_vacuum.o server/ctdb_banning.o server/ctdb_statistics.o \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \

View File

@ -4216,3 +4216,23 @@ int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout,
return 0;
}
/*
  fetch the rolling statistics history from a node

  Sends CTDB_CONTROL_GET_STAT_HISTORY to destnode. On success *stats is set
  to a copy of the reply allocated on mem_ctx; the caller owns it and
  releases it with talloc_free(). The reply is validated so that
  (*stats)->num never describes more records than were actually received.

  returns 0 on success, -1 on failure (*stats is left untouched)
 */
int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
{
	const size_t hdr_len = offsetof(struct ctdb_statistics_wire, stats);
	struct ctdb_statistics_wire *wire;
	size_t max_records;
	int ret;
	TDB_DATA outdata;
	int32_t res;

	ret = ctdb_control(ctdb, destnode, 0,
			   CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
			   mem_ctx, &outdata, &res, &timeout, NULL);
	if (ret != 0 || res != 0 || outdata.dsize == 0) {
		DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
		return -1;
	}

	/* the fixed header (record count) must be present before we read it */
	if (outdata.dsize < hdr_len) {
		DEBUG(DEBUG_ERR,(__location__ " getstathistory reply too short: %u bytes\n", (unsigned)outdata.dsize));
		talloc_free(outdata.dptr);
		return -1;
	}
	max_records = (outdata.dsize - hdr_len) / sizeof(struct ctdb_statistics);

	wire = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
	talloc_free(outdata.dptr);
	if (wire == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to allocate memory for statistics history\n"));
		return -1;
	}

	/* never hand out a record count that reaches past the received data */
	if (wire->num > max_records) {
		DEBUG(DEBUG_ERR,(__location__ " getstathistory reply claims %u records but only holds %u\n", (unsigned)wire->num, (unsigned)max_records));
		talloc_free(wire);
		return -1;
	}

	*stats = wire;
	return 0;
}

View File

@ -588,4 +588,7 @@ struct ctdb_db_priority {
int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio);
int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority);
/*
  fetch the rolling statistics history from node destnode
  (CTDB_CONTROL_GET_STAT_HISTORY). On success returns 0 and stores in *stats
  a copy of the reply allocated on mem_ctx; returns -1 on failure.
 */
int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats);
#endif /* _CTDB_CLIENT_H */

View File

@ -334,60 +334,6 @@ struct ctdb_daemon_data {
/*
ctdb statistics information

struct ctdb_context embeds this as `statistics`, `statistics_current` and
as the elements of `statistics_history[]`.

The statistics history control copies these structs byte-for-byte into its
reply, so the field order and types are effectively wire format: do not
reorder, resize or insert fields without considering remote peers.
*/
struct ctdb_statistics {
uint32_t num_clients;
uint32_t frozen;
uint32_t recovering;
uint32_t client_packets_sent;
uint32_t client_packets_recv;
uint32_t node_packets_sent;
uint32_t node_packets_recv;
uint32_t keepalive_packets_sent;
uint32_t keepalive_packets_recv;
/* per packet type counters, node side (presumably inter-node traffic - TODO confirm) */
struct {
uint32_t req_call;
uint32_t reply_call;
uint32_t req_dmaster;
uint32_t reply_dmaster;
uint32_t reply_error;
uint32_t req_message;
uint32_t req_control;
uint32_t reply_control;
} node;
/* per packet type counters, client side (presumably local client traffic - TODO confirm) */
struct {
uint32_t req_call;
uint32_t req_message;
uint32_t req_control;
} client;
/* timeout counters per operation kind */
struct {
uint32_t call;
uint32_t control;
uint32_t traverse;
} timeouts;
/* NOTE(review): looks like recovery lock latencies for ctdbd and the recovery daemon; units not visible here - confirm */
struct {
double ctdbd;
double recd;
} reclock;
uint32_t total_calls;
uint32_t pending_calls;
uint32_t lockwait_calls;
uint32_t pending_lockwait_calls;
uint32_t childwrite_calls;
uint32_t pending_childwrite_calls;
uint32_t memory_used;
uint32_t __last_counter; /* hack for control_statistics_all */
uint32_t max_hop_count;
double max_call_latency;
double max_lockwait_latency;
double max_childwrite_latency;
uint32_t num_recoveries;
/* for the rolling interval: set to the current time when its counters are zeroed */
struct timeval statistics_start_time;
/* for history entries: stamped when the interval is moved into the history */
struct timeval statistics_current_time;
};
#define INVALID_GENERATION 1
/* table that contains the mapping between a hash value and lmaster
@ -477,6 +423,8 @@ struct ctdb_context {
struct ctdb_daemon_data daemon;
struct ctdb_statistics statistics;
struct ctdb_statistics statistics_current;
#define MAX_STAT_HISTORY 100
struct ctdb_statistics statistics_history[MAX_STAT_HISTORY];
struct ctdb_vnn_map *vnn_map;
uint32_t num_clients;
uint32_t recovery_master;
@ -1395,6 +1343,10 @@ int update_ip_assignment_tree(struct ctdb_context *ctdb,
int ctdb_init_tevent_logging(struct ctdb_context *ctdb);
int ctdb_update_stat_counter(struct ctdb_context *ctdb, uint32_t *counter, uint32_t value);
/*
  zero the statistics, the current interval and the statistics history, and
  schedule the timed event that rolls the current interval into the history
 */
int ctdb_statistics_init(struct ctdb_context *ctdb);
/*
  control handler for CTDB_CONTROL_GET_STAT_HISTORY: fills outdata with a
  struct ctdb_statistics_wire holding all MAX_STAT_HISTORY history slots.
  Returns 0 on success, -1 if the reply could not be allocated.
 */
int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA *outdata);
#endif

View File

@ -357,6 +357,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_GET_IFACES = 124,
CTDB_CONTROL_SET_IFACE_LINK_STATE = 125,
CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE = 126,
CTDB_CONTROL_GET_STAT_HISTORY = 127,
};
/*
@ -534,4 +535,68 @@ struct ctdb_all_public_ips {
struct ctdb_public_ip ips[1];
};
/*
ctdb statistics information

struct ctdb_context embeds this as `statistics`, `statistics_current` and
as the elements of `statistics_history[]`.

The statistics history control copies these structs byte-for-byte into its
reply, so the field order and types are effectively wire format: do not
reorder, resize or insert fields without considering remote peers.
*/
struct ctdb_statistics {
uint32_t num_clients;
uint32_t frozen;
uint32_t recovering;
uint32_t client_packets_sent;
uint32_t client_packets_recv;
uint32_t node_packets_sent;
uint32_t node_packets_recv;
uint32_t keepalive_packets_sent;
uint32_t keepalive_packets_recv;
/* per packet type counters, node side (presumably inter-node traffic - TODO confirm) */
struct {
uint32_t req_call;
uint32_t reply_call;
uint32_t req_dmaster;
uint32_t reply_dmaster;
uint32_t reply_error;
uint32_t req_message;
uint32_t req_control;
uint32_t reply_control;
} node;
/* per packet type counters, client side (presumably local client traffic - TODO confirm) */
struct {
uint32_t req_call;
uint32_t req_message;
uint32_t req_control;
} client;
/* timeout counters per operation kind */
struct {
uint32_t call;
uint32_t control;
uint32_t traverse;
} timeouts;
/* NOTE(review): looks like recovery lock latencies for ctdbd and the recovery daemon; units not visible here - confirm */
struct {
double ctdbd;
double recd;
} reclock;
uint32_t total_calls;
uint32_t pending_calls;
uint32_t lockwait_calls;
uint32_t pending_lockwait_calls;
uint32_t childwrite_calls;
uint32_t pending_childwrite_calls;
uint32_t memory_used;
uint32_t __last_counter; /* hack for control_statistics_all */
uint32_t max_hop_count;
double max_call_latency;
double max_lockwait_latency;
double max_childwrite_latency;
uint32_t num_recoveries;
/* for the rolling interval: set to the current time when its counters are zeroed */
struct timeval statistics_start_time;
/* for history entries: stamped when the interval is moved into the history */
struct timeval statistics_current_time;
};
/*
* wire format for statistics history
*
* Variable-length message: `stats` is declared with a single element, but
* the sender allocates offsetof(struct ctdb_statistics_wire, stats) plus
* one struct ctdb_statistics per entry, so stats[0] .. stats[num-1] are
* valid. Do not use sizeof(struct ctdb_statistics_wire) as the message size.
*/
struct ctdb_statistics_wire {
/* number of entries that follow in stats[] */
uint32_t num;
/* history entries; index 0 is the most recently completed interval */
struct ctdb_statistics stats[1];
};
#endif

View File

@ -600,6 +600,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_iface_info));
return ctdb_control_set_iface_link(ctdb, c, indata);
case CTDB_CONTROL_GET_STAT_HISTORY:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_get_stat_history(ctdb, c, outdata);
default:
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;

View File

@ -761,6 +761,9 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog)
ctdb_set_child_logging(ctdb);
/* initialize statistics collection */
ctdb_statistics_init(ctdb);
/* force initial recovery for election */
ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;

View File

@ -0,0 +1,77 @@
/*
ctdb statistics code
Copyright (C) Ronnie Sahlberg 2010
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include <string.h>
#include "lib/tevent/tevent.h"
#include "../include/ctdb_private.h"
static void ctdb_statistics_update(struct event_context *ev, struct timed_event *te,
struct timeval t, void *p)
{
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
memmove(&ctdb->statistics_history[1], &ctdb->statistics_history[0], (MAX_STAT_HISTORY-1)*sizeof(struct ctdb_statistics));
memcpy(&ctdb->statistics_history[0], &ctdb->statistics_current, sizeof(struct ctdb_statistics));
ctdb->statistics_history[0].statistics_current_time = timeval_current();
bzero(&ctdb->statistics_current, sizeof(struct ctdb_statistics));
ctdb->statistics_current.statistics_start_time = timeval_current();
event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(10, 0), ctdb_statistics_update, ctdb);
}
int ctdb_statistics_init(struct ctdb_context *ctdb)
{
bzero(&ctdb->statistics, sizeof(struct ctdb_statistics));
bzero(&ctdb->statistics_current, sizeof(struct ctdb_statistics));
ctdb->statistics_current.statistics_start_time = timeval_current();
bzero(ctdb->statistics_history, sizeof(ctdb->statistics_history));
event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(10, 0), ctdb_statistics_update, ctdb);
return 0;
}
/*
  control handler for CTDB_CONTROL_GET_STAT_HISTORY

  Builds a struct ctdb_statistics_wire, allocated as a talloc child of
  outdata, that carries every slot of ctdb->statistics_history (including
  slots that have not been filled yet) and hands it back through outdata.

  returns 0 on success, -1 if the reply could not be allocated
 */
int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
				      struct ctdb_req_control *c,
				      TDB_DATA *outdata)
{
	const size_t history_bytes = MAX_STAT_HISTORY * sizeof(struct ctdb_statistics);
	const size_t wire_len = offsetof(struct ctdb_statistics_wire, stats) + history_bytes;
	struct ctdb_statistics_wire *wire = talloc_size(outdata, wire_len);

	if (wire == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to allocate statistics history structure\n"));
		return -1;
	}

	/* header first, then the history array copied verbatim */
	wire->num = MAX_STAT_HISTORY;
	memcpy(wire->stats, ctdb->statistics_history, history_bytes);

	outdata->dptr  = (uint8_t *)wire;
	outdata->dsize = wire_len;

	return 0;
}

View File

@ -323,6 +323,38 @@ static int control_statistics_reset(struct ctdb_context *ctdb, int argc, const c
}
/*
  display remote ctdb rolling statistics

  usage: ctdb stats [num]

  Prints the statistics intervals returned by the node, most recent first,
  separated by "===" lines. History slots that were never filled (start
  time of 0) are skipped. With a single argument only the first [num]
  intervals are printed; 0 or no argument prints everything. An argument
  that is not a non-negative decimal number is rejected.

  returns 0 on success, non-zero on failure
 */
static int control_stats(struct ctdb_context *ctdb, int argc, const char **argv)
{
	int ret;
	struct ctdb_statistics_wire *stats = NULL;
	uint32_t i;
	long limit = 0;	/* 0 means "no limit" */

	if (argc == 1) {
		char *end = NULL;

		limit = strtol(argv[0], &end, 10);
		if (end == argv[0] || *end != '\0' || limit < 0) {
			DEBUG(DEBUG_ERR, ("Invalid number of history records '%s'\n", argv[0]));
			return -1;
		}
	}

	ret = ctdb_ctrl_getstathistory(ctdb, TIMELIMIT(), options.pnn, ctdb, &stats);
	if (ret != 0 || stats == NULL) {
		DEBUG(DEBUG_ERR, ("Unable to get rolling statistics from node %u\n", options.pnn));
		return (ret != 0) ? ret : -1;
	}

	for (i = 0; i < stats->num; i++) {
		if (stats->stats[i].statistics_start_time.tv_sec == 0) {
			continue;
		}
		show_statistics(&stats->stats[i]);
		/* stop after the requested record, without a trailing separator */
		if (limit > 0 && (unsigned long)i + 1 == (unsigned long)limit) {
			break;
		}
		printf("===\n");
	}

	talloc_free(stats);
	return 0;
}
/*
display uptime of remote node
*/
@ -4679,6 +4711,7 @@ static const struct {
{ "listvars", control_listvars, true, false, "list tunable variables"},
{ "statistics", control_statistics, false, false, "show statistics" },
{ "statisticsreset", control_statistics_reset, true, false, "reset statistics"},
{ "stats", control_stats, false, false, "show rolling statistics", "[number of history records]" },
{ "ip", control_ip, false, false, "show which public ip's that ctdb manages" },
{ "ipinfo", control_ipinfo, true, false, "show details about a public ip that ctdb manages", "<ip>" },
{ "ifaces", control_ifaces, true, false, "show which interfaces that ctdb manages" },