1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-22 13:34:15 +03:00

add improvements to tracking memory usage in ctdbd and the recovery daemon

and a ctdb command to pull the talloc memory map from a recovery daemon
ctdb rddumpmemory

(This used to be ctdb commit d23950be7406cf288f48b660c0f57a9b8d7bdd05)
This commit is contained in:
Ronnie Sahlberg 2008-04-01 15:34:54 +11:00
parent 78081de82a
commit 27a7f854f5
8 changed files with 127 additions and 2 deletions

View File

@ -277,7 +277,7 @@ int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
DLIST_ADD_END(queue->out_queue, pkt, struct ctdb_queue_pkt *);
if (LogLevel > DEBUG_NOTICE) {
if (queue->ctdb->tunable.verbose_memory_names != 0) {
struct ctdb_req_header *hdr = (struct ctdb_req_header *)pkt->data;
switch (hdr->operation) {
case CTDB_REQ_CONTROL: {

View File

@ -90,6 +90,12 @@ struct ctdb_call_info {
*/
#define CTDB_SRVID_VACUUM_FETCH 0xF700000000000000LL
/*
a message to ask the recovery daemon for a talloc memdump.
the payload is a struct rd_memdump_reply; the recovery daemon sends
the dump back as a message to the pnn/srvid named in that payload
*/
#define CTDB_SRVID_MEM_DUMP 0xF800000000000000LL
/* used on the domain socket, send a pdu to the local daemon */
#define CTDB_CURRENT_NODE 0xF0000001

View File

@ -38,6 +38,14 @@
#define CTDB_FETCH_FUNC 0xFF000002
/*
recovery daemon memdump reply address.
sent as the payload of a CTDB_SRVID_MEM_DUMP request; the receiver
checks that the payload is exactly sizeof(struct rd_memdump_reply)
*/
struct rd_memdump_reply {
/* node number the memory dump is sent back to */
uint32_t pnn;
/* srvid the memory dump is sent back to */
uint64_t srvid;
};
/*
a tcp connection description
*/
@ -93,6 +101,7 @@ struct ctdb_tunable {
uint32_t disable_when_unhealthy;
uint32_t reclock_ping_period;
uint32_t no_ip_failback;
uint32_t verbose_memory_names;
};
/*
@ -1275,5 +1284,6 @@ void ctdb_load_nodes_file(struct ctdb_context *ctdb);
int ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode);
/* dump the talloc memory hierarchy into outdata as a blob;
   callers treat a non-zero return value as failure */
int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata);
#endif

View File

@ -39,7 +39,7 @@ struct ctdb_control_state {
/*
dump talloc memory hierarchy, returning it as a blob to the client
*/
static int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata)
int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
/* dump to a file, then send the file as a blob */
FILE *f;

View File

@ -27,6 +27,7 @@
#include "system/wait.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
#include <sys/socket.h>
static void daemon_incoming_packet(void *, struct ctdb_req_header *);
@ -540,6 +541,19 @@ static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde,
set_close_on_exec(fd);
client = talloc_zero(ctdb, struct ctdb_client);
if (ctdb->tunable.verbose_memory_names != 0) {
struct ucred cr;
socklen_t crl = sizeof(struct ucred);
if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &crl) == 0) {
#if 0
/* This causes client to later become NULL ? */
talloc_set_name(client, "struct ctdb_client: pid:%u", (unsigned)cr.pid);
#else
DEBUG(DEBUG_ERR, ("Client with pid:%u connected to struct ctdb_client %p\n", (unsigned)cr.pid, client));
#endif
}
}
client->ctdb = ctdb;
client->fd = fd;
client->client_id = ctdb_reqid_new(ctdb, client);

View File

@ -1599,6 +1599,47 @@ static void election_send_request(struct event_context *ev, struct timed_event *
rec->send_election_te = NULL;
}
/*
  handler for memory dump requests

  data must hold exactly one struct rd_memdump_reply naming the
  pnn/srvid the dump is to be sent back to. The dump is produced with
  ctdb_dump_memory() and returned with ctdb_send_message().
  srvid and private_data are not used.

  All temporary allocations hang off tmp_ctx, which is released on
  every path out of the function.
 */
static void mem_dump_handler(struct ctdb_context *ctdb, uint64_t srvid,
			     TDB_DATA data, void *private_data)
{
	TALLOC_CTX *tmp_ctx;
	TDB_DATA *dump;
	int ret;
	struct rd_memdump_reply *rd;

	/* validate the request before allocating anything, so that
	   rejecting a malformed request cannot leak memory */
	if (data.dsize != sizeof(struct rd_memdump_reply)) {
		DEBUG(DEBUG_ERR, (__location__ " Wrong size of return address.\n"));
		return;
	}
	rd = (struct rd_memdump_reply *)data.dptr;

	tmp_ctx = talloc_new(ctdb);
	if (tmp_ctx == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Failed to allocate memory for memdump\n"));
		return;
	}

	dump = talloc_zero(tmp_ctx, TDB_DATA);
	if (dump == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Failed to allocate memory for memdump\n"));
		goto done;
	}

	ret = ctdb_dump_memory(ctdb, dump);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, (__location__ " ctdb_dump_memory() failed\n"));
		goto done;
	}

	DEBUG(DEBUG_ERR, ("recovery master memory dump\n"));

	ret = ctdb_send_message(ctdb, rd->pnn, rd->srvid, *dump);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,("Failed to send rd memdump reply message\n"));
		/* fall through: tmp_ctx must be freed on failure too */
	}

done:
	talloc_free(tmp_ctx);
}
/*
handler for recovery master elections
*/
@ -2122,6 +2163,9 @@ static void monitor_cluster(struct ctdb_context *ctdb)
rec->rec_file_fd = -1;
ctdb_recoverd_get_pnn_lock(rec);
/* register a message port for sending memory dumps */
ctdb_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
/* register a message port for recovery elections */
ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);

View File

@ -48,6 +48,7 @@ static const struct {
{ "DisableWhenUnhealthy", 0, offsetof(struct ctdb_tunable, disable_when_unhealthy) },
{ "ReclockPingPeriod", 60, offsetof(struct ctdb_tunable, reclock_ping_period) },
{ "NoIPFailback", 0, offsetof(struct ctdb_tunable, no_ip_failback) },
{ "VerboseMemoryNames", 0, offsetof(struct ctdb_tunable, verbose_memory_names) },
};
/*

View File

@ -1407,6 +1407,55 @@ static int control_dumpmemory(struct ctdb_context *ctdb, int argc, const char **
return 0;
}
/*
  handler for memory dump replies

  Writes the dump carried in data to stdout and then terminates the
  process, which is what ends the wait loop of the command that asked
  for the dump. Exits with status 0 once the complete dump has been
  written and with status 1 if writing to stdout fails.
  ctdb, srvid and private_data are not used.
 */
static void mem_dump_handler(struct ctdb_context *ctdb, uint64_t srvid,
			     TDB_DATA data, void *private_data)
{
	const uint8_t *p = data.dptr;
	size_t left = data.dsize;

	/* write() may accept fewer bytes than requested (for example when
	   stdout is a pipe), so keep writing until nothing is left */
	while (left > 0) {
		ssize_t n = write(1, p, left);
		if (n <= 0) {
			DEBUG(DEBUG_ERR, ("Failed to write memory dump to stdout\n"));
			exit(1);
		}
		p += n;
		left -= (size_t)n;
	}

	exit(0);
}
/*
dump memory usage on the recovery daemon
*/
static int control_rddumpmemory(struct ctdb_context *ctdb, int argc, const char **argv)
{
int ret;
TDB_DATA data;
struct rd_memdump_reply rd;
rd.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
if (rd.pnn == -1) {
DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
return -1;
}
rd.srvid = getpid();
/* register a message port for receiveing the reply so that we
can receive the reply
*/
ctdb_set_message_handler(ctdb, rd.srvid, mem_dump_handler, NULL);
data.dptr = (uint8_t *)&rd;
data.dsize = sizeof(rd);
ret = ctdb_send_message(ctdb, options.pnn, CTDB_SRVID_MEM_DUMP, data);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to send memdump request message to %u\n", options.pnn));
return -1;
}
/* this loop will terminate when we have received the reply */
while (1) {
event_loop_once(ctdb->ev);
}
return 0;
}
/*
list all nodes in the cluster
*/
@ -1499,6 +1548,7 @@ static const struct {
{ "getdebug", control_getdebug, true, "get debug level" },
{ "attach", control_attach, true, "attach to a database", "<dbname>" },
{ "dumpmemory", control_dumpmemory, true, "dump memory map to stdout" },
{ "rddumpmemory", control_rddumpmemory, true, "dump memory map from the recovery daemon to stdout" },
{ "getpid", control_getpid, true, "get ctdbd process ID" },
{ "disable", control_disable, true, "disable a nodes public IP" },
{ "enable", control_enable, true, "enable a nodes public IP" },