mirror of
https://github.com/samba-team/samba.git
synced 2024-12-22 13:34:15 +03:00
add improvements to tracking memory usage in ctdbd and the recovery daemon
and a ctdb command, "ctdb rddumpmemory", to pull the talloc memory map from a recovery daemon (This used to be ctdb commit d23950be7406cf288f48b660c0f57a9b8d7bdd05)
This commit is contained in:
parent
78081de82a
commit
27a7f854f5
@ -277,7 +277,7 @@ int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
|
||||
|
||||
DLIST_ADD_END(queue->out_queue, pkt, struct ctdb_queue_pkt *);
|
||||
|
||||
if (LogLevel > DEBUG_NOTICE) {
|
||||
if (queue->ctdb->tunable.verbose_memory_names != 0) {
|
||||
struct ctdb_req_header *hdr = (struct ctdb_req_header *)pkt->data;
|
||||
switch (hdr->operation) {
|
||||
case CTDB_REQ_CONTROL: {
|
||||
|
@ -90,6 +90,12 @@ struct ctdb_call_info {
|
||||
*/
|
||||
#define CTDB_SRVID_VACUUM_FETCH 0xF700000000000000LL
|
||||
|
||||
/*
|
||||
a message to tell the recovery daemon to produce a talloc memdump
|
||||
and send it back to the requester (see struct rd_memdump_reply)
|
||||
*/
|
||||
#define CTDB_SRVID_MEM_DUMP 0xF800000000000000LL
|
||||
|
||||
|
||||
/* used on the domain socket, send a pdu to the local daemon */
|
||||
#define CTDB_CURRENT_NODE 0xF0000001
|
||||
|
@ -38,6 +38,14 @@
|
||||
#define CTDB_FETCH_FUNC 0xFF000002
|
||||
|
||||
|
||||
/*
|
||||
recovery daemon memdump reply address
|
||||
*/
|
||||
struct rd_memdump_reply {
|
||||
uint32_t pnn; /* node number the recovery daemon sends the dump back to */
|
||||
uint64_t srvid; /* message port (srvid) on that node that receives the dump */
|
||||
};
|
||||
|
||||
/*
|
||||
a tcp connection description
|
||||
*/
|
||||
@ -93,6 +101,7 @@ struct ctdb_tunable {
|
||||
uint32_t disable_when_unhealthy;
|
||||
uint32_t reclock_ping_period;
|
||||
uint32_t no_ip_failback;
|
||||
uint32_t verbose_memory_names;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1275,5 +1284,6 @@ void ctdb_load_nodes_file(struct ctdb_context *ctdb);
|
||||
|
||||
int ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode);
|
||||
|
||||
int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata);
|
||||
|
||||
#endif
|
||||
|
@ -39,7 +39,7 @@ struct ctdb_control_state {
|
||||
/*
|
||||
dump talloc memory hierarchy, returning it as a blob to the client
|
||||
*/
|
||||
static int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata)
|
||||
int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata)
|
||||
{
|
||||
/* dump to a file, then send the file as a blob */
|
||||
FILE *f;
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "system/wait.h"
|
||||
#include "../include/ctdb.h"
|
||||
#include "../include/ctdb_private.h"
|
||||
#include <sys/socket.h>
|
||||
|
||||
static void daemon_incoming_packet(void *, struct ctdb_req_header *);
|
||||
|
||||
@ -540,6 +541,19 @@ static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde,
|
||||
set_close_on_exec(fd);
|
||||
|
||||
client = talloc_zero(ctdb, struct ctdb_client);
|
||||
if (ctdb->tunable.verbose_memory_names != 0) {
|
||||
struct ucred cr;
|
||||
socklen_t crl = sizeof(struct ucred);
|
||||
if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &crl) == 0) {
|
||||
#if 0
|
||||
/* This causes client to later become NULL ? */
|
||||
talloc_set_name(client, "struct ctdb_client: pid:%u", (unsigned)cr.pid);
|
||||
#else
|
||||
DEBUG(DEBUG_ERR, ("Client with pid:%u connected to struct ctdb_client %p\n", (unsigned)cr.pid, client));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
client->ctdb = ctdb;
|
||||
client->fd = fd;
|
||||
client->client_id = ctdb_reqid_new(ctdb, client);
|
||||
|
@ -1599,6 +1599,47 @@ static void election_send_request(struct event_context *ev, struct timed_event *
|
||||
rec->send_election_te = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
handler for memory dumps
|
||||
*/
|
||||
static void mem_dump_handler(struct ctdb_context *ctdb, uint64_t srvid,
|
||||
TDB_DATA data, void *private_data)
|
||||
{
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
|
||||
TDB_DATA *dump;
|
||||
int ret;
|
||||
struct rd_memdump_reply *rd;
|
||||
|
||||
if (data.dsize != sizeof(struct rd_memdump_reply)) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Wrong size of return address.\n"));
|
||||
return;
|
||||
}
|
||||
rd = (struct rd_memdump_reply *)data.dptr;
|
||||
|
||||
dump = talloc_zero(tmp_ctx, TDB_DATA);
|
||||
if (dump == NULL) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Failed to allocate memory for memdump\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return;
|
||||
}
|
||||
ret = ctdb_dump_memory(ctdb, dump);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " ctdb_dump_memory() failed\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(DEBUG_ERR, ("recovery master memory dump\n"));
|
||||
|
||||
ret = ctdb_send_message(ctdb, rd->pnn, rd->srvid, *dump);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to send rd memdump reply message\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
handler for recovery master elections
|
||||
*/
|
||||
@ -2122,6 +2163,9 @@ static void monitor_cluster(struct ctdb_context *ctdb)
|
||||
rec->rec_file_fd = -1;
|
||||
ctdb_recoverd_get_pnn_lock(rec);
|
||||
|
||||
/* register a message port for sending memory dumps */
|
||||
ctdb_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
|
||||
|
||||
/* register a message port for recovery elections */
|
||||
ctdb_set_message_handler(ctdb, CTDB_SRVID_RECOVERY, election_handler, rec);
|
||||
|
||||
|
@ -48,6 +48,7 @@ static const struct {
|
||||
{ "DisableWhenUnhealthy", 0, offsetof(struct ctdb_tunable, disable_when_unhealthy) },
|
||||
{ "ReclockPingPeriod", 60, offsetof(struct ctdb_tunable, reclock_ping_period) },
|
||||
{ "NoIPFailback", 0, offsetof(struct ctdb_tunable, no_ip_failback) },
|
||||
{ "VerboseMemoryNames", 0, offsetof(struct ctdb_tunable, verbose_memory_names) },
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1407,6 +1407,55 @@ static int control_dumpmemory(struct ctdb_context *ctdb, int argc, const char **
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
handler for memory dumps
|
||||
*/
|
||||
static void mem_dump_handler(struct ctdb_context *ctdb, uint64_t srvid,
|
||||
TDB_DATA data, void *private_data)
|
||||
{
|
||||
write(1, data.dptr, data.dsize);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
dump memory usage on the recovery daemon
|
||||
*/
|
||||
static int control_rddumpmemory(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
int ret;
|
||||
TDB_DATA data;
|
||||
struct rd_memdump_reply rd;
|
||||
|
||||
rd.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
|
||||
if (rd.pnn == -1) {
|
||||
DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
|
||||
return -1;
|
||||
}
|
||||
rd.srvid = getpid();
|
||||
|
||||
/* register a message port for receiveing the reply so that we
|
||||
can receive the reply
|
||||
*/
|
||||
ctdb_set_message_handler(ctdb, rd.srvid, mem_dump_handler, NULL);
|
||||
|
||||
|
||||
data.dptr = (uint8_t *)&rd;
|
||||
data.dsize = sizeof(rd);
|
||||
|
||||
ret = ctdb_send_message(ctdb, options.pnn, CTDB_SRVID_MEM_DUMP, data);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to send memdump request message to %u\n", options.pnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* this loop will terminate when we have received the reply */
|
||||
while (1) {
|
||||
event_loop_once(ctdb->ev);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
list all nodes in the cluster
|
||||
*/
|
||||
@ -1499,6 +1548,7 @@ static const struct {
|
||||
{ "getdebug", control_getdebug, true, "get debug level" },
|
||||
{ "attach", control_attach, true, "attach to a database", "<dbname>" },
|
||||
{ "dumpmemory", control_dumpmemory, true, "dump memory map to stdout" },
|
||||
{ "rddumpmemory", control_rddumpmemory, true, "dump memory map from the recovery daemon to stdout" },
|
||||
{ "getpid", control_getpid, true, "get ctdbd process ID" },
|
||||
{ "disable", control_disable, true, "disable a nodes public IP" },
|
||||
{ "enable", control_enable, true, "enable a nodes public IP" },
|
||||
|
Loading…
Reference in New Issue
Block a user