mirror of
https://github.com/samba-team/samba.git
synced 2025-01-10 01:18:15 +03:00
First step in health monitoring of cluster nodes. When not healthy, they will be marked disabled.
(This used to be ctdb commit d3dbd9fc4db21632075b56fc52cf95435c63374a)
This commit is contained in:
parent
ee747b5bd6
commit
ac55bc4166
@ -222,14 +222,16 @@ uint32_t ctdb_get_vnn(struct ctdb_context *ctdb)
|
||||
}
|
||||
|
||||
/*
|
||||
return the number of connected nodes
|
||||
return the number of enabled nodes
|
||||
*/
|
||||
uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb)
|
||||
uint32_t ctdb_get_num_enabled_nodes(struct ctdb_context *ctdb)
|
||||
{
|
||||
int i;
|
||||
uint32_t count=0;
|
||||
for (i=0;i<ctdb->vnn_map->size;i++) {
|
||||
if (ctdb->nodes[ctdb->vnn_map->map[i]]->flags & NODE_FLAGS_CONNECTED) {
|
||||
struct ctdb_node *node = ctdb->nodes[ctdb->vnn_map->map[i]];
|
||||
if ((node->flags & NODE_FLAGS_CONNECTED) &&
|
||||
!(node->flags & NODE_FLAGS_DISABLED)) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
@ -1364,7 +1364,7 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name)
|
||||
ctdb_db->db_id = *(uint32_t *)data.dptr;
|
||||
talloc_free(data.dptr);
|
||||
|
||||
ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
|
||||
ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(2, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed to get dbpath for database '%s'\n", name));
|
||||
talloc_free(ctdb_db);
|
||||
|
@ -697,6 +697,8 @@ again:
|
||||
"MonitorFrequency", &ctdb->tunable.monitor_frequency);
|
||||
ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE,
|
||||
"ElectionTimeout", &ctdb->tunable.election_timeout);
|
||||
ctdb_ctrl_get_tunable(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE,
|
||||
"TakeoverTimeout", &ctdb->tunable.takeover_timeout);
|
||||
|
||||
vnn = ctdb_ctrl_getvnn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
|
||||
if (vnn == (uint32_t)-1) {
|
||||
|
@ -372,7 +372,7 @@ int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB
|
||||
|
||||
if (key.dsize == 0 && data.dsize == 0) {
|
||||
state->null_count++;
|
||||
if (state->null_count != ctdb_get_num_connected_nodes(ctdb)) {
|
||||
if (state->null_count != ctdb_get_num_enabled_nodes(ctdb)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -35,6 +35,7 @@ static const struct {
|
||||
{ "RecoverTimeout", 5, offsetof(struct ctdb_tunable, recover_timeout) },
|
||||
{ "MonitorFrequency", 1, offsetof(struct ctdb_tunable, monitor_frequency) },
|
||||
{ "ElectionTimeout", 3, offsetof(struct ctdb_tunable, election_timeout) },
|
||||
{ "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) },
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -21,8 +21,10 @@ case $cmd in
|
||||
service smb stop > /dev/null 2>&1
|
||||
service winbind stop > /dev/null 2>&1
|
||||
|
||||
# start Samba service
|
||||
service smb start
|
||||
# start Samba service. Start it reniced, as under very heavy load
|
||||
# the number of smbd processes will mean that it leaves few cycles for
|
||||
# anything else
|
||||
nice service smb start
|
||||
service winbind start
|
||||
|
||||
# wait for the Samba tcp ports to become available
|
||||
|
@ -50,6 +50,7 @@ struct ctdb_tunable {
|
||||
uint32_t recover_timeout;
|
||||
uint32_t monitor_frequency;
|
||||
uint32_t election_timeout;
|
||||
uint32_t takeover_timeout;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -109,6 +110,7 @@ struct ctdb_node {
|
||||
void *private_data; /* private to transport */
|
||||
uint32_t vnn;
|
||||
#define NODE_FLAGS_CONNECTED 0x00000001
|
||||
#define NODE_FLAGS_DISABLED 0x00000002
|
||||
uint32_t flags;
|
||||
|
||||
/* used by the dead node monitoring */
|
||||
@ -905,7 +907,7 @@ int32_t ctdb_control_thaw(struct ctdb_context *ctdb);
|
||||
|
||||
int ctdb_start_recoverd(struct ctdb_context *ctdb);
|
||||
|
||||
uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);
|
||||
uint32_t ctdb_get_num_enabled_nodes(struct ctdb_context *ctdb);
|
||||
|
||||
int ctdb_start_monitoring(struct ctdb_context *ctdb);
|
||||
void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
|
||||
|
@ -27,7 +27,7 @@
|
||||
#include "../include/ctdb_private.h"
|
||||
|
||||
|
||||
#define TAKEOVER_TIMEOUT() timeval_current_ofs(5,0)
|
||||
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
|
||||
|
||||
#define CTDB_ARP_INTERVAL 1
|
||||
#define CTDB_ARP_REPEAT 3
|
||||
@ -403,7 +403,8 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
|
||||
/* work out which node will look after each public IP */
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) {
|
||||
if ((nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) &&
|
||||
!(nodemap->nodes[i].flags & NODE_FLAGS_DISABLED)) {
|
||||
ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn;
|
||||
} else {
|
||||
/* assign this dead nodes IP to the next higher node */
|
||||
@ -411,6 +412,7 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
j != i;
|
||||
j=(j+1)%nodemap->num) {
|
||||
if ((nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED) &&
|
||||
!(nodemap->nodes[j].flags & NODE_FLAGS_DISABLED) &&
|
||||
ctdb_same_subnet(ctdb->nodes[j]->public_address,
|
||||
ctdb->nodes[i]->public_address,
|
||||
ctdb->nodes[j]->public_netmask_bits)) {
|
||||
|
@ -383,7 +383,7 @@ static int control_shutdown(struct ctdb_context *ctdb, int argc, const char **ar
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ctdb_ctrl_shutdown(ctdb, timeval_current_ofs(1, 0), options.vnn);
|
||||
ret = ctdb_ctrl_shutdown(ctdb, TIMELIMIT(), options.vnn);
|
||||
if (ret != 0) {
|
||||
printf("Unable to shutdown node %u\n", options.vnn);
|
||||
return ret;
|
||||
|
Loading…
Reference in New Issue
Block a user