mirror of
https://github.com/samba-team/samba.git
synced 2025-01-13 13:18:06 +03:00
e6170b5389
This is used to mark nodes as being DELETED internally in ctdb so that nodes are not renumbered if / when they are removed from the nodes file. This is used to be able to do "ctdb reloadnodes" at runtime without causing nodes to be renumbered. To do this, instead of deleting a node from the nodes file, just comment it out like 1.0.0.1 #1.0.0.2 1.0.0.3 After removing 1.0.0.2 from the cluster, the remaining nodes retain their pnn's from prior to the deletion, namely 0 and 2 Any line in the nodes file that is commented out represents a DELETED pnn (This used to be ctdb commit 6a5e4fd7fa391206b463bb4e976502f3ac5bd343)
109 lines
2.8 KiB
C
109 lines
2.8 KiB
C
/*
|
|
monitoring links to all other nodes to detect dead nodes
|
|
|
|
|
|
Copyright (C) Ronnie Sahlberg 2007
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "includes.h"
|
|
#include "lib/events/events.h"
|
|
#include "system/filesys.h"
|
|
#include "system/wait.h"
|
|
#include "../include/ctdb_private.h"
|
|
|
|
|
|
/*
|
|
see if any nodes are dead
|
|
*/
|
|
static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te,
|
|
struct timeval t, void *private_data)
|
|
{
|
|
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
|
int i;
|
|
|
|
/* send a keepalive to all other nodes, unless */
|
|
for (i=0;i<ctdb->num_nodes;i++) {
|
|
struct ctdb_node *node = ctdb->nodes[i];
|
|
|
|
if (node->flags & NODE_FLAGS_DELETED) {
|
|
continue;
|
|
}
|
|
|
|
if (node->pnn == ctdb->pnn) {
|
|
continue;
|
|
}
|
|
|
|
if (node->flags & NODE_FLAGS_DISCONNECTED) {
|
|
/* it might have come alive again */
|
|
if (node->rx_cnt != 0) {
|
|
ctdb_node_connected(node);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
|
|
if (node->rx_cnt == 0) {
|
|
node->dead_count++;
|
|
} else {
|
|
node->dead_count = 0;
|
|
}
|
|
|
|
node->rx_cnt = 0;
|
|
|
|
if (node->dead_count >= ctdb->tunable.keepalive_limit) {
|
|
DEBUG(DEBUG_NOTICE,("dead count reached for node %u\n", node->pnn));
|
|
ctdb_node_dead(node);
|
|
ctdb_send_keepalive(ctdb, node->pnn);
|
|
/* maybe tell the transport layer to kill the
|
|
sockets as well?
|
|
*/
|
|
continue;
|
|
}
|
|
|
|
DEBUG(DEBUG_DEBUG,("sending keepalive to %u\n", node->pnn));
|
|
ctdb_send_keepalive(ctdb, node->pnn);
|
|
|
|
node->tx_cnt = 0;
|
|
}
|
|
|
|
event_add_timed(ctdb->ev, ctdb->keepalive_ctx,
|
|
timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
|
|
ctdb_check_for_dead_nodes, ctdb);
|
|
}
|
|
|
|
|
|
void ctdb_start_keepalive(struct ctdb_context *ctdb)
|
|
{
|
|
struct timed_event *te;
|
|
|
|
ctdb->keepalive_ctx = talloc_new(ctdb);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, ctdb->keepalive_ctx);
|
|
|
|
te = event_add_timed(ctdb->ev, ctdb->keepalive_ctx,
|
|
timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
|
|
ctdb_check_for_dead_nodes, ctdb);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, te);
|
|
|
|
DEBUG(DEBUG_NOTICE,("Keepalive monitoring has been started\n"));
|
|
}
|
|
|
|
void ctdb_stop_keepalive(struct ctdb_context *ctdb)
|
|
{
|
|
talloc_free(ctdb->keepalive_ctx);
|
|
ctdb->keepalive_ctx = NULL;
|
|
}
|
|
|