mirror of
https://github.com/samba-team/samba.git
synced 2025-01-29 21:47:30 +03:00
a2a5904f66
had been completely idle during that interval. If we had been sending other packets such as Messages, Calls or Controls there wouldn't be any need for an explicit keepalive and thus we didn't send one. This does make it somewhat awkward when analyzing traces since it is non-intuitive when keepalives are sent and when they are not sent. Change the keepalive logic to always send a keepalive regardless of whether the link is idle or not. (This used to be ctdb commit 7a18f33ec7512100dd067c65f0470889ff8fd591)
104 lines
2.7 KiB
C
104 lines
2.7 KiB
C
/*
|
|
monitoring links to all other nodes to detect dead nodes
|
|
|
|
|
|
Copyright (C) Ronnie Sahlberg 2007
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "includes.h"
|
|
#include "lib/events/events.h"
|
|
#include "system/filesys.h"
|
|
#include "system/wait.h"
|
|
#include "../include/ctdb_private.h"
|
|
|
|
|
|
/*
|
|
see if any nodes are dead
|
|
*/
|
|
static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te,
|
|
struct timeval t, void *private_data)
|
|
{
|
|
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
|
int i;
|
|
|
|
/* send a keepalive to all other nodes, unless */
|
|
for (i=0;i<ctdb->num_nodes;i++) {
|
|
struct ctdb_node *node = ctdb->nodes[i];
|
|
if (node->pnn == ctdb->pnn) {
|
|
continue;
|
|
}
|
|
|
|
if (node->flags & NODE_FLAGS_DISCONNECTED) {
|
|
/* it might have come alive again */
|
|
if (node->rx_cnt != 0) {
|
|
ctdb_node_connected(node);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
|
|
if (node->rx_cnt == 0) {
|
|
node->dead_count++;
|
|
} else {
|
|
node->dead_count = 0;
|
|
}
|
|
|
|
node->rx_cnt = 0;
|
|
|
|
if (node->dead_count >= ctdb->tunable.keepalive_limit) {
|
|
DEBUG(DEBUG_NOTICE,("dead count reached for node %u\n", node->pnn));
|
|
ctdb_node_dead(node);
|
|
ctdb_send_keepalive(ctdb, node->pnn);
|
|
/* maybe tell the transport layer to kill the
|
|
sockets as well?
|
|
*/
|
|
continue;
|
|
}
|
|
|
|
DEBUG(DEBUG_DEBUG,("sending keepalive to %u\n", node->pnn));
|
|
ctdb_send_keepalive(ctdb, node->pnn);
|
|
|
|
node->tx_cnt = 0;
|
|
}
|
|
|
|
event_add_timed(ctdb->ev, ctdb->keepalive_ctx,
|
|
timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
|
|
ctdb_check_for_dead_nodes, ctdb);
|
|
}
|
|
|
|
|
|
void ctdb_start_keepalive(struct ctdb_context *ctdb)
|
|
{
|
|
struct timed_event *te;
|
|
|
|
ctdb->keepalive_ctx = talloc_new(ctdb);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, ctdb->keepalive_ctx);
|
|
|
|
te = event_add_timed(ctdb->ev, ctdb->keepalive_ctx,
|
|
timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
|
|
ctdb_check_for_dead_nodes, ctdb);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, te);
|
|
|
|
DEBUG(DEBUG_NOTICE,("Keepalive monitoring has been started\n"));
|
|
}
|
|
|
|
void ctdb_stop_keepalive(struct ctdb_context *ctdb)
|
|
{
|
|
talloc_free(ctdb->keepalive_ctx);
|
|
ctdb->keepalive_ctx = NULL;
|
|
}
|
|
|