/*
   monitoring links to all other nodes to detect dead nodes

   Copyright (C) Ronnie Sahlberg 2007

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
# include "includes.h"
# include "lib/events/events.h"
# include "system/filesys.h"
# include "system/wait.h"
# include "../include/ctdb_private.h"
/*
  see if any nodes are dead
 */
static void ctdb_check_for_dead_nodes ( struct event_context * ev , struct timed_event * te ,
struct timeval t , void * private_data )
{
struct ctdb_context * ctdb = talloc_get_type ( private_data , struct ctdb_context ) ;
int i ;
/* send a keepalive to all other nodes, unless */
for ( i = 0 ; i < ctdb - > num_nodes ; i + + ) {
2007-05-18 17:23:36 +04:00
struct ctdb_node * node = ctdb - > nodes [ i ] ;
if ( node - > vnn = = ctdb - > vnn ) {
2007-05-18 14:06:29 +04:00
continue ;
}
2007-05-18 17:23:36 +04:00
/* it might have come alive again */
if ( ! ( node - > flags & NODE_FLAGS_CONNECTED ) & & node - > rx_cnt ! = 0 ) {
2007-05-19 11:21:58 +04:00
ctdb_node_connected ( node ) ;
continue ;
2007-05-18 14:06:29 +04:00
}
2007-05-18 17:23:36 +04:00
if ( node - > rx_cnt = = 0 ) {
node - > dead_count + + ;
2007-05-18 14:06:29 +04:00
} else {
2007-05-18 17:23:36 +04:00
node - > dead_count = 0 ;
2007-05-18 14:06:29 +04:00
}
2007-05-18 17:23:36 +04:00
node - > rx_cnt = 0 ;
if ( node - > dead_count > = CTDB_MONITORING_DEAD_COUNT ) {
2007-05-19 10:59:10 +04:00
ctdb_node_dead ( node ) ;
2007-05-18 17:23:36 +04:00
/* maybe tell the transport layer to kill the
sockets as well ?
2007-05-18 14:06:29 +04:00
*/
continue ;
}
2007-05-19 04:20:19 +04:00
if ( node - > tx_cnt = = 0 ) {
ctdb_send_keepalive ( ctdb , node - > vnn ) ;
}
2007-05-18 14:06:29 +04:00
2007-05-19 04:20:19 +04:00
node - > tx_cnt = 0 ;
2007-05-18 14:06:29 +04:00
}
event_add_timed ( ctdb - > ev , ctdb ,
timeval_current_ofs ( CTDB_MONITORING_TIMEOUT , 0 ) ,
ctdb_check_for_dead_nodes , ctdb ) ;
}
/*
  start watching for nodes that might be dead
 */
int ctdb_start_monitoring ( struct ctdb_context * ctdb )
{
event_add_timed ( ctdb - > ev , ctdb ,
timeval_current_ofs ( CTDB_MONITORING_TIMEOUT , 0 ) ,
ctdb_check_for_dead_nodes , ctdb ) ;
return 0 ;
}