2008-01-15 00:42:12 +03:00
/*
   monitoring links to all other nodes to detect dead nodes

   Copyright (C) Ronnie Sahlberg  2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
2015-10-26 08:50:46 +03:00
# include "replace.h"
2008-01-15 00:42:12 +03:00
# include "system/filesys.h"
2015-10-26 08:50:46 +03:00
# include "system/network.h"
# include "system/time.h"
2008-01-15 00:42:12 +03:00
# include "system/wait.h"
2015-10-26 08:50:46 +03:00
# include <talloc.h>
# include <tevent.h>
# include "lib/util/debug.h"
# include "lib/util/samba_util.h"
# include "ctdb_private.h"
2008-01-15 00:42:12 +03:00
2015-10-23 06:17:34 +03:00
# include "common/common.h"
2015-11-11 07:41:10 +03:00
# include "common/logging.h"
2015-10-23 06:17:34 +03:00
2008-01-15 00:42:12 +03:00
/*
see if any nodes are dead
*/
2015-10-26 08:50:09 +03:00
static void ctdb_check_for_dead_nodes ( struct tevent_context * ev ,
struct tevent_timer * te ,
2008-01-15 00:42:12 +03:00
struct timeval t , void * private_data )
{
struct ctdb_context * ctdb = talloc_get_type ( private_data , struct ctdb_context ) ;
int i ;
/* send a keepalive to all other nodes, unless */
for ( i = 0 ; i < ctdb - > num_nodes ; i + + ) {
struct ctdb_node * node = ctdb - > nodes [ i ] ;
2009-06-01 08:18:34 +04:00
if ( node - > flags & NODE_FLAGS_DELETED ) {
continue ;
}
2008-01-15 00:42:12 +03:00
if ( node - > pnn = = ctdb - > pnn ) {
continue ;
}
if ( node - > flags & NODE_FLAGS_DISCONNECTED ) {
/* it might have come alive again */
if ( node - > rx_cnt ! = 0 ) {
ctdb_node_connected ( node ) ;
}
continue ;
}
if ( node - > rx_cnt = = 0 ) {
node - > dead_count + + ;
} else {
node - > dead_count = 0 ;
}
node - > rx_cnt = 0 ;
if ( node - > dead_count > = ctdb - > tunable . keepalive_limit ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_NOTICE , ( " dead count reached for node %u \n " , node - > pnn ) ) ;
2008-01-15 00:42:12 +03:00
ctdb_node_dead ( node ) ;
ctdb_send_keepalive ( ctdb , node - > pnn ) ;
/* maybe tell the transport layer to kill the
sockets as well ?
*/
continue ;
}
2008-11-20 05:35:08 +03:00
DEBUG ( DEBUG_DEBUG , ( " sending keepalive to %u \n " , node - > pnn ) ) ;
ctdb_send_keepalive ( ctdb , node - > pnn ) ;
2008-01-15 00:42:12 +03:00
node - > tx_cnt = 0 ;
}
2015-10-26 08:50:09 +03:00
tevent_add_timer ( ctdb - > ev , ctdb - > keepalive_ctx ,
timeval_current_ofs ( ctdb - > tunable . keepalive_interval , 0 ) ,
ctdb_check_for_dead_nodes , ctdb ) ;
2008-01-15 00:42:12 +03:00
}
void ctdb_start_keepalive ( struct ctdb_context * ctdb )
{
2015-10-26 08:50:09 +03:00
struct tevent_timer * te ;
2008-01-15 00:42:12 +03:00
ctdb - > keepalive_ctx = talloc_new ( ctdb ) ;
CTDB_NO_MEMORY_FATAL ( ctdb , ctdb - > keepalive_ctx ) ;
2015-10-26 08:50:09 +03:00
te = tevent_add_timer ( ctdb - > ev , ctdb - > keepalive_ctx ,
timeval_current_ofs ( ctdb - > tunable . keepalive_interval , 0 ) ,
ctdb_check_for_dead_nodes , ctdb ) ;
2008-01-15 00:42:12 +03:00
CTDB_NO_MEMORY_FATAL ( ctdb , te ) ;
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_NOTICE , ( " Keepalive monitoring has been started \n " ) ) ;
2008-01-15 00:42:12 +03:00
}
void ctdb_stop_keepalive ( struct ctdb_context * ctdb )
{
talloc_free ( ctdb - > keepalive_ctx ) ;
ctdb - > keepalive_ctx = NULL ;
}