2007-05-04 08:30:18 +10:00
/*
   ctdb recovery daemon

   Copyright (C) Ronnie Sahlberg  2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
2015-10-26 16:50:46 +11:00
# include "replace.h"
2007-05-04 08:30:18 +10:00
# include "system/filesys.h"
2007-05-10 14:06:48 +10:00
# include "system/time.h"
2007-09-14 10:16:36 +10:00
# include "system/network.h"
2007-10-22 12:34:08 +10:00
# include "system/wait.h"
2015-10-26 16:50:46 +11:00
# include <popt.h>
# include <talloc.h>
# include <tevent.h>
# include <tdb.h>
2014-08-15 15:46:33 +10:00
# include "lib/tdb_wrap/tdb_wrap.h"
2014-08-15 16:18:05 +10:00
# include "lib/util/dlinklist.h"
2015-10-26 16:50:46 +11:00
# include "lib/util/debug.h"
# include "lib/util/samba_util.h"
2016-11-29 12:55:06 +11:00
# include "lib/util/sys_rw.h"
2015-09-23 16:10:59 -07:00
# include "lib/util/util_process.h"
2015-10-26 16:50:46 +11:00
# include "ctdb_private.h"
# include "ctdb_client.h"
2020-03-16 16:07:26 +11:00
# include "protocol/protocol_basic.h"
2018-06-28 20:15:37 +10:00
# include "common/system_socket.h"
2015-10-23 14:17:34 +11:00
# include "common/common.h"
2015-11-11 15:41:10 +11:00
# include "common/logging.h"
2007-05-04 08:30:18 +10:00
2018-08-21 13:41:22 +10:00
# include "server/ctdb_config.h"
2016-02-17 20:20:03 +11:00
# include "ctdb_cluster_mutex.h"
2007-06-07 16:34:33 +10:00
2013-08-16 20:02:34 +10:00
/*
 * One queued SRVID request that still needs to be processed.
 *
 * Entries are chained through next/prev so they can be managed with
 * the DLIST_* macros.
 */
struct srvid_list {
	/* Doubly-linked list linkage */
	struct srvid_list *next, *prev;
	/* The pending request carried by this entry */
	struct ctdb_srvid_message *request;
};
/* Container holding the head of a list of queued SRVID requests */
struct srvid_requests {
	/* First entry of the list, NULL when nothing is queued */
	struct srvid_list *requests;
};
2013-08-16 20:02:34 +10:00
static void srvid_request_reply ( struct ctdb_context * ctdb ,
2015-10-29 14:32:49 +11:00
struct ctdb_srvid_message * request ,
2013-08-16 20:02:34 +10:00
TDB_DATA result )
{
/* Someone that sent srvid==0 does not want a reply */
if ( request - > srvid = = 0 ) {
talloc_free ( request ) ;
return ;
}
if ( ctdb_client_send_message ( ctdb , request - > pnn , request - > srvid ,
result ) = = 0 ) {
DEBUG ( DEBUG_INFO , ( " Sent SRVID reply to %u:%llu \n " ,
( unsigned ) request - > pnn ,
( unsigned long long ) request - > srvid ) ) ;
} else {
DEBUG ( DEBUG_ERR , ( " Failed to send SRVID reply to %u:%llu \n " ,
( unsigned ) request - > pnn ,
( unsigned long long ) request - > srvid ) ) ;
}
talloc_free ( request ) ;
}
/*
 * Answer every queued SRVID request with the same result and then
 * dispose of the queue.
 *
 * Does nothing when *requests is NULL.  On return *requests has been
 * freed and reset to NULL.
 */
static void srvid_requests_reply(struct ctdb_context *ctdb,
				 struct srvid_requests **requests,
				 TDB_DATA result)
{
	struct srvid_list *entry;

	if (*requests == NULL) {
		return;
	}

	entry = (*requests)->requests;
	while (entry != NULL) {
		/* Frees entry->request but leaves the list entry intact */
		srvid_request_reply(ctdb, entry->request, result);
		entry = entry->next;
	}

	/* Free the list structure... */
	TALLOC_FREE(*requests);
}
static void srvid_request_add ( struct ctdb_context * ctdb ,
struct srvid_requests * * requests ,
2015-10-29 14:32:49 +11:00
struct ctdb_srvid_message * request )
2013-08-16 20:02:34 +10:00
{
struct srvid_list * t ;
int32_t ret ;
TDB_DATA result ;
if ( * requests = = NULL ) {
* requests = talloc_zero ( ctdb , struct srvid_requests ) ;
if ( * requests = = NULL ) {
goto nomem ;
}
}
t = talloc_zero ( * requests , struct srvid_list ) ;
if ( t = = NULL ) {
/* If *requests was just allocated above then free it */
if ( ( * requests ) - > requests = = NULL ) {
TALLOC_FREE ( * requests ) ;
}
goto nomem ;
}
2015-10-29 14:32:49 +11:00
t - > request = ( struct ctdb_srvid_message * ) talloc_steal ( t , request ) ;
2013-08-16 20:02:34 +10:00
DLIST_ADD ( ( * requests ) - > requests , t ) ;
return ;
nomem :
/* Failed to add the request to the list. Send a fail. */
DEBUG ( DEBUG_ERR , ( __location__
" Out of memory, failed to queue SRVID request \n " ) ) ;
ret = - ENOMEM ;
result . dsize = sizeof ( ret ) ;
result . dptr = ( uint8_t * ) & ret ;
srvid_request_reply ( ctdb , request , result ) ;
}
2015-02-08 20:50:38 +11:00
/*
 * An abstraction to allow an operation (takeover runs, recoveries,
 * ...) to be disabled for a given timeout.
 */
struct ctdb_op_state {
	/* Non-NULL while the operation is disabled; fires to re-enable it */
	struct tevent_timer *timer;
	/* True between ctdb_op_begin() and ctdb_op_end() */
	bool in_progress;
	/* Name of the operation, used in log messages */
	const char *name;
};
/*
 * Allocate the state for a named operation as a child of mem_ctx.
 *
 * The name pointer is stored as-is (not copied).  Returns NULL if the
 * allocation fails.
 */
static struct ctdb_op_state *ctdb_op_init(TALLOC_CTX *mem_ctx, const char *name)
{
	struct ctdb_op_state *op;

	op = talloc_zero(mem_ctx, struct ctdb_op_state);
	if (op == NULL) {
		return NULL;
	}

	op->in_progress = false;
	op->name = name;

	return op;
}
static bool ctdb_op_is_disabled ( struct ctdb_op_state * state )
{
return state - > timer ! = NULL ;
}
static bool ctdb_op_begin ( struct ctdb_op_state * state )
{
if ( ctdb_op_is_disabled ( state ) ) {
DEBUG ( DEBUG_NOTICE ,
( " Unable to begin - %s are disabled \n " , state - > name ) ) ;
return false ;
}
state - > in_progress = true ;
return true ;
}
static bool ctdb_op_end ( struct ctdb_op_state * state )
{
return state - > in_progress = false ;
}
static bool ctdb_op_is_in_progress ( struct ctdb_op_state * state )
{
return state - > in_progress ;
}
static void ctdb_op_enable ( struct ctdb_op_state * state )
{
TALLOC_FREE ( state - > timer ) ;
}
2015-10-26 16:50:09 +11:00
static void ctdb_op_timeout_handler ( struct tevent_context * ev ,
struct tevent_timer * te ,
2015-02-08 20:50:38 +11:00
struct timeval yt , void * p )
{
struct ctdb_op_state * state =
talloc_get_type ( p , struct ctdb_op_state ) ;
DEBUG ( DEBUG_NOTICE , ( " Reenabling %s after timeout \n " , state - > name ) ) ;
ctdb_op_enable ( state ) ;
}
static int ctdb_op_disable ( struct ctdb_op_state * state ,
struct tevent_context * ev ,
uint32_t timeout )
{
if ( timeout = = 0 ) {
DEBUG ( DEBUG_NOTICE , ( " Reenabling %s \n " , state - > name ) ) ;
ctdb_op_enable ( state ) ;
return 0 ;
}
if ( state - > in_progress ) {
DEBUG ( DEBUG_ERR ,
( " Unable to disable %s - in progress \n " , state - > name ) ) ;
return - EAGAIN ;
}
DEBUG ( DEBUG_NOTICE , ( " Disabling %s for %u seconds \n " ,
state - > name , timeout ) ) ;
/* Clear any old timers */
talloc_free ( state - > timer ) ;
/* Arrange for the timeout to occur */
state - > timer = tevent_add_timer ( ev , state ,
timeval_current_ofs ( timeout , 0 ) ,
ctdb_op_timeout_handler , state ) ;
if ( state - > timer = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Unable to setup timer \n " ) ) ;
return - ENOMEM ;
}
return 0 ;
}
2009-09-04 02:20:39 +10:00
/* Per-node record of accumulated "banning credits" */
struct ctdb_banning_state {
	/* Node this record is about */
	uint32_t pnn;
	/* Credits accumulated so far */
	uint32_t count;
	/* When the node was last reported as a culprit */
	struct timeval last_reported_time;
};
2021-12-10 11:29:06 +11:00
/*
 * Forward declaration: struct ctdb_recoverd stores a pointer to this
 * type before the full definition appears further down the file.
 */
struct ctdb_cluster_lock_handle ;
2018-09-03 12:39:32 +10:00
2007-06-07 15:18:55 +10:00
/*
 * Private state of the recovery daemon
 */
struct ctdb_recoverd {
	struct ctdb_context *ctdb;

	/* PNN of the current leader, CTDB_UNKNOWN_PNN when not known */
	uint32_t leader;
	/* Timer that drives the periodic leader broadcast loop */
	struct tevent_timer *leader_broadcast_te;
	struct tevent_timer *leader_broadcast_timeout_te;

	/* PNN of this node */
	uint32_t pnn;

	/* Node most recently recorded as a culprit */
	uint32_t last_culprit_node;
	/* talloc array of per-node banning credits, NULL when empty */
	struct ctdb_banning_state *banning_state;

	struct ctdb_node_map_old *nodemap;
	struct timeval priority_time;
	bool need_takeover_run;
	bool need_recovery;

	/* Flags of this node (tested against NODE_FLAGS_INACTIVE) */
	uint32_t node_flags;

	struct tevent_timer *send_election_te;
	/* True while an election is running */
	bool election_in_progress;
	struct tevent_timer *election_timeout;

	struct srvid_requests *reallocate_requests;
	struct ctdb_op_state *takeover_run;
	struct ctdb_op_state *recovery;
	struct ctdb_iface_list_old *ifaces;
	uint32_t *force_rebalance_nodes;

	/* Capabilities last fetched from the connected nodes */
	struct ctdb_node_capabilities *caps;
	bool frozen_on_inactive;

	/* Non-NULL while the cluster lock is held or being taken */
	struct ctdb_cluster_lock_handle *cluster_lock_handle;
	pid_t helper_pid;
};
2007-05-04 08:30:18 +10:00
2007-06-04 20:22:44 +10:00
/*
 * Absolute time recover_timeout seconds from now (per the tunable).
 * Expands to an expression using a variable named "ctdb", which must
 * be in scope wherever the macro is used.
 */
# define CONTROL_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_timeout, 0)
2007-06-06 10:25:46 +10:00
/*
 * Absolute time recover_interval seconds from now (per the tunable).
 * Expands to an expression using a variable named "ctdb", which must
 * be in scope wherever the macro is used.
 */
# define MONITOR_TIMEOUT() timeval_current_ofs(ctdb->tunable.recover_interval, 0)
2007-05-24 13:49:27 +10:00
2015-10-26 16:50:09 +11:00
/*
 * Forward declaration.  The parameter list matches the tevent timer
 * callbacks used elsewhere in this file.
 */
static void ctdb_restart_recd ( struct tevent_context * ev ,
struct tevent_timer * te , struct timeval t ,
void * private_data ) ;
2008-01-05 09:35:43 +11:00
2021-12-08 19:37:39 +11:00
static bool this_node_is_leader ( struct ctdb_recoverd * rec )
{
2020-07-14 15:22:33 +10:00
return rec - > leader = = rec - > pnn ;
2021-12-08 19:37:39 +11:00
}
2021-12-14 10:57:03 +11:00
static bool this_node_can_be_leader ( struct ctdb_recoverd * rec )
{
return ( rec - > node_flags & NODE_FLAGS_INACTIVE ) = = 0 & &
( rec - > ctdb - > capabilities & CTDB_CAP_RECMASTER ) ! = 0 ;
}
2020-07-29 17:57:53 +10:00
static bool node_flags ( struct ctdb_recoverd * rec , uint32_t pnn , uint32_t * flags )
{
size_t i ;
for ( i = 0 ; i < rec - > nodemap - > num ; i + + ) {
struct ctdb_node_and_flags * node = & rec - > nodemap - > nodes [ i ] ;
if ( node - > pnn = = pnn ) {
if ( flags ! = NULL ) {
* flags = node - > flags ;
}
return true ;
}
}
return false ;
}
2007-06-07 16:34:33 +10:00
/*
ban a node for a period of time
*/
2021-12-10 10:31:56 +11:00
static void ctdb_ban_node ( struct ctdb_recoverd * rec , uint32_t pnn )
2007-06-07 16:34:33 +10:00
{
2009-09-04 02:20:39 +10:00
int ret ;
2007-06-07 16:34:33 +10:00
struct ctdb_context * ctdb = rec - > ctdb ;
2021-12-10 10:31:56 +11:00
uint32_t ban_time = ctdb - > tunable . recovery_ban_period ;
2015-10-28 18:18:33 +11:00
struct ctdb_ban_state bantime ;
2007-09-04 10:33:10 +10:00
if ( ! ctdb_validate_pnn ( ctdb , pnn ) ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( " Bad pnn %u in ctdb_ban_node \n " , pnn ) ) ;
2007-06-07 16:48:31 +10:00
return ;
}
2013-06-24 14:18:58 +10:00
DEBUG ( DEBUG_NOTICE , ( " Banning node %u for %u seconds \n " , pnn , ban_time ) ) ;
2009-09-04 02:20:39 +10:00
bantime . pnn = pnn ;
bantime . time = ban_time ;
2007-11-23 12:36:14 +11:00
2009-09-04 02:20:39 +10:00
ret = ctdb_ctrl_set_ban ( ctdb , CONTROL_TIMEOUT ( ) , pnn , & bantime ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to ban node %d \n " , pnn ) ) ;
2007-12-03 15:45:53 +11:00
return ;
2007-06-07 18:37:27 +10:00
}
2007-06-07 16:34:33 +10:00
}
2007-08-27 10:31:22 +10:00
/* Result codes for a monitoring pass; values are implicit, from 0 */
enum monitor_result {
	MONITOR_OK,
	MONITOR_RECOVERY_NEEDED,
	MONITOR_ELECTION_NEEDED,
	MONITOR_FAILED
};
2008-06-12 16:53:36 +10:00
/*
remember the trouble maker
*/
2020-07-29 13:30:04 +10:00
static void ctdb_set_culprit_count ( struct ctdb_recoverd * rec ,
uint32_t culprit ,
uint32_t count )
2008-06-12 16:53:36 +10:00
{
2020-07-29 13:30:04 +10:00
struct ctdb_context * ctdb = talloc_get_type_abort (
rec - > ctdb , struct ctdb_context ) ;
struct ctdb_banning_state * ban_state = NULL ;
size_t len ;
bool ok ;
2009-09-04 02:20:39 +10:00
2020-07-29 13:30:04 +10:00
ok = node_flags ( rec , culprit , NULL ) ;
if ( ! ok ) {
DBG_WARNING ( " Unknown culprit node % " PRIu32 " \n " , culprit ) ;
2009-09-04 02:20:39 +10:00
return ;
}
2013-06-28 14:10:47 +10:00
/* If we are banned or stopped, do not set other nodes as culprits */
if ( rec - > node_flags & NODE_FLAGS_INACTIVE ) {
2020-07-29 13:30:04 +10:00
D_WARNING ( " This node is INACTIVE, cannot set culprit node %d \n " ,
culprit ) ;
2013-06-28 14:10:47 +10:00
return ;
}
2020-07-29 13:30:04 +10:00
if ( rec - > banning_state = = NULL ) {
len = 0 ;
} else {
size_t i ;
len = talloc_array_length ( rec - > banning_state ) ;
for ( i = 0 ; i < len ; i + + ) {
if ( rec - > banning_state [ i ] . pnn = = culprit ) {
ban_state = & rec - > banning_state [ i ] ;
break ;
}
}
2009-09-04 02:20:39 +10:00
}
2020-07-29 13:30:04 +10:00
/* Not found, so extend (or allocate new) array */
if ( ban_state = = NULL ) {
struct ctdb_banning_state * t ;
len + = 1 ;
/*
* talloc_realloc ( ) handles the corner case where
* rec - > banning_state is NULL
*/
t = talloc_realloc ( rec ,
rec - > banning_state ,
struct ctdb_banning_state ,
len ) ;
if ( t = = NULL ) {
DBG_WARNING ( " Memory allocation errror " ) ;
return ;
}
rec - > banning_state = t ;
/* New element is always at the end - initialise it... */
ban_state = & rec - > banning_state [ len - 1 ] ;
* ban_state = ( struct ctdb_banning_state ) {
. pnn = culprit ,
. count = 0 ,
} ;
} else if ( ban_state - > count > 0 & &
timeval_elapsed ( & ban_state - > last_reported_time ) >
ctdb - > tunable . recovery_grace_period ) {
/*
* Forgive old transgressions beyond the tunable time - limit
*/
2009-09-04 02:20:39 +10:00
ban_state - > count = 0 ;
2008-06-12 16:53:36 +10:00
}
2009-09-04 02:20:39 +10:00
ban_state - > count + = count ;
ban_state - > last_reported_time = timeval_current ( ) ;
rec - > last_culprit_node = culprit ;
2008-06-12 16:53:36 +10:00
}
2020-07-29 13:30:04 +10:00
static void ban_counts_reset ( struct ctdb_recoverd * rec )
{
D_NOTICE ( " Resetting ban count to 0 for all nodes \n " ) ;
TALLOC_FREE ( rec - > banning_state ) ;
}
2009-04-24 13:58:32 +10:00
/*
 * Remember the trouble maker: charge a node one banning credit.
 */
static void ctdb_set_culprit(struct ctdb_recoverd *rec, uint32_t culprit)
{
	const uint32_t single_credit = 1;

	ctdb_set_culprit_count(rec, culprit, single_credit);
}
2008-06-12 16:53:36 +10:00
2008-05-06 15:42:59 +10:00
/*
2015-10-27 15:09:33 +11:00
Retrieve capabilities from all connected nodes
2008-05-06 15:42:59 +10:00
*/
2014-07-31 15:26:03 +10:00
static int update_capabilities ( struct ctdb_recoverd * rec ,
2015-10-29 17:22:48 +11:00
struct ctdb_node_map_old * nodemap )
2008-05-06 15:42:59 +10:00
{
2014-07-31 15:26:03 +10:00
uint32_t * capp ;
2008-05-06 15:42:59 +10:00
TALLOC_CTX * tmp_ctx ;
2014-07-31 15:26:03 +10:00
struct ctdb_node_capabilities * caps ;
struct ctdb_context * ctdb = rec - > ctdb ;
2008-05-06 15:42:59 +10:00
2014-07-31 15:26:03 +10:00
tmp_ctx = talloc_new ( rec ) ;
2008-05-06 15:42:59 +10:00
CTDB_NO_MEMORY ( ctdb , tmp_ctx ) ;
2014-07-31 15:26:03 +10:00
caps = ctdb_get_capabilities ( ctdb , tmp_ctx ,
CONTROL_TIMEOUT ( ) , nodemap ) ;
if ( caps = = NULL ) {
DEBUG ( DEBUG_ERR ,
( __location__ " Failed to get node capabilities \n " ) ) ;
2008-05-06 15:42:59 +10:00
talloc_free ( tmp_ctx ) ;
return - 1 ;
}
2021-12-08 20:25:46 +11:00
capp = ctdb_get_node_capabilities ( caps , rec - > pnn ) ;
2014-07-31 15:26:03 +10:00
if ( capp = = NULL ) {
DEBUG ( DEBUG_ERR ,
( __location__
" Capabilities don't include current node. \n " ) ) ;
talloc_free ( tmp_ctx ) ;
return - 1 ;
}
ctdb - > capabilities = * capp ;
TALLOC_FREE ( rec - > caps ) ;
rec - > caps = talloc_steal ( rec , caps ) ;
2008-05-06 15:42:59 +10:00
talloc_free ( tmp_ctx ) ;
return 0 ;
}
2007-06-07 15:18:55 +10:00
/*
change recovery mode on all nodes
*/
2015-10-06 11:52:06 +11:00
static int set_recovery_mode ( struct ctdb_context * ctdb ,
struct ctdb_recoverd * rec ,
2015-10-29 17:22:48 +11:00
struct ctdb_node_map_old * nodemap ,
2016-09-13 15:45:54 +10:00
uint32_t rec_mode )
2007-05-06 09:53:12 +10:00
{
2008-01-06 12:38:01 +11:00
TDB_DATA data ;
2008-01-29 13:59:28 +11:00
uint32_t * nodes ;
TALLOC_CTX * tmp_ctx ;
tmp_ctx = talloc_new ( ctdb ) ;
CTDB_NO_MEMORY ( ctdb , tmp_ctx ) ;
2008-06-12 16:53:36 +10:00
nodes = list_of_active_nodes ( ctdb , nodemap , tmp_ctx , true ) ;
2014-05-06 14:24:52 +10:00
data . dsize = sizeof ( uint32_t ) ;
data . dptr = ( unsigned char * ) & rec_mode ;
if ( ctdb_client_async_control ( ctdb , CTDB_CONTROL_SET_RECMODE ,
nodes , 0 ,
CONTROL_TIMEOUT ( ) ,
false , data ,
NULL , NULL ,
NULL ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Unable to set recovery mode. Recovery failed. \n " ) ) ;
talloc_free ( tmp_ctx ) ;
return - 1 ;
}
2008-01-29 13:59:28 +11:00
talloc_free ( tmp_ctx ) ;
2007-05-06 09:53:12 +10:00
return 0 ;
}
2020-05-05 23:37:57 +10:00
/*
2018-09-28 10:46:17 +10:00
* Update flags on all connected nodes
2020-05-05 23:37:57 +10:00
*/
2018-09-28 10:46:17 +10:00
static int update_flags_on_all_nodes ( struct ctdb_recoverd * rec ,
uint32_t pnn ,
uint32_t flags )
2020-05-05 23:37:57 +10:00
{
2018-09-28 10:46:17 +10:00
struct ctdb_context * ctdb = rec - > ctdb ;
struct timeval timeout = CONTROL_TIMEOUT ( ) ;
2020-05-05 23:37:57 +10:00
TDB_DATA data ;
struct ctdb_node_map_old * nodemap = NULL ;
struct ctdb_node_flag_change c ;
TALLOC_CTX * tmp_ctx = talloc_new ( ctdb ) ;
uint32_t * nodes ;
2020-07-14 14:22:15 +10:00
uint32_t i ;
2018-09-28 10:46:17 +10:00
int ret ;
2020-05-05 23:37:57 +10:00
2020-05-05 23:49:05 +10:00
nodemap = rec - > nodemap ;
2020-05-05 23:37:57 +10:00
2020-07-14 14:22:15 +10:00
for ( i = 0 ; i < nodemap - > num ; i + + ) {
if ( pnn = = nodemap - > nodes [ i ] . pnn ) {
break ;
}
}
if ( i > = nodemap - > num ) {
2020-05-05 23:49:05 +10:00
DBG_ERR ( " Nodemap does not contain node %d \n " , pnn ) ;
2020-05-05 23:37:57 +10:00
talloc_free ( tmp_ctx ) ;
return - 1 ;
}
2018-09-28 10:46:17 +10:00
c . pnn = pnn ;
2020-07-14 14:22:15 +10:00
c . old_flags = nodemap - > nodes [ i ] . flags ;
2020-07-14 14:29:09 +10:00
c . new_flags = flags ;
2020-05-05 23:37:57 +10:00
data . dsize = sizeof ( c ) ;
data . dptr = ( unsigned char * ) & c ;
/* send the flags update to all connected nodes */
nodes = list_of_connected_nodes ( ctdb , nodemap , tmp_ctx , true ) ;
2018-09-28 10:46:17 +10:00
ret = ctdb_client_async_control ( ctdb ,
CTDB_CONTROL_MODIFY_FLAGS ,
nodes ,
0 ,
timeout ,
false ,
data ,
NULL ,
NULL ,
NULL ) ;
if ( ret ! = 0 ) {
DBG_ERR ( " Unable to update flags on remote nodes \n " ) ;
2020-05-05 23:37:57 +10:00
talloc_free ( tmp_ctx ) ;
return - 1 ;
}
talloc_free ( tmp_ctx ) ;
return 0 ;
}
2021-12-10 11:29:06 +11:00
/*
 * Forward declarations: cluster_lock_take() below calls both of these
 * before their definitions appear later in the file.
 */
static bool _cluster_lock_lock ( struct ctdb_recoverd * rec ) ;
static bool cluster_lock_held ( struct ctdb_recoverd * rec ) ;
2018-09-20 12:30:58 +10:00
2021-12-10 11:43:10 +11:00
static bool cluster_lock_enabled ( struct ctdb_recoverd * rec )
{
return rec - > ctdb - > recovery_lock ! = NULL ;
}
2018-09-20 12:30:58 +10:00
static bool cluster_lock_take ( struct ctdb_recoverd * rec )
{
struct ctdb_context * ctdb = rec - > ctdb ;
2021-12-10 11:29:06 +11:00
bool have_lock ;
2018-09-20 12:30:58 +10:00
2021-12-10 11:43:10 +11:00
if ( ! cluster_lock_enabled ( rec ) ) {
2018-09-20 12:30:58 +10:00
return true ;
}
2021-12-10 11:29:06 +11:00
if ( cluster_lock_held ( rec ) ) {
D_NOTICE ( " Already holding cluster lock \n " ) ;
2018-09-20 12:30:58 +10:00
return true ;
}
2021-12-10 11:29:06 +11:00
D_NOTICE ( " Attempting to take cluster lock (%s) \n " , ctdb - > recovery_lock ) ;
have_lock = _cluster_lock_lock ( rec ) ;
if ( ! have_lock ) {
2018-09-20 12:30:58 +10:00
return false ;
}
2021-12-10 11:29:06 +11:00
D_NOTICE ( " Cluster lock taken successfully \n " ) ;
2018-09-20 12:30:58 +10:00
return true ;
}
2007-07-04 08:36:59 +10:00
/*
 * Called when ctdb_wait_timeout should finish: p points at the
 * caller's uint32_t flag, which is set to 1.
 */
static void ctdb_wait_handler(struct tevent_context *ev,
			      struct tevent_timer *te,
			      struct timeval yt, void *p)
{
	uint32_t *expired = (uint32_t *)p;

	*expired = 1;
}
/*
wait for a given number of seconds
*/
2010-06-22 22:50:35 +09:30
static void ctdb_wait_timeout ( struct ctdb_context * ctdb , double secs )
2007-07-04 08:36:59 +10:00
{
uint32_t timed_out = 0 ;
2010-06-22 22:50:35 +09:30
time_t usecs = ( secs - ( time_t ) secs ) * 1000000 ;
2015-10-26 16:50:09 +11:00
tevent_add_timer ( ctdb - > ev , ctdb , timeval_current_ofs ( secs , usecs ) ,
ctdb_wait_handler , & timed_out ) ;
2007-07-04 08:36:59 +10:00
while ( ! timed_out ) {
2015-10-26 16:50:09 +11:00
tevent_loop_once ( ctdb - > ev ) ;
2007-07-04 08:36:59 +10:00
}
}
2020-03-16 16:16:44 +11:00
/*
* Broadcast cluster leader
*/
static int leader_broadcast_send ( struct ctdb_recoverd * rec , uint32_t pnn )
{
struct ctdb_context * ctdb = rec - > ctdb ;
TDB_DATA data ;
int ret ;
data . dptr = ( uint8_t * ) & pnn ;
data . dsize = sizeof ( pnn ) ;
ret = ctdb_client_send_message ( ctdb ,
CTDB_BROADCAST_CONNECTED ,
CTDB_SRVID_LEADER ,
data ) ;
return ret ;
}
/*
 * Forward declaration: leader_broadcast_loop_handler() re-arms the
 * loop through this function before its definition appears.
 */
static int leader_broadcast_loop ( struct ctdb_recoverd * rec ) ;
2021-12-10 11:29:06 +11:00
/*
 * Forward declaration: leader_broadcast_loop_handler() calls this
 * before the definition appears later in the file.
 */
static void cluster_lock_release ( struct ctdb_recoverd * rec ) ;
2020-03-16 16:16:44 +11:00
/* This runs continously but only sends the broadcast when leader */
static void leader_broadcast_loop_handler ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval current_time ,
void * private_data )
{
struct ctdb_recoverd * rec = talloc_get_type_abort (
private_data , struct ctdb_recoverd ) ;
int ret ;
if ( ! this_node_can_be_leader ( rec ) ) {
if ( this_node_is_leader ( rec ) ) {
rec - > leader = CTDB_UNKNOWN_PNN ;
}
2021-12-10 11:43:10 +11:00
if ( cluster_lock_enabled ( rec ) & & cluster_lock_held ( rec ) ) {
2021-12-10 11:29:06 +11:00
cluster_lock_release ( rec ) ;
2020-03-16 16:16:44 +11:00
}
goto done ;
}
if ( ! this_node_is_leader ( rec ) ) {
goto done ;
}
if ( rec - > election_in_progress ) {
goto done ;
}
ret = leader_broadcast_send ( rec , rec - > leader ) ;
if ( ret ! = 0 ) {
DBG_WARNING ( " Failed to send leader broadcast \n " ) ;
}
done :
ret = leader_broadcast_loop ( rec ) ;
if ( ret ! = 0 ) {
D_WARNING ( " Failed to set up leader broadcast \n " ) ;
}
}
static int leader_broadcast_loop ( struct ctdb_recoverd * rec )
{
struct ctdb_context * ctdb = rec - > ctdb ;
TALLOC_FREE ( rec - > leader_broadcast_te ) ;
rec - > leader_broadcast_te =
tevent_add_timer ( ctdb - > ev ,
rec ,
timeval_current_ofs ( 1 , 0 ) ,
leader_broadcast_loop_handler ,
rec ) ;
if ( rec - > leader_broadcast_te = = NULL ) {
return ENOMEM ;
}
return 0 ;
}
static bool leader_broadcast_loop_active ( struct ctdb_recoverd * rec )
{
return rec - > leader_broadcast_te ! = NULL ;
}
2007-11-13 10:27:44 +11:00
/*
called when an election times out ( ends )
*/
2015-10-26 16:50:09 +11:00
static void ctdb_election_timeout ( struct tevent_context * ev ,
struct tevent_timer * te ,
2007-11-13 10:27:44 +11:00
struct timeval t , void * p )
{
struct ctdb_recoverd * rec = talloc_get_type ( p , struct ctdb_recoverd ) ;
2018-09-20 14:13:58 +10:00
bool ok ;
2020-03-18 20:27:10 +11:00
rec - > election_in_progress = false ;
2007-11-13 10:27:44 +11:00
rec - > election_timeout = NULL ;
2010-06-22 22:55:20 +09:30
fast_start = false ;
2009-07-17 11:37:03 +10:00
2021-12-08 11:07:25 +11:00
D_WARNING ( " Election period ended, leader=%u \n " , rec - > leader ) ;
2018-09-20 14:13:58 +10:00
if ( ! this_node_is_leader ( rec ) ) {
return ;
}
ok = cluster_lock_take ( rec ) ;
if ( ! ok ) {
D_ERR ( " Unable to get cluster lock, banning node \n " ) ;
ctdb_ban_node ( rec , rec - > pnn ) ;
}
2007-11-13 10:27:44 +11:00
}
/*
wait for an election to finish . It finished election_timeout seconds after
the last election packet is received
*/
static void ctdb_wait_election ( struct ctdb_recoverd * rec )
{
struct ctdb_context * ctdb = rec - > ctdb ;
2020-03-18 20:27:10 +11:00
while ( rec - > election_in_progress ) {
2015-10-26 16:50:09 +11:00
tevent_loop_once ( ctdb - > ev ) ;
2007-11-13 10:27:44 +11:00
}
}
2007-10-15 14:28:51 +10:00
/*
2018-01-18 20:35:55 +11:00
* Update local flags from all remote connected nodes and push out
2021-12-08 11:07:25 +11:00
* flags changes to all nodes . This is only run by the leader .
2007-10-15 14:28:51 +10:00
*/
2018-01-24 10:21:37 +11:00
static int update_flags ( struct ctdb_recoverd * rec ,
struct ctdb_node_map_old * nodemap ,
struct ctdb_node_map_old * * remote_nodemaps )
2007-10-15 14:28:51 +10:00
{
2019-05-23 08:43:58 +10:00
unsigned int j ;
2007-11-30 08:44:34 +11:00
struct ctdb_context * ctdb = rec - > ctdb ;
2007-10-15 14:28:51 +10:00
TALLOC_CTX * mem_ctx = talloc_new ( ctdb ) ;
2018-01-18 20:35:55 +11:00
/* Check flags from remote nodes */
2007-10-15 14:28:51 +10:00
for ( j = 0 ; j < nodemap - > num ; j + + ) {
2015-10-29 17:22:48 +11:00
struct ctdb_node_map_old * remote_nodemap = NULL ;
2019-06-15 07:19:26 +10:00
uint32_t local_flags = nodemap - > nodes [ j ] . flags ;
2021-07-11 20:40:10 +10:00
uint32_t remote_pnn = nodemap - > nodes [ j ] . pnn ;
2019-06-15 07:19:26 +10:00
uint32_t remote_flags ;
2021-07-11 22:17:08 +10:00
unsigned int i ;
2007-10-15 14:28:51 +10:00
int ret ;
2019-06-15 07:19:26 +10:00
if ( local_flags & NODE_FLAGS_DISCONNECTED ) {
2007-10-15 14:28:51 +10:00
continue ;
}
2021-12-08 20:25:46 +11:00
if ( remote_pnn = = rec - > pnn ) {
2021-07-11 22:17:08 +10:00
/*
* No remote nodemap for this node since this
* is the local nodemap . However , still need
* to check this against the remote nodes and
* push it if they are out - of - date .
*/
goto compare_remotes ;
2007-10-15 14:28:51 +10:00
}
2018-01-18 20:35:55 +11:00
remote_nodemap = remote_nodemaps [ j ] ;
2019-06-15 07:19:26 +10:00
remote_flags = remote_nodemap - > nodes [ j ] . flags ;
if ( local_flags ! = remote_flags ) {
/*
* Update the local copy of the flags in the
* recovery daemon .
*/
D_NOTICE ( " Remote node %u had flags 0x%x, "
" local had 0x%x - updating local \n " ,
2021-07-11 20:40:10 +10:00
remote_pnn ,
2019-06-15 07:19:26 +10:00
remote_flags ,
local_flags ) ;
nodemap - > nodes [ j ] . flags = remote_flags ;
2021-07-11 21:28:43 +10:00
local_flags = remote_flags ;
goto push ;
}
2021-07-11 22:17:08 +10:00
compare_remotes :
for ( i = 0 ; i < nodemap - > num ; i + + ) {
if ( i = = j ) {
continue ;
}
if ( nodemap - > nodes [ i ] . flags & NODE_FLAGS_DISCONNECTED ) {
continue ;
}
2021-12-08 20:25:46 +11:00
if ( nodemap - > nodes [ i ] . pnn = = rec - > pnn ) {
2021-07-11 22:17:08 +10:00
continue ;
}
remote_nodemap = remote_nodemaps [ i ] ;
remote_flags = remote_nodemap - > nodes [ j ] . flags ;
if ( local_flags ! = remote_flags ) {
goto push ;
}
}
2021-07-11 21:28:43 +10:00
continue ;
push :
D_NOTICE ( " Pushing updated flags for node %u (0x%x) \n " ,
remote_pnn ,
local_flags ) ;
ret = update_flags_on_all_nodes ( rec , remote_pnn , local_flags ) ;
if ( ret ! = 0 ) {
DBG_ERR ( " Unable to update flags on remote nodes \n " ) ;
talloc_free ( mem_ctx ) ;
return - 1 ;
2007-10-15 14:28:51 +10:00
}
}
talloc_free ( mem_ctx ) ;
2016-04-27 21:47:08 +10:00
return 0 ;
2007-10-15 14:28:51 +10:00
}
2015-10-12 16:52:49 +02:00
/* Create a new random generation id.
2007-08-22 12:38:31 +10:00
The generation id can not be the INVALID_GENERATION id
*/
static uint32_t new_generation ( void )
{
uint32_t generation ;
while ( 1 ) {
generation = random ( ) ;
if ( generation ! = INVALID_GENERATION ) {
break ;
}
}
return generation ;
}
2007-10-05 12:01:40 +10:00
2021-12-10 11:29:06 +11:00
static bool cluster_lock_held ( struct ctdb_recoverd * rec )
2016-02-17 20:20:03 +11:00
{
2021-12-10 11:29:06 +11:00
return ( rec - > cluster_lock_handle ! = NULL ) ;
2016-02-17 20:20:03 +11:00
}
2021-12-10 11:29:06 +11:00
/* State of one attempt to take, and then hold, the cluster lock */
struct ctdb_cluster_lock_handle {
	/* Set once the attempt has completed or been cancelled */
	bool done;
	/* True if the lock was obtained */
	bool locked;
	/* Latency reported by the helper on success */
	double latency;
	/* Underlying cluster mutex; freeing it releases the lock */
	struct ctdb_cluster_mutex_handle *h;
	/* Back-pointer to the owning recovery daemon state */
	struct ctdb_recoverd *rec;
};
2021-12-10 11:29:06 +11:00
static void take_cluster_lock_handler ( char status ,
double latency ,
void * private_data )
2016-02-17 20:20:03 +11:00
{
2021-12-10 11:29:06 +11:00
struct ctdb_cluster_lock_handle * s =
( struct ctdb_cluster_lock_handle * ) private_data ;
2016-02-17 20:20:03 +11:00
2019-01-21 16:28:28 +11:00
s - > locked = ( status = = ' 0 ' ) ;
/*
* If unsuccessful then ensure the process has exited and that
* the file descriptor event handler has been cancelled
*/
if ( ! s - > locked ) {
TALLOC_FREE ( s - > h ) ;
}
2016-02-17 20:20:03 +11:00
switch ( status ) {
case ' 0 ' :
2016-06-01 17:32:42 +10:00
s - > latency = latency ;
2016-02-17 20:20:03 +11:00
break ;
case ' 1 ' :
2021-12-10 11:29:06 +11:00
D_ERR ( " Unable to take cluster lock - contention \n " ) ;
2019-01-21 16:36:13 +11:00
break ;
case ' 2 ' :
2021-12-10 11:29:06 +11:00
D_ERR ( " Unable to take cluster lock - timeout \n " ) ;
2016-02-17 20:20:03 +11:00
break ;
default :
2021-12-10 11:29:06 +11:00
D_ERR ( " Unable to take cluster lock - unknown error \n " ) ;
2016-02-17 20:20:03 +11:00
}
s - > done = true ;
}
2021-12-08 19:27:01 +11:00
/*
 * Forward declaration: lost_cluster_lock_handler() below calls this;
 * the definition is not in this part of the file.
 */
static void force_election ( struct ctdb_recoverd * rec ) ;
2016-05-29 07:25:05 +10:00
2021-12-10 11:29:06 +11:00
static void lost_cluster_lock_handler ( void * private_data )
2016-05-29 07:25:05 +10:00
{
struct ctdb_recoverd * rec = talloc_get_type_abort (
private_data , struct ctdb_recoverd ) ;
2021-12-10 11:29:06 +11:00
D_ERR ( " Cluster lock helper terminated \n " ) ;
TALLOC_FREE ( rec - > cluster_lock_handle ) ;
2018-11-08 15:49:30 +11:00
2022-01-07 11:27:06 +11:00
if ( this_node_can_be_leader ( rec ) ) {
force_election ( rec ) ;
}
2016-05-29 07:25:05 +10:00
}
2021-12-10 11:29:06 +11:00
static bool _cluster_lock_lock ( struct ctdb_recoverd * rec )
2016-02-17 20:20:03 +11:00
{
2016-05-24 14:54:39 +10:00
struct ctdb_context * ctdb = rec - > ctdb ;
2016-02-17 20:20:03 +11:00
struct ctdb_cluster_mutex_handle * h ;
2021-12-10 11:29:06 +11:00
struct ctdb_cluster_lock_handle * s ;
2018-09-03 11:43:44 +10:00
2021-12-10 11:29:06 +11:00
s = talloc_zero ( rec , struct ctdb_cluster_lock_handle ) ;
2018-09-03 11:43:44 +10:00
if ( s = = NULL ) {
DBG_ERR ( " Memory allocation error \n " ) ;
return false ;
2016-02-17 20:20:03 +11:00
} ;
2019-01-10 13:24:34 +11:00
s - > rec = rec ;
2018-09-03 12:39:32 +10:00
h = ctdb_cluster_mutex ( s ,
2018-09-03 11:43:44 +10:00
ctdb ,
ctdb - > recovery_lock ,
2019-02-22 15:09:33 +11:00
120 ,
2021-12-10 11:29:06 +11:00
take_cluster_lock_handler ,
2018-09-03 11:43:44 +10:00
s ,
2021-12-10 11:29:06 +11:00
lost_cluster_lock_handler ,
2018-09-03 11:43:44 +10:00
rec ) ;
2016-02-17 20:20:03 +11:00
if ( h = = NULL ) {
2018-09-03 11:43:44 +10:00
talloc_free ( s ) ;
2016-06-01 15:56:42 +10:00
return false ;
2016-02-17 20:20:03 +11:00
}
2021-12-10 11:29:06 +11:00
rec - > cluster_lock_handle = s ;
2018-09-03 13:30:57 +10:00
s - > h = h ;
2018-09-03 11:43:44 +10:00
while ( ! s - > done ) {
2016-02-17 20:20:03 +11:00
tevent_loop_once ( ctdb - > ev ) ;
}
2018-09-03 11:43:44 +10:00
if ( ! s - > locked ) {
2021-12-10 11:29:06 +11:00
TALLOC_FREE ( rec - > cluster_lock_handle ) ;
2016-05-24 14:54:39 +10:00
return false ;
}
2018-09-03 11:43:44 +10:00
ctdb_ctrl_report_recd_lock_latency ( ctdb ,
CONTROL_TIMEOUT ( ) ,
s - > latency ) ;
2016-05-24 14:54:39 +10:00
return true ;
2016-02-17 20:20:03 +11:00
}
2021-12-10 11:29:06 +11:00
static void cluster_lock_release ( struct ctdb_recoverd * rec )
2016-02-17 20:20:03 +11:00
{
2021-12-10 11:29:06 +11:00
if ( rec - > cluster_lock_handle = = NULL ) {
2018-09-11 15:05:19 +10:00
return ;
2016-02-17 20:20:03 +11:00
}
2018-09-11 15:05:19 +10:00
2021-12-10 11:29:06 +11:00
if ( ! rec - > cluster_lock_handle - > done ) {
2018-09-03 13:01:19 +10:00
/*
2021-12-10 11:29:06 +11:00
* Taking of cluster lock still in progress . Free
2018-09-03 13:01:19 +10:00
* the cluster mutex handle to release it but leave
2021-12-10 11:29:06 +11:00
* the cluster lock handle in place to allow taking
2018-09-03 13:01:19 +10:00
* of the lock to fail .
*/
2021-12-10 11:29:06 +11:00
D_NOTICE ( " Cancelling cluster lock \n " ) ;
TALLOC_FREE ( rec - > cluster_lock_handle - > h ) ;
rec - > cluster_lock_handle - > done = true ;
rec - > cluster_lock_handle - > locked = false ;
2018-09-03 13:01:19 +10:00
return ;
}
2021-12-10 11:29:06 +11:00
D_NOTICE ( " Releasing cluster lock \n " ) ;
TALLOC_FREE ( rec - > cluster_lock_handle ) ;
2016-02-17 20:20:03 +11:00
}
2013-06-28 16:31:07 +10:00
static void ban_misbehaving_nodes ( struct ctdb_recoverd * rec , bool * self_ban )
2013-06-28 14:31:02 +10:00
{
2020-07-29 13:30:04 +10:00
size_t len = talloc_array_length ( rec - > banning_state ) ;
size_t i ;
2013-06-28 14:31:02 +10:00
2013-06-28 16:31:07 +10:00
* self_ban = false ;
2020-07-29 13:30:04 +10:00
for ( i = 0 ; i < len ; i + + ) {
struct ctdb_banning_state * ban_state = & rec - > banning_state [ i ] ;
if ( ban_state - > count < 2 * rec - > nodemap - > num ) {
2013-06-28 14:31:02 +10:00
continue ;
}
2021-12-10 10:31:56 +11:00
D_NOTICE ( " Node %u reached %u banning credits \n " ,
2020-07-29 12:15:03 +10:00
ban_state - > pnn ,
2021-12-10 10:31:56 +11:00
ban_state - > count ) ;
2020-07-29 12:15:03 +10:00
ctdb_ban_node ( rec , ban_state - > pnn ) ;
2013-06-28 14:31:02 +10:00
ban_state - > count = 0 ;
2013-06-28 16:31:07 +10:00
/* Banning ourself? */
2020-07-29 12:15:03 +10:00
if ( ban_state - > pnn = = rec - > pnn ) {
2013-06-28 16:31:07 +10:00
* self_ban = true ;
}
2013-06-28 14:31:02 +10:00
}
}
2016-12-09 15:04:03 +11:00
/*
 * State shared between helper_run() and helper_handler() while an
 * external helper program is running.
 */
struct helper_state {
	/*
	 * Pipe to the helper: fd[0] is read by helper_handler(), the
	 * number of fd[1] is passed to the helper as its first argument
	 */
	int fd[2];
	/* PID returned by ctdb_vfork_exec(), -1 if no child was created */
	pid_t pid;
	/* Status read from the pipe; EPIPE if the read was short or failed */
	int result;
	/* Set once helper_handler() has run */
	bool done;
};
static void helper_handler ( struct tevent_context * ev ,
struct tevent_fd * fde ,
uint16_t flags , void * private_data )
{
struct helper_state * state = talloc_get_type_abort (
private_data , struct helper_state ) ;
int ret ;
ret = sys_read ( state - > fd [ 0 ] , & state - > result , sizeof ( state - > result ) ) ;
if ( ret ! = sizeof ( state - > result ) ) {
state - > result = EPIPE ;
}
state - > done = true ;
}
/*
 * Run an external helper program and wait for its result.
 *
 * The helper is started with the arguments
 *   <write-fd> <daemon socket name> [<arg>]
 * and reports an int status through the pipe.  While waiting, the
 * event loop is run; if this node stops being leader the wait is
 * abandoned and the run is treated as failed.
 *
 * rec     - recovery daemon state
 * mem_ctx - talloc parent for the temporary helper state
 * prog    - path of the helper program
 * arg     - optional extra argument, may be NULL
 * type    - short helper description, used in log messages only
 *
 * Returns 0 if the helper reported status 0, -1 otherwise.  The
 * helper process, if one was created, is sent SIGKILL before
 * returning in either case.
 */
static int helper_run(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx,
		      const char *prog, const char *arg, const char *type)
{
	struct helper_state *state;
	struct tevent_fd *fde;
	const char **args;
	int nargs, ret;

	state = talloc_zero(mem_ctx, struct helper_state);
	if (state == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
		return -1;
	}

	state->pid = -1;
	/*
	 * talloc_zero() leaves both descriptors as 0.  Mark them as
	 * unused so that the failure path never closes descriptor 0
	 * when pipe() fails.
	 */
	state->fd[0] = -1;
	state->fd[1] = -1;

	ret = pipe(state->fd);
	if (ret != 0) {
		DEBUG(DEBUG_ERR,
		      ("Failed to create pipe for %s helper\n", type));
		goto fail;
	}

	set_close_on_exec(state->fd[0]);

	nargs = 4;
	args = talloc_array(state, const char *, nargs);
	if (args == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
		goto fail;
	}

	args[0] = talloc_asprintf(args, "%d", state->fd[1]);
	if (args[0] == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
		goto fail;
	}
	args[1] = rec->ctdb->daemon.name;
	args[2] = arg;
	args[3] = NULL;

	/* Without the optional argument the list is one entry shorter */
	if (args[2] == NULL) {
		nargs = 3;
	}

	state->pid = ctdb_vfork_exec(state, rec->ctdb, prog, nargs, args);
	if (state->pid == -1) {
		DEBUG(DEBUG_ERR,
		      ("Failed to create child for %s helper\n", type));
		goto fail;
	}

	/* The write end now belongs to the helper */
	close(state->fd[1]);
	state->fd[1] = -1;

	rec->helper_pid = state->pid;
	state->done = false;

	fde = tevent_add_fd(rec->ctdb->ev, state, state->fd[0],
			    TEVENT_FD_READ, helper_handler, state);
	if (fde == NULL) {
		goto fail;
	}
	tevent_fd_set_auto_close(fde);

	while (!state->done) {
		tevent_loop_once(rec->ctdb->ev);

		if (!this_node_is_leader(rec)) {
			D_ERR("Leader changed to %u, aborting %s\n",
			      rec->leader,
			      type);
			state->result = 1;
			break;
		}
	}

	/*
	 * The fd event owns fd[0] because auto-close is set, so free
	 * the event instead of calling close() directly.  Closing by
	 * hand as well would close the descriptor a second time when
	 * state is freed below.
	 */
	TALLOC_FREE(fde);
	state->fd[0] = -1;

	if (state->result != 0) {
		goto fail;
	}

	rec->helper_pid = -1;
	ctdb_kill(rec->ctdb, state->pid, SIGKILL);
	talloc_free(state);
	return 0;

fail:
	if (state->fd[0] != -1) {
		close(state->fd[0]);
	}
	if (state->fd[1] != -1) {
		close(state->fd[1]);
	}
	rec->helper_pid = -1;
	if (state->pid != -1) {
		ctdb_kill(rec->ctdb, state->pid, SIGKILL);
	}
	talloc_free(state);
	return -1;
}
2016-12-09 16:21:39 +11:00
static int ctdb_takeover ( struct ctdb_recoverd * rec ,
uint32_t * force_rebalance_nodes )
{
static char prog [ PATH_MAX + 1 ] = " " ;
char * arg ;
2019-05-23 08:43:58 +10:00
unsigned int i ;
int ret ;
2016-12-09 16:21:39 +11:00
if ( ! ctdb_set_helper ( " takeover_helper " , prog , sizeof ( prog ) ,
" CTDB_TAKEOVER_HELPER " , CTDB_HELPER_BINDIR ,
" ctdb_takeover_helper " ) ) {
ctdb_die ( rec - > ctdb , " Unable to set takeover helper \n " ) ;
}
arg = NULL ;
for ( i = 0 ; i < talloc_array_length ( force_rebalance_nodes ) ; i + + ) {
uint32_t pnn = force_rebalance_nodes [ i ] ;
if ( arg = = NULL ) {
arg = talloc_asprintf ( rec , " %u " , pnn ) ;
} else {
arg = talloc_asprintf_append ( arg , " ,%u " , pnn ) ;
}
if ( arg = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " memory error \n " ) ) ;
return - 1 ;
}
}
2018-08-21 13:41:22 +10:00
if ( ctdb_config . failover_disabled ) {
2018-08-21 09:36:00 +10:00
ret = setenv ( " CTDB_DISABLE_IP_FAILOVER " , " 1 " , 1 ) ;
if ( ret ! = 0 ) {
D_ERR ( " Failed to set CTDB_DISABLE_IP_FAILOVER variable \n " ) ;
return - 1 ;
}
}
2016-12-09 16:21:39 +11:00
return helper_run ( rec , rec , prog , arg , " takeover " ) ;
}
2016-12-09 15:04:03 +11:00
2013-08-27 12:14:34 +10:00
static bool do_takeover_run ( struct ctdb_recoverd * rec ,
2016-05-03 15:35:08 +10:00
struct ctdb_node_map_old * nodemap )
2013-08-27 12:14:34 +10:00
{
2013-08-27 15:04:40 +10:00
uint32_t * nodes = NULL ;
2015-10-28 18:23:13 +11:00
struct ctdb_disable_message dtr ;
2013-09-03 11:21:09 +10:00
TDB_DATA data ;
2019-05-23 08:43:58 +10:00
size_t i ;
2013-09-06 11:23:07 +10:00
uint32_t * rebalance_nodes = rec - > force_rebalance_nodes ;
2013-08-27 12:14:34 +10:00
int ret ;
bool ok ;
2013-09-18 17:06:16 +10:00
DEBUG ( DEBUG_NOTICE , ( " Takeover run starting \n " ) ) ;
2015-02-08 20:52:12 +11:00
if ( ctdb_op_is_in_progress ( rec - > takeover_run ) ) {
2013-09-03 11:20:01 +10:00
DEBUG ( DEBUG_ERR , ( __location__
" takeover run already in progress \n " ) ) ;
ok = false ;
goto done ;
}
2015-02-08 20:52:12 +11:00
if ( ! ctdb_op_begin ( rec - > takeover_run ) ) {
2013-08-27 15:04:40 +10:00
ok = false ;
goto done ;
2013-09-03 11:21:09 +10:00
}
2013-08-27 15:04:40 +10:00
/* Disable IP checks (takeover runs, really) on other nodes
* while doing this takeover run . This will stop those other
* nodes from triggering takeover runs when think they should
* be hosting an IP but it isn ' t yet on an interface . Don ' t
* wait for replies since a failure here might cause some
* noise in the logs but will not actually cause a problem .
*/
2016-01-11 17:23:12 +11:00
ZERO_STRUCT ( dtr ) ;
2013-08-27 15:04:40 +10:00
dtr . srvid = 0 ; /* No reply */
dtr . pnn = - 1 ;
data . dptr = ( uint8_t * ) & dtr ;
data . dsize = sizeof ( dtr ) ;
nodes = list_of_connected_nodes ( rec - > ctdb , nodemap , rec , false ) ;
2013-10-24 11:13:16 +11:00
/* Disable for 60 seconds. This can be a tunable later if
2013-08-27 15:04:40 +10:00
* necessary .
*/
2015-10-28 18:23:13 +11:00
dtr . timeout = 60 ;
2013-08-27 15:04:40 +10:00
for ( i = 0 ; i < talloc_array_length ( nodes ) ; i + + ) {
if ( ctdb_client_send_message ( rec - > ctdb , nodes [ i ] ,
CTDB_SRVID_DISABLE_TAKEOVER_RUNS ,
data ) ! = 0 ) {
DEBUG ( DEBUG_INFO , ( " Failed to disable takeover runs \n " ) ) ;
}
}
2013-09-03 11:20:01 +10:00
2016-12-09 16:21:39 +11:00
ret = ctdb_takeover ( rec , rec - > force_rebalance_nodes ) ;
2013-09-03 11:21:09 +10:00
2013-08-27 15:04:40 +10:00
/* Reenable takeover runs and IP checks on other nodes */
2015-10-28 18:23:13 +11:00
dtr . timeout = 0 ;
2013-08-27 15:04:40 +10:00
for ( i = 0 ; i < talloc_array_length ( nodes ) ; i + + ) {
if ( ctdb_client_send_message ( rec - > ctdb , nodes [ i ] ,
CTDB_SRVID_DISABLE_TAKEOVER_RUNS ,
data ) ! = 0 ) {
2015-07-26 23:02:57 +02:00
DEBUG ( DEBUG_INFO , ( " Failed to re-enable takeover runs \n " ) ) ;
2013-08-27 15:04:40 +10:00
}
2013-09-03 11:21:09 +10:00
}
2013-08-27 12:14:34 +10:00
if ( ret ! = 0 ) {
2013-09-18 17:06:16 +10:00
DEBUG ( DEBUG_ERR , ( " ctdb_takeover_run() failed \n " ) ) ;
2013-08-27 12:14:34 +10:00
ok = false ;
goto done ;
}
ok = true ;
2013-09-04 14:30:04 +10:00
/* Takeover run was successful so clear force rebalance targets */
2013-09-06 11:23:07 +10:00
if ( rebalance_nodes = = rec - > force_rebalance_nodes ) {
TALLOC_FREE ( rec - > force_rebalance_nodes ) ;
} else {
DEBUG ( DEBUG_WARNING ,
( " Rebalance target nodes changed during takeover run - not clearing \n " ) ) ;
}
2013-08-27 12:14:34 +10:00
done :
rec - > need_takeover_run = ! ok ;
2013-08-27 15:04:40 +10:00
talloc_free ( nodes ) ;
2015-02-08 20:52:12 +11:00
ctdb_op_end ( rec - > takeover_run ) ;
2013-09-18 17:06:16 +10:00
DEBUG ( DEBUG_NOTICE , ( " Takeover run %s \n " , ok ? " completed successfully " : " unsuccessful " ) ) ;
2013-08-27 12:14:34 +10:00
return ok ;
}
2015-09-17 16:22:38 +10:00
/*
 * Run the database recovery helper.
 *
 * The helper receives a new generation number as its argument and
 * CTDB_DBDIR_STATE in its environment.  The argument string is
 * allocated on mem_ctx and is not freed here.
 *
 * Returns the result of helper_run(), or -1 on an earlier failure.
 */
static int db_recovery_parallel(struct ctdb_recoverd *rec, TALLOC_CTX *mem_ctx)
{
	static char prog[PATH_MAX+1] = "";
	const char *arg;
	int ret;

	if (!ctdb_set_helper("recovery_helper", prog, sizeof(prog),
			     "CTDB_RECOVERY_HELPER", CTDB_HELPER_BINDIR,
			     "ctdb_recovery_helper")) {
		ctdb_die(rec->ctdb, "Unable to set recovery helper\n");
	}

	arg = talloc_asprintf(mem_ctx, "%u", new_generation());
	if (arg == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " memory error\n"));
		return -1;
	}

	/*
	 * Do not start the helper without the variable it is meant
	 * to receive; this matches how ctdb_takeover() treats a
	 * setenv() failure.
	 */
	ret = setenv("CTDB_DBDIR_STATE", rec->ctdb->db_directory_state, 1);
	if (ret != 0) {
		D_ERR("Failed to set CTDB_DBDIR_STATE variable\n");
		return -1;
	}

	return helper_run(rec, mem_ctx, prog, arg, "recovery");
}
2015-09-17 16:00:47 +10:00
/*
2021-12-08 11:07:25 +11:00
* Main recovery function , only run by leader
2015-09-17 16:00:47 +10:00
*/
2018-01-16 16:20:05 +11:00
static int do_recovery ( struct ctdb_recoverd * rec , TALLOC_CTX * mem_ctx )
2015-09-17 16:00:47 +10:00
{
struct ctdb_context * ctdb = rec - > ctdb ;
2018-01-16 16:20:05 +11:00
struct ctdb_node_map_old * nodemap = rec - > nodemap ;
2019-05-23 08:43:58 +10:00
unsigned int i ;
int ret ;
2015-09-17 16:00:47 +10:00
bool self_ban ;
DEBUG ( DEBUG_NOTICE , ( __location__ " Starting do_recovery \n " ) ) ;
2021-12-08 11:07:25 +11:00
/* Check if the current node is still the leader. It's possible that
* re - election has changed the leader .
2015-10-06 17:31:41 +11:00
*/
2021-12-08 19:37:39 +11:00
if ( ! this_node_is_leader ( rec ) ) {
2021-12-08 11:07:25 +11:00
D_NOTICE ( " Leader changed to %u, aborting recovery \n " ,
rec - > leader ) ;
2015-10-06 17:31:41 +11:00
return - 1 ;
}
2015-09-17 16:00:47 +10:00
/* if recovery fails, force it again */
rec - > need_recovery = true ;
if ( ! ctdb_op_begin ( rec - > recovery ) ) {
return - 1 ;
}
2020-03-18 20:27:10 +11:00
if ( rec - > election_in_progress ) {
2015-09-17 16:00:47 +10:00
/* an election is in progress */
DEBUG ( DEBUG_ERR , ( " do_recovery called while election in progress - try again later \n " ) ) ;
goto fail ;
}
ban_misbehaving_nodes ( rec , & self_ban ) ;
if ( self_ban ) {
DEBUG ( DEBUG_NOTICE , ( " This node was banned, aborting recovery \n " ) ) ;
goto fail ;
}
2020-05-04 17:45:51 +10:00
if ( cluster_lock_enabled ( rec ) & & ! cluster_lock_held ( rec ) ) {
/* Leader can change in ban_misbehaving_nodes() */
if ( ! this_node_is_leader ( rec ) ) {
D_NOTICE ( " Leader changed to %u, aborting recovery \n " ,
rec - > leader ) ;
rec - > need_recovery = false ;
2018-09-20 12:30:58 +10:00
goto fail ;
2015-09-17 16:00:47 +10:00
}
2020-05-04 17:45:51 +10:00
D_ERR ( " Cluster lock not held - abort recovery, ban node \n " ) ;
ctdb_ban_node ( rec , rec - > pnn ) ;
goto fail ;
2015-09-17 16:00:47 +10:00
}
DEBUG ( DEBUG_NOTICE , ( __location__ " Recovery initiated due to problem with node %u \n " , rec - > last_culprit_node ) ) ;
2015-10-27 15:09:33 +11:00
/* Retrieve capabilities from all connected nodes */
2015-09-17 16:07:37 +10:00
ret = update_capabilities ( rec , nodemap ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Unable to update node capabilities. \n " ) ) ;
return - 1 ;
}
2015-09-17 17:10:15 +10:00
/*
update all nodes to have the same flags that we have
*/
for ( i = 0 ; i < nodemap - > num ; i + + ) {
if ( nodemap - > nodes [ i ] . flags & NODE_FLAGS_DISCONNECTED ) {
continue ;
}
2020-05-05 23:45:15 +10:00
ret = update_flags_on_all_nodes ( rec ,
2020-07-14 14:43:04 +10:00
nodemap - > nodes [ i ] . pnn ,
2018-01-18 20:25:07 +11:00
nodemap - > nodes [ i ] . flags ) ;
2015-09-17 17:10:15 +10:00
if ( ret ! = 0 ) {
if ( nodemap - > nodes [ i ] . flags & NODE_FLAGS_INACTIVE ) {
DEBUG ( DEBUG_WARNING , ( __location__ " Unable to update flags on inactive node %d \n " , i ) ) ;
} else {
DEBUG ( DEBUG_ERR , ( __location__ " Unable to update flags on all nodes for node %d \n " , i ) ) ;
return - 1 ;
}
}
}
DEBUG ( DEBUG_NOTICE , ( __location__ " Recovery - updated flags \n " ) ) ;
2016-07-19 16:06:37 +10:00
ret = db_recovery_parallel ( rec , mem_ctx ) ;
2015-09-17 16:00:47 +10:00
if ( ret ! = 0 ) {
goto fail ;
}
2016-05-03 15:35:08 +10:00
do_takeover_run ( rec , nodemap ) ;
2008-02-18 19:38:04 +11:00
2007-05-26 00:05:30 +10:00
/* send a message to all clients telling them that the cluster
has been reconfigured */
2013-11-11 12:39:27 +11:00
ret = ctdb_client_send_message ( ctdb , CTDB_BROADCAST_CONNECTED ,
CTDB_SRVID_RECONFIGURE , tdb_null ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to send reconfigure message \n " ) ) ;
2015-02-06 14:32:08 +11:00
goto fail ;
2013-11-11 12:39:27 +11:00
}
2007-05-04 15:21:40 +10:00
2008-02-04 17:44:24 +11:00
DEBUG ( DEBUG_NOTICE , ( __location__ " Recovery complete \n " ) ) ;
2007-07-04 08:36:59 +10:00
2007-09-14 09:49:12 +10:00
rec - > need_recovery = false ;
2015-02-06 14:47:33 +11:00
ctdb_op_end ( rec - > recovery ) ;
2007-09-14 09:49:12 +10:00
2020-07-29 13:30:04 +10:00
/*
* Completed a full recovery so forgive any past transgressions
*/
ban_counts_reset ( rec ) ;
2009-09-25 13:14:53 +10:00
2015-02-06 14:47:33 +11:00
/* We just finished a recovery successfully.
We now wait for rerecovery_timeout before we allow
2007-07-04 08:36:59 +10:00
another recovery to take place .
*/
2017-02-17 22:51:52 +13:00
DEBUG ( DEBUG_NOTICE , ( " Just finished a recovery. New recoveries will now be suppressed for the rerecovery timeout (%d seconds) \n " , ctdb - > tunable . rerecovery_timeout ) ) ;
2015-02-06 14:47:33 +11:00
ctdb_op_disable ( rec - > recovery , ctdb - > ev ,
ctdb - > tunable . rerecovery_timeout ) ;
2007-05-04 15:21:40 +10:00
return 0 ;
2015-02-06 14:32:08 +11:00
fail :
2015-02-06 14:47:33 +11:00
ctdb_op_end ( rec - > recovery ) ;
2015-02-06 14:32:08 +11:00
return - 1 ;
2007-05-04 09:45:53 +10:00
}
2007-05-04 08:30:18 +10:00
2007-05-07 04:41:12 +10:00
2007-06-07 19:17:27 +10:00
/*
 * Election data exchanged between recovery daemons.
 *
 * send_election_request() sends this structure as the raw payload of
 * a CTDB_SRVID_ELECTION message, so the member order and types must
 * not change.
 *
 * ctdb_election_win() decides a round as follows: a node that cannot
 * be leader never wins; a sender flagged inactive always loses;
 * otherwise priority_time is compared and pnn breaks a tie.
 * num_connected is filled in by ctdb_election_data() but is not part
 * of that comparison.
 */
struct election_message {
	/* Number of nodes the sender does not see as disconnected */
	uint32_t num_connected;
	/* Sender's priority time */
	struct timeval priority_time;
	/* Sender's PNN */
	uint32_t pnn;
	/* Sender's own node flags */
	uint32_t node_flags;
};
2007-06-07 19:17:27 +10:00
/*
form this nodes election data
*/
static void ctdb_election_data ( struct ctdb_recoverd * rec , struct election_message * em )
{
2019-05-23 08:43:58 +10:00
unsigned int i ;
int ret ;
2015-10-29 17:22:48 +11:00
struct ctdb_node_map_old * nodemap ;
2007-06-07 19:17:27 +10:00
struct ctdb_context * ctdb = rec - > ctdb ;
2020-07-29 17:57:53 +10:00
bool ok ;
2007-06-07 19:17:27 +10:00
ZERO_STRUCTP ( em ) ;
2021-12-08 20:25:46 +11:00
em - > pnn = rec - > pnn ;
2007-06-07 19:17:27 +10:00
em - > priority_time = rec - > priority_time ;
ret = ctdb_ctrl_getnodemap ( ctdb , CONTROL_TIMEOUT ( ) , CTDB_CURRENT_NODE , rec , & nodemap ) ;
if ( ret ! = 0 ) {
2013-10-30 11:32:28 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " unable to get node map \n " ) ) ;
2007-06-07 19:17:27 +10:00
return ;
}
2020-07-29 17:57:53 +10:00
ok = node_flags ( rec , rec - > pnn , & rec - > node_flags ) ;
if ( ! ok ) {
DBG_ERR ( " Unable to get node flags for this node \n " ) ;
return ;
}
2009-07-17 11:37:03 +10:00
em - > node_flags = rec - > node_flags ;
2007-06-07 19:17:27 +10:00
for ( i = 0 ; i < nodemap - > num ; i + + ) {
if ( ! ( nodemap - > nodes [ i ] . flags & NODE_FLAGS_DISCONNECTED ) ) {
em - > num_connected + + ;
}
}
2008-05-06 13:56:56 +10:00
2021-12-14 10:57:03 +11:00
if ( ! this_node_can_be_leader ( rec ) ) {
/* Try to lose... */
2008-05-06 13:56:56 +10:00
em - > num_connected = 0 ;
em - > priority_time = timeval_current ( ) ;
}
2007-06-07 19:17:27 +10:00
talloc_free ( nodemap ) ;
}
/*
see if the given election data wins
*/
static bool ctdb_election_win ( struct ctdb_recoverd * rec , struct election_message * em )
{
struct election_message myem ;
2007-10-05 13:28:21 +10:00
int cmp = 0 ;
2007-06-07 19:17:27 +10:00
ctdb_election_data ( rec , & myem ) ;
2021-12-14 10:57:03 +11:00
if ( ! this_node_can_be_leader ( rec ) ) {
2009-07-09 14:44:03 +10:00
return false ;
2013-06-21 14:06:22 +02:00
}
2009-07-09 14:44:03 +10:00
2020-03-17 17:10:20 +11:00
/* Automatically win if other node is banned or stopped */
if ( em - > node_flags & NODE_FLAGS_INACTIVE ) {
2009-07-09 14:44:03 +10:00
return true ;
}
2007-06-07 19:17:27 +10:00
/* then the longest running node */
if ( cmp = = 0 ) {
2007-06-07 19:21:55 +10:00
cmp = timeval_compare ( & em - > priority_time , & myem . priority_time ) ;
2007-06-07 19:17:27 +10:00
}
if ( cmp = = 0 ) {
2007-09-04 10:33:10 +10:00
cmp = ( int ) myem . pnn - ( int ) em - > pnn ;
2007-06-07 19:17:27 +10:00
}
return cmp > 0 ;
}
2007-06-07 15:18:55 +10:00
/*
send out an election request
*/
2021-12-08 19:27:01 +11:00
static int send_election_request ( struct ctdb_recoverd * rec )
2007-05-07 06:51:58 +10:00
{
TDB_DATA election_data ;
struct election_message emsg ;
uint64_t srvid ;
2007-06-07 18:37:27 +10:00
struct ctdb_context * ctdb = rec - > ctdb ;
2007-10-11 06:16:36 +10:00
2015-10-29 17:51:52 +11:00
srvid = CTDB_SRVID_ELECTION ;
2007-05-07 06:51:58 +10:00
2007-06-07 19:17:27 +10:00
ctdb_election_data ( rec , & emsg ) ;
2007-05-07 06:51:58 +10:00
election_data . dsize = sizeof ( struct election_message ) ;
election_data . dptr = ( unsigned char * ) & emsg ;
2020-05-05 23:26:41 +10:00
/* Assume this node will win the election, set leader accordingly */
2020-07-14 15:22:33 +10:00
rec - > leader = rec - > pnn ;
2013-10-29 16:38:42 +11:00
2007-05-07 06:51:58 +10:00
/* send an election message to all active nodes */
2009-07-17 11:37:03 +10:00
DEBUG ( DEBUG_INFO , ( __location__ " Send election request to all active nodes \n " ) ) ;
2013-11-11 12:39:27 +11:00
return ctdb_client_send_message ( ctdb , CTDB_BROADCAST_ALL , srvid , election_data ) ;
2007-05-07 06:51:58 +10:00
}
2007-11-13 10:27:44 +11:00
/*
we think we are winning the election - send a broadcast election request
*/
2015-10-26 16:50:09 +11:00
static void election_send_request ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval t , void * p )
2007-11-13 10:27:44 +11:00
{
struct ctdb_recoverd * rec = talloc_get_type ( p , struct ctdb_recoverd ) ;
int ret ;
2021-12-08 19:27:01 +11:00
ret = send_election_request ( rec ) ;
2007-11-13 10:27:44 +11:00
if ( ret ! = 0 ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( " Failed to send election request! \n " ) ) ;
2007-11-13 10:27:44 +11:00
}
2015-10-23 16:03:38 +11:00
TALLOC_FREE ( rec - > send_election_te ) ;
2007-11-13 10:27:44 +11:00
}
2008-04-01 15:34:54 +11:00
/*
handler for memory dumps
*/
2015-04-08 14:38:26 +10:00
static void mem_dump_handler ( uint64_t srvid , TDB_DATA data , void * private_data )
2008-04-01 15:34:54 +11:00
{
2015-04-08 14:38:26 +10:00
struct ctdb_recoverd * rec = talloc_get_type (
private_data , struct ctdb_recoverd ) ;
struct ctdb_context * ctdb = rec - > ctdb ;
2008-04-01 15:34:54 +11:00
TALLOC_CTX * tmp_ctx = talloc_new ( ctdb ) ;
TDB_DATA * dump ;
int ret ;
2015-10-29 14:32:49 +11:00
struct ctdb_srvid_message * rd ;
2008-04-01 15:34:54 +11:00
2015-10-29 14:32:49 +11:00
if ( data . dsize ! = sizeof ( struct ctdb_srvid_message ) ) {
2008-04-01 15:34:54 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Wrong size of return address. \n " ) ) ;
2008-09-16 09:00:48 +10:00
talloc_free ( tmp_ctx ) ;
2008-04-01 15:34:54 +11:00
return ;
}
2015-10-29 14:32:49 +11:00
rd = ( struct ctdb_srvid_message * ) data . dptr ;
2008-04-01 15:34:54 +11:00
dump = talloc_zero ( tmp_ctx , TDB_DATA ) ;
if ( dump = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to allocate memory for memdump \n " ) ) ;
talloc_free ( tmp_ctx ) ;
return ;
}
ret = ctdb_dump_memory ( ctdb , dump ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb_dump_memory() failed \n " ) ) ;
talloc_free ( tmp_ctx ) ;
return ;
}
2021-12-08 11:07:25 +11:00
DBG_ERR ( " recovery daemon memory dump \n " ) ;
2008-04-01 15:34:54 +11:00
2010-06-02 09:45:21 +10:00
ret = ctdb_client_send_message ( ctdb , rd - > pnn , rd - > srvid , * dump ) ;
2008-04-01 15:34:54 +11:00
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to send rd memdump reply message \n " ) ) ;
2008-09-16 09:00:48 +10:00
talloc_free ( tmp_ctx ) ;
2008-04-01 15:34:54 +11:00
return ;
}
talloc_free ( tmp_ctx ) ;
}
2009-06-01 14:18:34 +10:00
/*
handler for reload_nodes
*/
2015-04-08 14:38:26 +10:00
static void reload_nodes_handler ( uint64_t srvid , TDB_DATA data ,
void * private_data )
2009-06-01 14:18:34 +10:00
{
2015-04-08 14:38:26 +10:00
struct ctdb_recoverd * rec = talloc_get_type (
private_data , struct ctdb_recoverd ) ;
2009-06-01 14:18:34 +10:00
DEBUG ( DEBUG_ERR , ( __location__ " Reload nodes file from recovery daemon \n " ) ) ;
2013-10-14 13:54:39 +11:00
ctdb_load_nodes_file ( rec - > ctdb ) ;
2009-06-01 14:18:34 +10:00
}
2009-10-06 12:11:32 +11:00
2015-04-08 14:38:26 +10:00
/*
 * Handler for node rebalance requests.
 *
 * The message data is a single uint32_t PNN, which is appended to
 * rec->force_rebalance_nodes.  Ignored unless this node is leader.
 */
static void recd_node_rebalance_handler(uint64_t srvid, TDB_DATA data,
					void *private_data)
{
	struct ctdb_recoverd *rec = talloc_get_type(
		private_data, struct ctdb_recoverd);
	struct ctdb_context *ctdb = rec->ctdb;
	uint32_t pnn;
	uint32_t *t;
	size_t len;

	if (!this_node_is_leader(rec)) {
		return;
	}

	if (data.dsize != sizeof(uint32_t)) {
		DEBUG(DEBUG_ERR,(__location__ " Incorrect size of node rebalance message. Was %zu but expected %zu bytes\n", data.dsize, sizeof(uint32_t)));
		return;
	}

	/* Copy the value out; the buffer may not be aligned for uint32_t */
	memcpy(&pnn, data.dptr, sizeof(pnn));

	DEBUG(DEBUG_NOTICE,("Setting up rebalance of IPs to node %u\n", pnn));

	/* Copy any existing list of nodes.  There's probably some
	 * sort of realloc variant that will do this but we need to
	 * make sure that freeing the old array also cancels the timer
	 * event for the timeout... not sure if realloc will do that.
	 */
	len = (rec->force_rebalance_nodes != NULL) ?
		talloc_array_length(rec->force_rebalance_nodes) :
		0;

	/* This allows duplicates to be added but they don't cause
	 * harm.  A call to add a duplicate PNN arguably means that
	 * the timeout should be reset, so this is the simplest
	 * solution.
	 */
	t = talloc_zero_array(rec, uint32_t, len+1);
	CTDB_NO_MEMORY_VOID(ctdb, t);
	if (len > 0) {
		memcpy(t, rec->force_rebalance_nodes, sizeof(uint32_t) * len);
	}
	t[len] = pnn;

	talloc_free(rec->force_rebalance_nodes);

	rec->force_rebalance_nodes = t;
}
2021-12-08 21:28:05 +11:00
static void srvid_disable_and_reply ( struct ctdb_recoverd * rec ,
2015-02-06 13:05:12 +11:00
TDB_DATA data ,
struct ctdb_op_state * op_state )
2013-08-27 15:04:40 +10:00
{
2021-12-08 21:28:05 +11:00
struct ctdb_context * ctdb = rec - > ctdb ;
2015-10-28 18:23:13 +11:00
struct ctdb_disable_message * r ;
2013-08-27 15:04:40 +10:00
uint32_t timeout ;
TDB_DATA result ;
int32_t ret = 0 ;
/* Validate input data */
2015-10-28 18:23:13 +11:00
if ( data . dsize ! = sizeof ( struct ctdb_disable_message ) ) {
2013-08-27 15:04:40 +10:00
DEBUG ( DEBUG_ERR , ( __location__ " Wrong size for data :%lu "
" expecting %lu \n " , ( long unsigned ) data . dsize ,
2015-10-29 14:32:49 +11:00
( long unsigned ) sizeof ( struct ctdb_srvid_message ) ) ) ;
2013-11-11 12:39:27 +11:00
return ;
2013-08-27 15:04:40 +10:00
}
if ( data . dptr = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " No data received \n " ) ) ;
2013-11-11 12:39:27 +11:00
return ;
2013-08-27 15:04:40 +10:00
}
2015-10-28 18:23:13 +11:00
r = ( struct ctdb_disable_message * ) data . dptr ;
timeout = r - > timeout ;
2013-08-27 15:04:40 +10:00
2015-02-06 13:05:12 +11:00
ret = ctdb_op_disable ( op_state , ctdb - > ev , timeout ) ;
2015-02-08 20:52:12 +11:00
if ( ret ! = 0 ) {
2013-08-27 15:04:40 +10:00
goto done ;
}
/* Returning our PNN tells the caller that we succeeded */
2021-12-08 20:25:46 +11:00
ret = rec - > pnn ;
2013-08-27 15:04:40 +10:00
done :
result . dsize = sizeof ( int32_t ) ;
result . dptr = ( uint8_t * ) & ret ;
2015-10-29 14:32:49 +11:00
srvid_request_reply ( ctdb , ( struct ctdb_srvid_message * ) r , result ) ;
2013-08-27 15:04:40 +10:00
}
2015-04-08 14:38:26 +10:00
/*
 * Handler for requests to disable takeover runs: disables the
 * takeover run operation and replies to the sender.
 */
static void disable_takeover_runs_handler(uint64_t srvid, TDB_DATA data,
					  void *private_data)
{
	struct ctdb_recoverd *rec;

	rec = talloc_get_type(private_data, struct ctdb_recoverd);

	srvid_disable_and_reply(rec, data, rec->takeover_run);
}
2015-02-06 15:03:03 +11:00
/* Backward compatibility for this SRVID */
2015-04-08 14:38:26 +10:00
static void disable_ip_check_handler ( uint64_t srvid , TDB_DATA data ,
void * private_data )
2013-08-28 11:32:54 +10:00
{
2015-04-08 14:38:26 +10:00
struct ctdb_recoverd * rec = talloc_get_type (
private_data , struct ctdb_recoverd ) ;
2015-02-06 15:03:03 +11:00
uint32_t timeout ;
2013-08-28 11:32:54 +10:00
if ( data . dsize ! = sizeof ( uint32_t ) ) {
DEBUG ( DEBUG_ERR , ( __location__ " Wrong size for data :%lu "
" expecting %lu \n " , ( long unsigned ) data . dsize ,
( long unsigned ) sizeof ( uint32_t ) ) ) ;
return ;
}
if ( data . dptr = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " No data received \n " ) ) ;
return ;
}
2015-02-06 15:03:03 +11:00
timeout = * ( ( uint32_t * ) data . dptr ) ;
2013-08-28 11:32:54 +10:00
2015-04-08 14:38:26 +10:00
ctdb_op_disable ( rec - > takeover_run , rec - > ctdb - > ev , timeout ) ;
2013-08-28 11:32:54 +10:00
}
2009-10-06 12:11:32 +11:00
2015-04-08 14:38:26 +10:00
/*
 * Handler for requests to disable recoveries: disables the recovery
 * operation and replies to the sender.
 */
static void disable_recoveries_handler(uint64_t srvid, TDB_DATA data,
				       void *private_data)
{
	struct ctdb_recoverd *rec;

	rec = talloc_get_type(private_data, struct ctdb_recoverd);

	srvid_disable_and_reply(rec, data, rec->recovery);
}
2009-07-02 13:00:26 +10:00
/*
2013-08-16 20:10:10 +10:00
handler for ip reallocate , just add it to the list of requests and
2009-07-02 13:00:26 +10:00
handle this later in the monitor_cluster loop so we do not recurse
2013-08-16 20:10:10 +10:00
with other requests to takeover_run ( )
2009-07-02 13:00:26 +10:00
*/
2015-04-08 14:38:26 +10:00
static void ip_reallocate_handler ( uint64_t srvid , TDB_DATA data ,
void * private_data )
2009-07-02 13:00:26 +10:00
{
2015-10-29 14:32:49 +11:00
struct ctdb_srvid_message * request ;
2015-04-08 14:38:26 +10:00
struct ctdb_recoverd * rec = talloc_get_type (
private_data , struct ctdb_recoverd ) ;
2009-07-02 13:00:26 +10:00
2015-10-29 14:32:49 +11:00
if ( data . dsize ! = sizeof ( struct ctdb_srvid_message ) ) {
2009-07-02 13:00:26 +10:00
DEBUG ( DEBUG_ERR , ( __location__ " Wrong size of return address. \n " ) ) ;
return ;
}
2015-10-29 14:32:49 +11:00
request = ( struct ctdb_srvid_message * ) data . dptr ;
2009-07-02 13:00:26 +10:00
2015-04-08 14:38:26 +10:00
srvid_request_add ( rec - > ctdb , & rec - > reallocate_requests , request ) ;
2009-07-02 13:00:26 +10:00
}
2013-08-16 20:02:34 +10:00
static void process_ipreallocate_requests ( struct ctdb_context * ctdb ,
struct ctdb_recoverd * rec )
2009-07-02 13:00:26 +10:00
{
TDB_DATA result ;
int32_t ret ;
2013-11-22 13:57:03 +11:00
struct srvid_requests * current ;
2009-07-02 13:00:26 +10:00
2013-11-22 13:57:03 +11:00
/* Only process requests that are currently pending. More
* might come in while the takeover run is in progress and
* they will need to be processed later since they might
* be in response flag changes .
*/
current = rec - > reallocate_requests ;
rec - > reallocate_requests = NULL ;
2016-05-03 15:35:08 +10:00
if ( do_takeover_run ( rec , rec - > nodemap ) ) {
2021-12-08 20:25:46 +11:00
ret = rec - > pnn ;
2015-10-28 20:04:41 +11:00
} else {
ret = - 1 ;
2010-01-19 08:42:48 +01:00
}
2009-07-02 13:00:26 +10:00
result . dsize = sizeof ( int32_t ) ;
result . dptr = ( uint8_t * ) & ret ;
2013-11-22 13:57:03 +11:00
srvid_requests_reply ( ctdb , & current , result ) ;
2009-07-02 13:00:26 +10:00
}
2009-06-01 14:18:34 +10:00
2016-03-17 17:26:30 +11:00
/*
 * Handler for assigning banning credits.
 *
 * The message data is a single uint32_t PNN.  That node is passed to
 * ctdb_set_culprit_count() together with the number of nodes in
 * rec->nodemap.  Ignored unless this node is leader.
 */
static void banning_handler(uint64_t srvid, TDB_DATA data, void *private_data)
{
	struct ctdb_recoverd *rec = talloc_get_type(
		private_data, struct ctdb_recoverd);
	uint32_t culprit;

	/* Ignore if we are not leader */
	if (!this_node_is_leader(rec)) {
		return;
	}

	if (data.dsize == sizeof(uint32_t)) {
		culprit = *(uint32_t *)data.dptr;
		ctdb_set_culprit_count(rec, culprit, rec->nodemap->num);
		return;
	}

	DEBUG(DEBUG_ERR, (__location__ " invalid data size %zu\n",
			  data.dsize));
}
2009-06-01 14:18:34 +10:00
2007-05-07 06:51:58 +10:00
/*
2021-12-08 11:07:25 +11:00
* Handler for leader elections
*/
2015-04-08 14:38:26 +10:00
static void election_handler ( uint64_t srvid , TDB_DATA data , void * private_data )
2007-05-07 06:51:58 +10:00
{
2015-04-08 14:38:26 +10:00
struct ctdb_recoverd * rec = talloc_get_type (
private_data , struct ctdb_recoverd ) ;
struct ctdb_context * ctdb = rec - > ctdb ;
2007-05-07 06:51:58 +10:00
struct election_message * em = ( struct election_message * ) data . dptr ;
2013-11-01 14:34:20 +11:00
/* Ignore election packets from ourself */
2021-12-08 20:25:46 +11:00
if ( rec - > pnn = = em - > pnn ) {
2013-11-01 14:34:20 +11:00
return ;
}
2007-11-13 10:27:44 +11:00
/* we got an election packet - update the timeout for the election */
talloc_free ( rec - > election_timeout ) ;
2020-03-18 20:27:10 +11:00
rec - > election_in_progress = true ;
2015-10-26 16:50:09 +11:00
rec - > election_timeout = tevent_add_timer (
ctdb - > ev , ctdb ,
fast_start ?
timeval_current_ofs ( 0 , 500000 ) :
timeval_current_ofs ( ctdb - > tunable . election_timeout , 0 ) ,
ctdb_election_timeout , rec ) ;
2007-11-13 10:27:44 +11:00
2007-05-07 06:51:58 +10:00
/* someone called an election. check their election data
and if we disagree and we would rather be the elected node ,
send a new election message to all other nodes
*/
2007-06-07 19:17:27 +10:00
if ( ctdb_election_win ( rec , em ) ) {
2007-11-13 10:27:44 +11:00
if ( ! rec - > send_election_te ) {
2015-10-26 16:50:09 +11:00
rec - > send_election_te = tevent_add_timer (
ctdb - > ev , rec ,
timeval_current_ofs ( 0 , 500000 ) ,
election_send_request , rec ) ;
2007-05-07 06:51:58 +10:00
}
return ;
}
2014-12-09 13:50:22 +11:00
2007-11-13 10:27:44 +11:00
/* we didn't win */
2015-03-31 13:59:02 +11:00
TALLOC_FREE ( rec - > send_election_te ) ;
2007-05-07 06:51:58 +10:00
2021-12-10 11:29:06 +11:00
/* Release the cluster lock file */
if ( cluster_lock_held ( rec ) ) {
cluster_lock_release ( rec ) ;
2007-05-23 14:35:19 +10:00
}
2021-12-08 11:07:25 +11:00
/* Set leader to the winner of this round */
2020-07-14 15:22:33 +10:00
rec - > leader = em - > pnn ;
2007-05-07 06:51:58 +10:00
return ;
}
2020-03-18 15:14:39 +11:00
static void cluster_lock_election ( struct ctdb_recoverd * rec )
{
bool ok ;
if ( ! this_node_can_be_leader ( rec ) ) {
if ( cluster_lock_held ( rec ) ) {
cluster_lock_release ( rec ) ;
}
2022-01-21 18:09:47 +11:00
goto done ;
2020-03-18 15:14:39 +11:00
}
/*
* Don ' t need to unconditionally release the lock and then
* attempt to retake it . This provides stability .
*/
if ( cluster_lock_held ( rec ) ) {
2022-01-21 18:09:47 +11:00
goto done ;
2020-03-18 15:14:39 +11:00
}
rec - > leader = CTDB_UNKNOWN_PNN ;
ok = cluster_lock_take ( rec ) ;
if ( ok ) {
rec - > leader = rec - > pnn ;
D_WARNING ( " Took cluster lock, leader=% " PRIu32 " \n " , rec - > leader ) ;
}
2022-01-21 18:09:47 +11:00
done :
2020-03-18 15:14:39 +11:00
rec - > election_in_progress = false ;
}
2007-06-07 15:18:55 +10:00
/*
force the start of the election process
*/
2021-12-08 19:27:01 +11:00
static void force_election ( struct ctdb_recoverd * rec )
2007-05-07 06:51:58 +10:00
{
int ret ;
2007-06-07 18:37:27 +10:00
struct ctdb_context * ctdb = rec - > ctdb ;
2007-05-10 09:48:14 +10:00
2022-01-23 06:21:51 +11:00
D_ERR ( " Start election \n " ) ;
2009-07-17 11:37:03 +10:00
2007-05-10 09:48:14 +10:00
/* set all nodes to recovery mode to stop all internode traffic */
2021-12-08 19:27:01 +11:00
ret = set_recovery_mode ( ctdb , rec , rec - > nodemap , CTDB_RECOVERY_ACTIVE ) ;
2008-07-07 08:50:12 +10:00
if ( ret ! = 0 ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Unable to set recovery mode to active on cluster \n " ) ) ;
2007-05-10 09:48:14 +10:00
return ;
}
2007-11-13 10:27:44 +11:00
2022-01-23 05:49:18 +11:00
rec - > election_in_progress = true ;
2022-01-23 06:18:51 +11:00
/* Let other nodes know that an election is underway */
leader_broadcast_send ( rec , CTDB_UNKNOWN_PNN ) ;
2022-01-23 05:49:18 +11:00
2020-03-18 15:14:39 +11:00
if ( cluster_lock_enabled ( rec ) ) {
cluster_lock_election ( rec ) ;
return ;
}
2007-11-13 10:27:44 +11:00
talloc_free ( rec - > election_timeout ) ;
2015-10-26 16:50:09 +11:00
rec - > election_timeout = tevent_add_timer (
ctdb - > ev , ctdb ,
fast_start ?
timeval_current_ofs ( 0 , 500000 ) :
timeval_current_ofs ( ctdb - > tunable . election_timeout , 0 ) ,
ctdb_election_timeout , rec ) ;
2007-11-13 10:27:44 +11:00
2021-12-08 19:27:01 +11:00
ret = send_election_request ( rec ) ;
2007-05-07 06:51:58 +10:00
if ( ret ! = 0 ) {
2021-12-08 11:07:25 +11:00
DBG_ERR ( " Failed to initiate leader election " ) ;
2007-05-07 06:51:58 +10:00
return ;
}
2007-05-26 14:01:08 +10:00
/* wait for a few seconds to collect all responses */
2007-11-13 10:27:44 +11:00
ctdb_wait_election ( rec ) ;
2007-06-07 15:18:55 +10:00
}
2018-01-17 19:04:34 +11:00
/*
  handler for message service ids that are no longer implemented

  Only logs a warning naming the obsolete SRVID; the payload and
  private data are ignored.
 */
static void srvid_not_implemented(uint64_t srvid,
				  TDB_DATA data,
				  void *private_data)
{
	/* Anything that is not explicitly recognised is reported as unknown */
	const char *name = "UNKNOWN";

	if (srvid == CTDB_SRVID_SET_NODE_FLAGS) {
		name = "CTDB_SRVID_SET_NODE_FLAGS";
	}

	D_WARNING("SRVID %s (0x%"PRIx64") is obsolete\n", name, srvid);
}
2007-05-07 04:41:12 +10:00
2008-11-19 14:43:46 +11:00
/*
2019-08-29 22:19:03 +02:00
handler for when we need to push out flag changes to all other nodes
2008-11-19 14:43:46 +11:00
*/
2015-04-08 14:38:26 +10:00
static void push_flags_handler ( uint64_t srvid , TDB_DATA data ,
void * private_data )
2008-11-19 14:43:46 +11:00
{
2015-04-08 14:38:26 +10:00
struct ctdb_recoverd * rec = talloc_get_type (
private_data , struct ctdb_recoverd ) ;
struct ctdb_context * ctdb = rec - > ctdb ;
2008-11-19 14:43:46 +11:00
int ret ;
struct ctdb_node_flag_change * c = ( struct ctdb_node_flag_change * ) data . dptr ;
2015-10-29 17:22:48 +11:00
struct ctdb_node_map_old * nodemap = NULL ;
2009-10-09 15:47:49 +02:00
TALLOC_CTX * tmp_ctx = talloc_new ( ctdb ) ;
uint32_t * nodes ;
2008-11-19 14:43:46 +11:00
2021-12-08 11:07:25 +11:00
/* read the node flags from the leader */
2020-07-14 15:22:33 +10:00
ret = ctdb_ctrl_getnodemap ( ctdb , CONTROL_TIMEOUT ( ) , rec - > leader ,
2015-10-23 15:33:01 +11:00
tmp_ctx , & nodemap ) ;
2009-10-09 15:47:49 +02:00
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Unable to get nodemap from node %u \n " , c - > pnn ) ) ;
talloc_free ( tmp_ctx ) ;
return ;
2008-11-19 14:43:46 +11:00
}
2009-10-09 15:47:49 +02:00
if ( c - > pnn > = nodemap - > num ) {
2021-12-08 11:07:25 +11:00
DBG_ERR ( " Nodemap from leader does not contain node %d \n " ,
c - > pnn ) ;
2009-10-09 15:47:49 +02:00
talloc_free ( tmp_ctx ) ;
return ;
}
/* send the flags update to all connected nodes */
nodes = list_of_connected_nodes ( ctdb , nodemap , tmp_ctx , true ) ;
if ( ctdb_client_async_control ( ctdb , CTDB_CONTROL_MODIFY_FLAGS ,
nodes , 0 , CONTROL_TIMEOUT ( ) ,
false , data ,
NULL , NULL ,
NULL ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb_control to modify node flags failed \n " ) ) ;
talloc_free ( tmp_ctx ) ;
return ;
}
talloc_free ( tmp_ctx ) ;
2008-11-19 14:43:46 +11:00
}
2007-06-07 15:18:55 +10:00
2021-12-17 14:42:47 +11:00
static void leader_broadcast_timeout_handler ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval current_time ,
void * private_data )
{
struct ctdb_recoverd * rec = talloc_get_type_abort (
private_data , struct ctdb_recoverd ) ;
rec - > leader_broadcast_timeout_te = NULL ;
2022-01-23 06:21:51 +11:00
D_NOTICE ( " Leader broadcast timeout \n " ) ;
2021-12-17 14:42:47 +11:00
force_election ( rec ) ;
}
static void leader_broadcast_timeout_cancel ( struct ctdb_recoverd * rec )
{
TALLOC_FREE ( rec - > leader_broadcast_timeout_te ) ;
}
static int leader_broadcast_timeout_start ( struct ctdb_recoverd * rec )
{
struct ctdb_context * ctdb = rec - > ctdb ;
/*
* This should not be necessary . However , there will be
* interactions with election code here . It will want to
* cancel and restart the timer around potentially long
* elections .
*/
leader_broadcast_timeout_cancel ( rec ) ;
rec - > leader_broadcast_timeout_te =
tevent_add_timer (
ctdb - > ev ,
rec ,
2022-01-15 13:02:02 +11:00
timeval_current_ofs ( ctdb_config . leader_timeout , 0 ) ,
2021-12-17 14:42:47 +11:00
leader_broadcast_timeout_handler ,
rec ) ;
if ( rec - > leader_broadcast_timeout_te = = NULL ) {
D_ERR ( " Unable to start leader broadcast timeout \n " ) ;
return ENOMEM ;
}
return 0 ;
}
static bool leader_broadcast_timeout_active ( struct ctdb_recoverd * rec )
{
return rec - > leader_broadcast_timeout_te ! = NULL ;
}
2020-03-16 16:07:26 +11:00
/*
  handler for leader broadcast messages

  The payload is a uint32 PNN.  Any parseable broadcast cancels the
  leader broadcast timeout.  A broadcast naming the leader we already
  know simply restarts the timeout.  A broadcast of CTDB_UNKNOWN_PNN
  (when that differs from the known leader) marks an election as in
  progress and, with the cluster lock enabled and no election already
  in progress, runs the cluster lock election; the timeout is not
  restarted here in that case.  Any other PNN is recorded as the new
  leader and the timeout is restarted.
 */
static void leader_handler(uint64_t srvid, TDB_DATA data, void *private_data)
{
	struct ctdb_recoverd *rec = talloc_get_type_abort(
		private_data, struct ctdb_recoverd);
	uint32_t sender_leader;
	size_t consumed;
	int ret;

	ret = ctdb_uint32_pull(data.dptr, data.dsize, &sender_leader, &consumed);
	if (ret != 0) {
		DBG_WARNING("Unable to parse leader broadcast, ret=%d\n", ret);
		return;
	}

	leader_broadcast_timeout_cancel(rec);

	if (sender_leader != rec->leader) {
		if (sender_leader == CTDB_UNKNOWN_PNN) {
			bool already_electing = rec->election_in_progress;

			/*
			 * Leader broadcast timeout was cancelled above - stop
			 * main loop from restarting it until election is
			 * complete
			 */
			rec->election_in_progress = true;

			/*
			 * This is the only notification for a cluster lock
			 * election, so handle it here...
			 */
			if (cluster_lock_enabled(rec) && !already_electing) {
				cluster_lock_election(rec);
			}

			return;
		}

		D_NOTICE("Received leader broadcast, leader=%"PRIu32"\n",
			 sender_leader);
		rec->leader = sender_leader;
	}

	/* Known or newly learned leader: wait for its next broadcast */
	leader_broadcast_timeout_start(rec);
}
2007-08-23 13:48:39 +10:00
2007-08-27 09:40:10 +10:00
/*
  shared state for one verify_recmode() run
 */
struct verify_recmode_normal_data {
/* number of getrecmode controls sent that have not completed yet */
uint32_t count ;
/* combined result so far: starts as MONITOR_OK, updated by the callback */
enum monitor_result status ;
} ;
static void verify_recmode_normal_callback ( struct ctdb_client_control_state * state )
{
2007-09-26 14:25:32 +10:00
struct verify_recmode_normal_data * rmdata = talloc_get_type ( state - > async . private_data , struct verify_recmode_normal_data ) ;
2007-08-27 09:40:10 +10:00
/* one more node has responded with recmode data*/
rmdata - > count - - ;
/* if we failed to get the recmode, then return an error and let
the main loop try again .
*/
if ( state - > state ! = CTDB_CONTROL_DONE ) {
if ( rmdata - > status = = MONITOR_OK ) {
rmdata - > status = MONITOR_FAILED ;
}
return ;
}
/* if we got a response, then the recmode will be stored in the
status field
*/
if ( state - > status ! = CTDB_RECOVERY_NORMAL ) {
2013-06-30 17:57:33 +10:00
DEBUG ( DEBUG_NOTICE , ( " Node:%u was in recovery mode. Start recovery process \n " , state - > c - > hdr . destnode ) ) ;
2007-08-27 09:40:10 +10:00
rmdata - > status = MONITOR_RECOVERY_NEEDED ;
}
return ;
}
/* verify that all nodes are in normal recovery mode */
2015-10-29 17:22:48 +11:00
static enum monitor_result verify_recmode ( struct ctdb_context * ctdb , struct ctdb_node_map_old * nodemap )
2007-08-23 13:48:39 +10:00
{
2007-08-27 09:40:10 +10:00
struct verify_recmode_normal_data * rmdata ;
2007-08-23 19:27:09 +10:00
TALLOC_CTX * mem_ctx = talloc_new ( ctdb ) ;
2007-08-27 09:40:10 +10:00
struct ctdb_client_control_state * state ;
enum monitor_result status ;
2019-05-23 08:43:58 +10:00
unsigned int j ;
2007-08-27 09:40:10 +10:00
rmdata = talloc ( mem_ctx , struct verify_recmode_normal_data ) ;
CTDB_NO_MEMORY_FATAL ( ctdb , rmdata ) ;
rmdata - > count = 0 ;
rmdata - > status = MONITOR_OK ;
2007-08-23 13:48:39 +10:00
/* loop over all active nodes and send an async getrecmode call to
them */
for ( j = 0 ; j < nodemap - > num ; j + + ) {
if ( nodemap - > nodes [ j ] . flags & NODE_FLAGS_INACTIVE ) {
continue ;
}
2007-08-27 09:40:10 +10:00
state = ctdb_ctrl_getrecmode_send ( ctdb , mem_ctx ,
2007-08-23 13:48:39 +10:00
CONTROL_TIMEOUT ( ) ,
2007-09-04 09:50:07 +10:00
nodemap - > nodes [ j ] . pnn ) ;
2007-08-27 09:40:10 +10:00
if ( state = = NULL ) {
/* we failed to send the control, treat this as
an error and try again next iteration
*/
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( " Failed to call ctdb_ctrl_getrecmode_send during monitoring \n " ) ) ;
2007-08-23 19:27:09 +10:00
talloc_free ( mem_ctx ) ;
2007-08-23 13:48:39 +10:00
return MONITOR_FAILED ;
}
2007-08-23 19:27:09 +10:00
2007-08-27 09:40:10 +10:00
/* set up the callback functions */
state - > async . fn = verify_recmode_normal_callback ;
2007-09-26 14:25:32 +10:00
state - > async . private_data = rmdata ;
2007-08-27 09:40:10 +10:00
/* one more control to wait for to complete */
rmdata - > count + + ;
2007-08-23 13:48:39 +10:00
}
2007-08-27 09:40:10 +10:00
/* now wait for up to the maximum number of seconds allowed
or until all nodes we expect a response from has replied
*/
while ( rmdata - > count > 0 ) {
2015-10-26 16:50:09 +11:00
tevent_loop_once ( ctdb - > ev ) ;
2007-08-27 09:40:10 +10:00
}
status = rmdata - > status ;
2007-08-23 19:27:09 +10:00
talloc_free ( mem_ctx ) ;
2007-08-27 09:40:10 +10:00
return status ;
2007-08-23 13:48:39 +10:00
}
2007-08-27 09:40:10 +10:00
2013-02-21 10:43:35 +11:00
static bool interfaces_have_changed ( struct ctdb_context * ctdb ,
struct ctdb_recoverd * rec )
{
2015-10-28 19:43:48 +11:00
struct ctdb_iface_list_old * ifaces = NULL ;
2013-02-21 10:43:35 +11:00
TALLOC_CTX * mem_ctx ;
bool ret = false ;
mem_ctx = talloc_new ( NULL ) ;
/* Read the interfaces from the local node */
if ( ctdb_ctrl_get_ifaces ( ctdb , CONTROL_TIMEOUT ( ) ,
CTDB_CURRENT_NODE , mem_ctx , & ifaces ) ! = 0 ) {
2021-12-08 20:25:46 +11:00
D_ERR ( " Unable to get interfaces from local node %u \n " , rec - > pnn ) ;
2013-02-21 10:43:35 +11:00
/* We could return an error. However, this will be
* rare so we ' ll decide that the interfaces have
* actually changed , just in case .
*/
talloc_free ( mem_ctx ) ;
return true ;
}
if ( ! rec - > ifaces ) {
/* We haven't been here before so things have changed */
2013-08-15 17:04:01 +10:00
DEBUG ( DEBUG_NOTICE , ( " Initial interface fetched \n " ) ) ;
2013-02-21 10:43:35 +11:00
ret = true ;
} else if ( rec - > ifaces - > num ! = ifaces - > num ) {
/* Number of interfaces has changed */
2013-08-15 17:04:01 +10:00
DEBUG ( DEBUG_NOTICE , ( " Interface count changed from %d to %d \n " ,
rec - > ifaces - > num , ifaces - > num ) ) ;
2013-02-21 10:43:35 +11:00
ret = true ;
} else {
/* See if interface names or link states have changed */
2019-05-23 08:43:58 +10:00
unsigned int i ;
2013-02-21 10:43:35 +11:00
for ( i = 0 ; i < rec - > ifaces - > num ; i + + ) {
2015-10-28 19:37:17 +11:00
struct ctdb_iface * iface = & rec - > ifaces - > ifaces [ i ] ;
2013-08-15 17:04:01 +10:00
if ( strcmp ( iface - > name , ifaces - > ifaces [ i ] . name ) ! = 0 ) {
DEBUG ( DEBUG_NOTICE ,
( " Interface in slot %d changed: %s => %s \n " ,
i , iface - > name , ifaces - > ifaces [ i ] . name ) ) ;
ret = true ;
break ;
}
if ( iface - > link_state ! = ifaces - > ifaces [ i ] . link_state ) {
DEBUG ( DEBUG_NOTICE ,
( " Interface %s changed state: %d => %d \n " ,
iface - > name , iface - > link_state ,
ifaces - > ifaces [ i ] . link_state ) ) ;
2013-02-21 10:43:35 +11:00
ret = true ;
break ;
}
}
}
talloc_free ( rec - > ifaces ) ;
rec - > ifaces = talloc_steal ( rec , ifaces ) ;
talloc_free ( mem_ctx ) ;
return ret ;
}
2007-06-07 15:18:55 +10:00
2016-05-03 16:36:37 +10:00
/* Check that the local allocation of public IP addresses is correct
* and do some house - keeping */
2021-12-13 09:51:36 +11:00
static int verify_local_ip_allocation ( struct ctdb_recoverd * rec )
2008-07-02 13:55:59 +10:00
{
TALLOC_CTX * mem_ctx = talloc_new ( NULL ) ;
2021-12-13 09:51:36 +11:00
struct ctdb_context * ctdb = rec - > ctdb ;
2019-05-23 08:43:58 +10:00
unsigned int j ;
int ret ;
2009-12-22 15:21:08 +01:00
bool need_takeover_run = false ;
2015-11-09 15:41:45 +11:00
struct ctdb_public_ip_list_old * ips = NULL ;
2021-12-08 19:37:39 +11:00
/* If we are not the leader then do some housekeeping */
if ( ! this_node_is_leader ( rec ) ) {
2021-12-08 11:07:25 +11:00
/* Ignore any IP reallocate requests - only leader
2016-05-03 16:36:37 +10:00
* processes them
*/
TALLOC_FREE ( rec - > reallocate_requests ) ;
/* Clear any nodes that should be force rebalanced in
2021-12-08 11:07:25 +11:00
* the next takeover run . If the leader has changed
* then we don ' t want to process these some time in
* the future .
2016-05-03 16:36:37 +10:00
*/
TALLOC_FREE ( rec - > force_rebalance_nodes ) ;
}
2015-11-09 15:41:45 +11:00
/* Return early if disabled... */
2018-08-21 13:41:22 +10:00
if ( ctdb_config . failover_disabled | |
2015-11-09 15:41:45 +11:00
ctdb_op_is_disabled ( rec - > takeover_run ) ) {
2020-04-16 14:38:34 +02:00
talloc_free ( mem_ctx ) ;
2015-11-09 15:41:45 +11:00
return 0 ;
}
2008-07-02 13:55:59 +10:00
2013-02-21 10:43:35 +11:00
if ( interfaces_have_changed ( ctdb , rec ) ) {
2009-12-22 15:21:08 +01:00
need_takeover_run = true ;
}
2015-11-09 16:12:31 +11:00
/* If there are unhosted IPs but this node can host them then
* trigger an IP reallocation */
2012-10-11 15:17:54 +11:00
2015-11-09 16:12:31 +11:00
/* Read *available* IPs from local node */
ret = ctdb_ctrl_get_public_ips_flags (
ctdb , CONTROL_TIMEOUT ( ) , CTDB_CURRENT_NODE , mem_ctx ,
CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE , & ips ) ;
2015-11-09 15:41:45 +11:00
if ( ret ! = 0 ) {
2015-11-09 16:12:31 +11:00
DEBUG ( DEBUG_ERR , ( " Unable to retrieve available public IPs \n " ) ) ;
2015-11-09 15:41:45 +11:00
talloc_free ( mem_ctx ) ;
return - 1 ;
}
2012-10-11 15:17:54 +11:00
2015-11-09 15:41:45 +11:00
for ( j = 0 ; j < ips - > num ; j + + ) {
2019-05-23 17:50:32 +10:00
if ( ips - > ips [ j ] . pnn = = CTDB_UNKNOWN_PNN & &
2021-12-13 09:51:36 +11:00
rec - > nodemap - > nodes [ rec - > pnn ] . flags = = 0 ) {
2015-11-09 16:12:31 +11:00
DEBUG ( DEBUG_WARNING ,
( " Unassigned IP %s can be served by this node \n " ,
ctdb_addr_to_str ( & ips - > ips [ j ] . addr ) ) ) ;
2015-11-09 15:41:45 +11:00
need_takeover_run = true ;
2012-10-11 15:17:54 +11:00
}
2015-11-09 15:41:45 +11:00
}
2012-10-11 15:17:54 +11:00
2015-11-09 15:41:45 +11:00
talloc_free ( ips ) ;
2012-10-11 15:17:54 +11:00
2015-11-09 15:44:15 +11:00
if ( ! ctdb - > do_checkpublicip ) {
goto done ;
}
2015-11-09 16:12:31 +11:00
/* Validate the IP addresses that this node has on network
* interfaces . If there is an inconsistency between reality
* and the state expected by CTDB then try to fix it by
* triggering an IP reallocation or releasing extraneous IP
* addresses . */
/* Read *known* IPs from local node */
ret = ctdb_ctrl_get_public_ips_flags (
ctdb , CONTROL_TIMEOUT ( ) , CTDB_CURRENT_NODE , mem_ctx , 0 , & ips ) ;
2015-11-09 15:41:45 +11:00
if ( ret ! = 0 ) {
2015-11-09 16:12:31 +11:00
DEBUG ( DEBUG_ERR , ( " Unable to retrieve known public IPs \n " ) ) ;
2015-11-09 15:41:45 +11:00
talloc_free ( mem_ctx ) ;
return - 1 ;
}
2012-10-11 15:17:54 +11:00
2015-11-09 15:41:45 +11:00
for ( j = 0 ; j < ips - > num ; j + + ) {
2021-12-13 09:51:36 +11:00
if ( ips - > ips [ j ] . pnn = = rec - > pnn ) {
2015-11-09 15:44:15 +11:00
if ( ! ctdb_sys_have_ip ( & ips - > ips [ j ] . addr ) ) {
2015-11-09 16:12:31 +11:00
DEBUG ( DEBUG_ERR ,
( " Assigned IP %s not on an interface \n " ,
ctdb_addr_to_str ( & ips - > ips [ j ] . addr ) ) ) ;
2015-11-09 15:41:45 +11:00
need_takeover_run = true ;
}
} else {
2015-11-09 15:44:15 +11:00
if ( ctdb_sys_have_ip ( & ips - > ips [ j ] . addr ) ) {
2015-11-09 16:12:31 +11:00
DEBUG ( DEBUG_ERR ,
2016-08-02 12:18:15 +10:00
( " IP %s incorrectly on an interface \n " ,
2015-11-09 16:12:31 +11:00
ctdb_addr_to_str ( & ips - > ips [ j ] . addr ) ) ) ;
2016-08-02 12:18:15 +10:00
need_takeover_run = true ;
2008-07-02 13:55:59 +10:00
}
}
}
2015-11-09 15:44:15 +11:00
done :
2009-12-22 15:21:08 +01:00
if ( need_takeover_run ) {
2015-10-29 14:32:49 +11:00
struct ctdb_srvid_message rd ;
2009-12-22 15:21:08 +01:00
TDB_DATA data ;
2015-11-09 16:12:31 +11:00
DEBUG ( DEBUG_NOTICE , ( " Trigger takeoverrun \n " ) ) ;
2009-12-22 15:21:08 +01:00
2016-01-11 17:23:12 +11:00
ZERO_STRUCT ( rd ) ;
2021-12-13 09:51:36 +11:00
rd . pnn = rec - > pnn ;
2009-12-22 15:21:08 +01:00
rd . srvid = 0 ;
data . dptr = ( uint8_t * ) & rd ;
data . dsize = sizeof ( rd ) ;
2020-07-29 07:02:45 +10:00
ret = ctdb_client_send_message ( ctdb ,
CTDB_BROADCAST_CONNECTED ,
CTDB_SRVID_TAKEOVER_RUN ,
data ) ;
2009-12-22 15:21:08 +01:00
if ( ret ! = 0 ) {
2020-07-29 07:02:45 +10:00
D_ERR ( " Failed to send takeover run request \n " ) ;
2009-12-22 15:21:08 +01:00
}
}
2008-07-02 13:55:59 +10:00
talloc_free ( mem_ctx ) ;
return 0 ;
}
2008-12-05 16:32:30 +11:00
2018-01-18 19:52:22 +11:00
/*
  callback state shared by the async GET_NODEMAP controls sent from
  get_remote_nodemaps()
 */
struct remote_nodemaps_state {
/* result array; a reply is stored at the index its sender has in rec->nodemap */
struct ctdb_node_map_old * * remote_nodemaps ;
2018-01-18 19:58:15 +11:00
/* recovery daemon state: supplies the local nodemap and takes culprit reports */
struct ctdb_recoverd * rec ;
2018-01-18 19:52:22 +11:00
} ;
2018-01-18 16:19:36 +11:00
/*
  success callback for one async GET_NODEMAP control

  Looks up the replying node's position in the local nodemap and
  stores the returned nodemap (reparented onto the result array) in
  the matching slot.  A reply from a PNN that is not in the local
  nodemap is logged and dropped.

  callback_data is the struct remote_nodemaps_state.
 */
static void async_getnodemap_callback(struct ctdb_context *ctdb,
				      uint32_t node_pnn,
				      int32_t res,
				      TDB_DATA outdata,
				      void *callback_data)
{
	struct remote_nodemaps_state *state = callback_data;
	struct ctdb_node_map_old *local_map = state->rec->nodemap;
	size_t slot = 0;

	/* Find the slot whose PNN matches the sender */
	while (slot < local_map->num &&
	       local_map->nodes[slot].pnn != node_pnn) {
		slot++;
	}

	if (slot >= local_map->num) {
		DBG_ERR("Invalid PNN %"PRIu32"\n", node_pnn);
		return;
	}

	state->remote_nodemaps[slot] =
		(struct ctdb_node_map_old *)talloc_steal(
			state->remote_nodemaps, outdata.dptr);
}
2018-01-18 19:58:15 +11:00
/*
  failure callback for one async GET_NODEMAP control

  Logs the failure and marks the node that failed to answer as a
  culprit.  callback_data is the struct remote_nodemaps_state.
 */
static void async_getnodemap_error(struct ctdb_context *ctdb,
				   uint32_t node_pnn,
				   int32_t res,
				   TDB_DATA outdata,
				   void *callback_data)
{
	struct remote_nodemaps_state *state = callback_data;

	DBG_ERR("Failed to retrieve nodemap from node %u\n", node_pnn);

	ctdb_set_culprit(state->rec, node_pnn);
}
2018-01-18 16:41:19 +11:00
static int get_remote_nodemaps ( struct ctdb_recoverd * rec ,
2018-01-18 16:19:36 +11:00
TALLOC_CTX * mem_ctx ,
2018-01-18 16:31:39 +11:00
struct ctdb_node_map_old * * * remote_nodemaps )
2008-12-05 16:32:30 +11:00
{
2018-01-18 16:41:19 +11:00
struct ctdb_context * ctdb = rec - > ctdb ;
2018-01-18 16:31:39 +11:00
struct ctdb_node_map_old * * t ;
2008-12-05 16:32:30 +11:00
uint32_t * nodes ;
2018-01-18 19:52:22 +11:00
struct remote_nodemaps_state state ;
2018-01-18 16:19:36 +11:00
int ret ;
2008-12-05 16:32:30 +11:00
2018-01-18 16:31:39 +11:00
t = talloc_zero_array ( mem_ctx ,
struct ctdb_node_map_old * ,
rec - > nodemap - > num ) ;
if ( t = = NULL ) {
DBG_ERR ( " Memory allocation error \n " ) ;
return - 1 ;
}
2019-06-14 00:23:22 +10:00
nodes = list_of_connected_nodes ( ctdb , rec - > nodemap , mem_ctx , false ) ;
2018-01-18 16:19:36 +11:00
2018-01-18 19:52:22 +11:00
state . remote_nodemaps = t ;
2018-01-18 19:58:15 +11:00
state . rec = rec ;
2018-01-18 19:52:22 +11:00
2018-01-18 16:19:36 +11:00
ret = ctdb_client_async_control ( ctdb ,
CTDB_CONTROL_GET_NODEMAP ,
nodes ,
0 ,
CONTROL_TIMEOUT ( ) ,
false ,
tdb_null ,
2008-12-05 16:32:30 +11:00
async_getnodemap_callback ,
2018-01-18 19:58:15 +11:00
async_getnodemap_error ,
2018-01-18 19:52:22 +11:00
& state ) ;
2020-08-17 20:27:18 +10:00
talloc_free ( nodes ) ;
2018-01-18 16:31:39 +11:00
if ( ret ! = 0 ) {
talloc_free ( t ) ;
return ret ;
}
* remote_nodemaps = t ;
return 0 ;
2008-12-05 16:32:30 +11:00
}
2010-06-22 22:50:23 +09:30
static void main_loop ( struct ctdb_context * ctdb , struct ctdb_recoverd * rec ,
TALLOC_CTX * mem_ctx )
2007-05-04 08:30:18 +10:00
{
2015-10-29 17:22:48 +11:00
struct ctdb_node_map_old * nodemap = NULL ;
struct ctdb_node_map_old * * remote_nodemaps = NULL ;
2007-05-04 09:45:53 +10:00
struct ctdb_vnn_map * vnnmap = NULL ;
struct ctdb_vnn_map * remote_vnnmap = NULL ;
2015-03-29 17:49:02 +11:00
uint32_t num_lmasters ;
2008-02-18 19:38:04 +11:00
int32_t debug_level ;
2019-05-23 08:43:58 +10:00
unsigned int i , j ;
int ret ;
2013-06-28 16:31:07 +10:00
bool self_ban ;
2007-06-07 15:18:55 +10:00
2007-06-04 20:22:44 +10:00
2008-01-07 16:17:22 +11:00
/* verify that the main daemon is still running */
2012-05-03 11:42:41 +10:00
if ( ctdb_kill ( ctdb , ctdb - > ctdbd_pid , 0 ) ! = 0 ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_CRIT , ( " CTDB daemon is no longer available. Shutting down recovery daemon \n " ) ) ;
2008-01-07 16:17:22 +11:00
exit ( - 1 ) ;
}
2008-09-09 13:44:46 +10:00
/* ping the local daemon to tell it we are alive */
ctdb_ctrl_recd_ping ( ctdb ) ;
2020-03-18 20:27:10 +11:00
if ( rec - > election_in_progress ) {
2007-11-13 10:27:44 +11:00
/* an election is in progress */
2010-06-22 22:50:23 +09:30
return ;
2007-11-13 10:27:44 +11:00
}
2020-03-16 16:16:44 +11:00
/*
* Start leader broadcasts if they are not active ( 1 st time
* through main loop ? Memory allocation error ? )
*/
if ( ! leader_broadcast_loop_active ( rec ) ) {
ret = leader_broadcast_loop ( rec ) ;
if ( ret ! = 0 ) {
D_ERR ( " Failed to set up leader broadcast \n " ) ;
ctdb_set_culprit ( rec , rec - > pnn ) ;
}
}
2021-12-17 14:42:47 +11:00
/*
* Similar for leader broadcast timeouts . These can also have
* been stopped by another node receiving a leader broadcast
* timeout and transmitting an " unknown leader broadcast " .
* Note that this should never be done during an election - at
* the moment there is nothing between here and the above
* election - in - progress check that can process an election
* result ( i . e . no event loop ) .
*/
if ( ! leader_broadcast_timeout_active ( rec ) ) {
ret = leader_broadcast_timeout_start ( rec ) ;
if ( ret ! = 0 ) {
ctdb_set_culprit ( rec , rec - > pnn ) ;
}
}
2020-03-16 16:16:44 +11:00
2008-02-18 19:38:04 +11:00
/* read the debug level from the parent and update locally */
ret = ctdb_ctrl_get_debuglevel ( ctdb , CTDB_CURRENT_NODE , & debug_level ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to read debuglevel from parent \n " ) ) ;
2010-06-22 22:50:23 +09:30
return ;
2008-02-18 19:38:04 +11:00
}
2018-11-07 14:14:05 +01:00
debuglevel_set ( debug_level ) ;
2008-02-18 19:38:04 +11:00
2007-06-04 20:22:44 +10:00
/* get relevant tunables */
2007-06-07 18:05:25 +10:00
ret = ctdb_ctrl_get_all_tunables ( ctdb , CONTROL_TIMEOUT ( ) , CTDB_CURRENT_NODE , & ctdb - > tunable ) ;
if ( ret ! = 0 ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( " Failed to get tunables - retrying \n " ) ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-06-07 18:05:25 +10:00
}
2007-05-04 08:30:18 +10:00
2014-09-25 17:17:04 +10:00
/* get runstate */
ret = ctdb_ctrl_get_runstate ( ctdb , CONTROL_TIMEOUT ( ) ,
CTDB_CURRENT_NODE , & ctdb - > runstate ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to get runstate - retrying \n " ) ) ;
return ;
}
2015-10-23 16:00:55 +11:00
/* get nodemap */
2021-12-08 20:25:46 +11:00
ret = ctdb_ctrl_getnodemap ( ctdb ,
CONTROL_TIMEOUT ( ) ,
rec - > pnn ,
rec ,
& nodemap ) ;
2007-05-04 09:01:01 +10:00
if ( ret ! = 0 ) {
2021-12-08 20:25:46 +11:00
DBG_ERR ( " Unable to get nodemap from node % " PRIu32 " \n " , rec - > pnn ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:01:01 +10:00
}
2020-03-22 13:46:46 +11:00
talloc_free ( rec - > nodemap ) ;
rec - > nodemap = nodemap ;
2007-05-04 08:30:18 +10:00
2013-06-28 14:09:35 +10:00
/* remember our own node flags */
2021-12-08 20:25:46 +11:00
rec - > node_flags = nodemap - > nodes [ rec - > pnn ] . flags ;
2013-06-28 14:09:35 +10:00
2013-06-28 16:31:07 +10:00
ban_misbehaving_nodes ( rec , & self_ban ) ;
if ( self_ban ) {
DEBUG ( DEBUG_NOTICE , ( " This node was banned, restart main_loop \n " ) ) ;
return ;
}
2013-06-27 16:01:16 +10:00
2017-06-22 17:45:20 +10:00
ret = ctdb_ctrl_getrecmode ( ctdb , mem_ctx , CONTROL_TIMEOUT ( ) ,
CTDB_CURRENT_NODE , & ctdb - > recovery_mode ) ;
if ( ret ! = 0 ) {
D_ERR ( " Failed to read recmode from local node \n " ) ;
return ;
}
2013-06-28 14:02:44 +10:00
/* if the local daemon is STOPPED or BANNED, we verify that the databases are
2013-06-26 07:11:51 +02:00
also frozen and that the recmode is set to active .
2009-07-09 14:19:32 +10:00
*/
2020-03-17 17:10:20 +11:00
if ( rec - > node_flags & NODE_FLAGS_INACTIVE ) {
2013-09-17 12:00:26 +10:00
/* If this node has become inactive then we want to
2021-12-08 11:07:25 +11:00
* reduce the chances of it taking over the leader
* role when it becomes active again . This
* helps to stabilise the leader role so that
2013-09-17 12:00:26 +10:00
* it stays on the most stable node .
*/
rec - > priority_time = timeval_current ( ) ;
2009-07-09 14:19:32 +10:00
if ( ctdb - > recovery_mode = = CTDB_RECOVERY_NORMAL ) {
2013-06-28 14:02:44 +10:00
DEBUG ( DEBUG_ERR , ( " Node is stopped or banned but recovery mode is not active. Activate recovery mode and lock databases \n " ) ) ;
2009-07-09 14:19:32 +10:00
ret = ctdb_ctrl_setrecmode ( ctdb , CONTROL_TIMEOUT ( ) , CTDB_CURRENT_NODE , CTDB_RECOVERY_ACTIVE ) ;
if ( ret ! = 0 ) {
2013-06-28 14:02:44 +10:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to activate recovery mode in STOPPED or BANNED state \n " ) ) ;
2009-07-09 14:19:32 +10:00
2010-06-22 22:50:23 +09:30
return ;
2009-07-09 14:19:32 +10:00
}
2016-06-01 12:10:46 +10:00
}
if ( ! rec - > frozen_on_inactive ) {
ret = ctdb_ctrl_freeze ( ctdb , CONTROL_TIMEOUT ( ) ,
CTDB_CURRENT_NODE ) ;
2014-05-06 14:24:52 +10:00
if ( ret ! = 0 ) {
2016-06-01 12:10:46 +10:00
DEBUG ( DEBUG_ERR ,
( __location__ " Failed to freeze node "
" in STOPPED or BANNED state \n " ) ) ;
2014-05-06 14:24:52 +10:00
return ;
}
2016-06-01 12:10:46 +10:00
rec - > frozen_on_inactive = true ;
2009-07-09 14:19:32 +10:00
}
2013-06-27 15:39:15 +10:00
/* If this node is stopped or banned then it is not the recovery
* master , so don ' t do anything . This prevents stopped or banned
* node from starting election and sending unnecessary controls .
*/
return ;
2009-07-09 14:19:32 +10:00
}
2013-06-27 15:39:15 +10:00
2016-06-01 12:10:46 +10:00
rec - > frozen_on_inactive = false ;
2015-10-27 15:09:33 +11:00
/* Retrieve capabilities from all connected nodes */
ret = update_capabilities ( rec , nodemap ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Unable to update node capabilities. \n " ) ) ;
return ;
}
2017-06-22 16:15:47 +10:00
if ( ctdb - > recovery_mode = = CTDB_RECOVERY_NORMAL ) {
/* Check if an IP takeover run is needed and trigger one if
* necessary */
2021-12-13 09:51:36 +11:00
verify_local_ip_allocation ( rec ) ;
2017-06-22 16:15:47 +10:00
}
2007-05-07 06:51:58 +10:00
2021-12-08 19:37:39 +11:00
/* If this node is not the leader then skip recovery checks */
if ( ! this_node_is_leader ( rec ) ) {
2010-06-22 22:50:23 +09:30
return ;
2007-05-07 06:51:58 +10:00
}
2007-10-11 06:16:36 +10:00
2019-06-14 03:51:01 +10:00
/* Get the nodemaps for all connected remote nodes */
ret = get_remote_nodemaps ( rec , mem_ctx , & remote_nodemaps ) ;
if ( ret ! = 0 ) {
DBG_ERR ( " Failed to read remote nodemaps \n " ) ;
return ;
}
2018-01-24 10:21:37 +11:00
/* Ensure our local and remote flags are correct */
ret = update_flags ( rec , nodemap , remote_nodemaps ) ;
2016-04-27 21:47:08 +10:00
if ( ret ! = 0 ) {
2018-01-24 10:21:37 +11:00
D_ERR ( " Unable to update flags \n " ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-10-11 06:16:36 +10:00
}
2008-10-17 21:18:06 +11:00
if ( ctdb - > num_nodes ! = nodemap - > num ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb->num_nodes (%d) != nodemap->num (%d) reloading nodes file \n " , ctdb - > num_nodes , nodemap - > num ) ) ;
2013-10-14 13:54:39 +11:00
ctdb_load_nodes_file ( ctdb ) ;
2010-06-22 22:50:23 +09:30
return ;
2008-10-17 21:18:06 +11:00
}
2007-09-04 23:15:23 +10:00
2015-10-27 14:35:09 +11:00
/* get the vnnmap */
2021-12-08 20:25:46 +11:00
ret = ctdb_ctrl_getvnnmap ( ctdb ,
CONTROL_TIMEOUT ( ) ,
rec - > pnn ,
mem_ctx ,
& vnnmap ) ;
2015-10-27 14:35:09 +11:00
if ( ret ! = 0 ) {
2021-12-08 20:25:46 +11:00
DBG_ERR ( " Unable to get vnnmap from node %u \n " , rec - > pnn ) ;
2015-10-27 14:35:09 +11:00
return ;
}
2007-09-14 09:49:12 +10:00
if ( rec - > need_recovery ) {
/* a previous recovery didn't finish */
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-09-14 09:49:12 +10:00
}
2007-05-07 04:41:12 +10:00
/* verify that all active nodes are in normal mode
and not in recovery mode
2009-09-04 02:20:39 +10:00
*/
2007-08-23 19:27:09 +10:00
switch ( verify_recmode ( ctdb , nodemap ) ) {
2007-08-23 13:48:39 +10:00
case MONITOR_RECOVERY_NEEDED :
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-08-23 13:48:39 +10:00
case MONITOR_FAILED :
2010-06-22 22:50:23 +09:30
return ;
2007-08-23 19:27:09 +10:00
case MONITOR_ELECTION_NEEDED :
/* can not happen */
2007-08-23 13:48:39 +10:00
case MONITOR_OK :
break ;
2007-05-07 04:41:12 +10:00
}
2021-12-10 11:43:10 +11:00
if ( cluster_lock_enabled ( rec ) ) {
2021-12-10 11:29:06 +11:00
/* We must already hold the cluster lock */
if ( ! cluster_lock_held ( rec ) ) {
D_ERR ( " Failed cluster lock sanity check \n " ) ;
2021-12-08 20:25:46 +11:00
ctdb_set_culprit ( rec , rec - > pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2009-06-25 11:41:18 +10:00
}
2007-10-05 13:28:21 +10:00
}
2007-08-23 13:48:39 +10:00
2012-04-30 15:50:44 +10:00
2015-02-06 20:59:11 +11:00
/* If recoveries are disabled then there is no use doing any
* nodemap or flags checks . Recoveries might be disabled due
* to " reloadnodes " , so doing these checks might cause an
* unnecessary recovery . */
if ( ctdb_op_is_disabled ( rec - > recovery ) ) {
2016-05-03 16:00:02 +10:00
goto takeover_run_checks ;
2015-02-06 20:59:11 +11:00
}
2008-12-05 16:32:30 +11:00
/* verify that all other nodes have the same nodemap as we have
*/
2007-05-04 09:45:53 +10:00
for ( j = 0 ; j < nodemap - > num ; j + + ) {
2021-12-08 20:25:46 +11:00
if ( nodemap - > nodes [ j ] . pnn = = rec - > pnn ) {
2019-06-14 00:23:22 +10:00
continue ;
}
2009-04-06 12:00:22 +10:00
if ( nodemap - > nodes [ j ] . flags & NODE_FLAGS_INACTIVE ) {
2007-05-04 09:45:53 +10:00
continue ;
}
2008-12-05 16:32:30 +11:00
/* if the nodes disagree on how many nodes there are
2007-05-04 09:45:53 +10:00
then this is a good reason to try recovery
*/
2008-12-05 16:32:30 +11:00
if ( remote_nodemaps [ j ] - > num ! = nodemap - > num ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Remote node:%u has different node count. %u vs %u of the local node \n " ,
2008-12-05 16:32:30 +11:00
nodemap - > nodes [ j ] . pnn , remote_nodemaps [ j ] - > num , nodemap - > num ) ) ;
2009-09-04 02:20:39 +10:00
ctdb_set_culprit ( rec , nodemap - > nodes [ j ] . pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:45:53 +10:00
}
/* if the nodes disagree on which nodes exist and are
active , then that is also a good reason to do recovery
*/
for ( i = 0 ; i < nodemap - > num ; i + + ) {
2008-12-05 16:32:30 +11:00
if ( remote_nodemaps [ j ] - > nodes [ i ] . pnn ! = nodemap - > nodes [ i ] . pnn ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u). \n " ,
2007-09-04 09:50:07 +10:00
nodemap - > nodes [ j ] . pnn , i ,
2008-12-05 16:32:30 +11:00
remote_nodemaps [ j ] - > nodes [ i ] . pnn , nodemap - > nodes [ i ] . pnn ) ) ;
2009-09-04 02:20:39 +10:00
ctdb_set_culprit ( rec , nodemap - > nodes [ j ] . pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:45:53 +10:00
}
}
2013-07-22 17:26:28 +10:00
}
2015-03-29 20:00:17 +11:00
/* count how many active nodes there are */
num_lmasters = 0 ;
for ( i = 0 ; i < nodemap - > num ; i + + ) {
if ( ! ( nodemap - > nodes [ i ] . flags & NODE_FLAGS_INACTIVE ) ) {
if ( ctdb_node_has_capabilities ( rec - > caps ,
ctdb - > nodes [ i ] - > pnn ,
CTDB_CAP_LMASTER ) ) {
num_lmasters + + ;
}
}
}
2007-05-04 09:45:53 +10:00
2013-09-26 13:11:04 +10:00
/* There must be the same number of lmasters in the vnn map as
* there are active nodes with the lmaster capability . . . or
* do a recovery .
2007-05-04 09:45:53 +10:00
*/
2015-03-29 17:49:02 +11:00
if ( vnnmap - > size ! = num_lmasters ) {
2013-09-26 13:11:04 +10:00
DEBUG ( DEBUG_ERR , ( __location__ " The vnnmap count is different from the number of active lmaster nodes: %u vs %u \n " ,
2015-03-29 17:49:02 +11:00
vnnmap - > size , num_lmasters ) ) ;
2021-12-08 20:25:46 +11:00
ctdb_set_culprit ( rec , rec - > pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:45:53 +10:00
}
2019-08-21 14:35:09 +10:00
/*
* Verify that all active lmaster nodes in the nodemap also
* exist in the vnnmap
2007-05-04 09:45:53 +10:00
*/
for ( j = 0 ; j < nodemap - > num ; j + + ) {
2007-06-07 15:18:55 +10:00
if ( nodemap - > nodes [ j ] . flags & NODE_FLAGS_INACTIVE ) {
2007-05-04 09:45:53 +10:00
continue ;
}
2019-08-21 14:35:09 +10:00
if ( ! ctdb_node_has_capabilities ( rec - > caps ,
2019-08-27 12:13:51 +10:00
nodemap - > nodes [ j ] . pnn ,
2019-08-21 14:35:09 +10:00
CTDB_CAP_LMASTER ) ) {
continue ;
}
2021-12-08 20:25:46 +11:00
if ( nodemap - > nodes [ j ] . pnn = = rec - > pnn ) {
2007-05-04 09:45:53 +10:00
continue ;
}
for ( i = 0 ; i < vnnmap - > size ; i + + ) {
2007-09-04 09:50:07 +10:00
if ( vnnmap - > map [ i ] = = nodemap - > nodes [ j ] . pnn ) {
2007-05-04 09:45:53 +10:00
break ;
}
}
2007-06-07 15:18:55 +10:00
if ( i = = vnnmap - > size ) {
2019-08-21 14:35:09 +10:00
D_ERR ( " Active LMASTER node %u is not in the vnnmap \n " ,
nodemap - > nodes [ j ] . pnn ) ;
2009-09-04 02:20:39 +10:00
ctdb_set_culprit ( rec , nodemap - > nodes [ j ] . pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:45:53 +10:00
}
}
2007-05-04 11:57:45 +10:00
/* verify that all other nodes have the same vnnmap
and are from the same generation
*/
2007-05-04 09:45:53 +10:00
for ( j = 0 ; j < nodemap - > num ; j + + ) {
2007-06-07 15:18:55 +10:00
if ( nodemap - > nodes [ j ] . flags & NODE_FLAGS_INACTIVE ) {
2007-05-04 09:45:53 +10:00
continue ;
}
2021-12-08 20:25:46 +11:00
if ( nodemap - > nodes [ j ] . pnn = = rec - > pnn ) {
2007-05-04 09:45:53 +10:00
continue ;
}
2007-09-04 09:50:07 +10:00
ret = ctdb_ctrl_getvnnmap ( ctdb , CONTROL_TIMEOUT ( ) , nodemap - > nodes [ j ] . pnn ,
2007-06-07 18:39:37 +10:00
mem_ctx , & remote_vnnmap ) ;
2007-05-04 09:45:53 +10:00
if ( ret ! = 0 ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Unable to get vnnmap from remote node %u \n " ,
2007-09-04 09:50:07 +10:00
nodemap - > nodes [ j ] . pnn ) ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:45:53 +10:00
}
2007-05-04 11:57:45 +10:00
/* verify the vnnmap generation is the same */
if ( vnnmap - > generation ! = remote_vnnmap - > generation ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours) \n " ,
2007-09-04 09:50:07 +10:00
nodemap - > nodes [ j ] . pnn , remote_vnnmap - > generation , vnnmap - > generation ) ) ;
2009-09-04 02:20:39 +10:00
ctdb_set_culprit ( rec , nodemap - > nodes [ j ] . pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 11:57:45 +10:00
}
2007-05-04 09:45:53 +10:00
/* verify the vnnmap size is the same */
if ( vnnmap - > size ! = remote_vnnmap - > size ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Remote node %u has different size of vnnmap. %u vs %u (ours) \n " ,
2007-09-04 09:50:07 +10:00
nodemap - > nodes [ j ] . pnn , remote_vnnmap - > size , vnnmap - > size ) ) ;
2009-09-04 02:20:39 +10:00
ctdb_set_culprit ( rec , nodemap - > nodes [ j ] . pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:45:53 +10:00
}
/* verify the vnnmap is the same */
for ( i = 0 ; i < vnnmap - > size ; i + + ) {
if ( remote_vnnmap - > map [ i ] ! = vnnmap - > map [ i ] ) {
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ERR , ( __location__ " Remote node %u has different vnnmap. \n " ,
2007-09-04 09:50:07 +10:00
nodemap - > nodes [ j ] . pnn ) ) ;
2009-09-04 02:20:39 +10:00
ctdb_set_culprit ( rec , nodemap - > nodes [ j ] . pnn ) ;
2018-01-16 16:20:05 +11:00
do_recovery ( rec , mem_ctx ) ;
2010-06-22 22:50:23 +09:30
return ;
2007-05-04 09:45:53 +10:00
}
}
}
2016-06-20 20:41:05 +10:00
/* FIXME: Add remote public IP checking to ensure that nodes
* have the IP addresses that are allocated to them . */
2016-05-03 16:00:02 +10:00
takeover_run_checks :
2016-05-03 16:07:34 +10:00
/* If there are IP takeover runs requested or the previous one
* failed then perform one and notify the waiters */
2016-05-03 16:00:02 +10:00
if ( ! ctdb_op_is_disabled ( rec - > takeover_run ) & &
2016-05-03 16:07:34 +10:00
( rec - > reallocate_requests | | rec - > need_takeover_run ) ) {
2016-05-03 16:00:02 +10:00
process_ipreallocate_requests ( ctdb , rec ) ;
}
2010-06-22 22:50:23 +09:30
}
2016-06-02 09:26:40 +10:00
static void recd_sig_term_handler ( struct tevent_context * ev ,
struct tevent_signal * se , int signum ,
int count , void * dont_care ,
void * private_data )
{
struct ctdb_recoverd * rec = talloc_get_type_abort (
private_data , struct ctdb_recoverd ) ;
2016-11-25 14:57:30 +11:00
DEBUG ( DEBUG_ERR , ( " Received SIGTERM, exiting \n " ) ) ;
2021-12-10 11:29:06 +11:00
cluster_lock_release ( rec ) ;
2016-06-02 09:26:40 +10:00
exit ( 0 ) ;
}
2019-07-16 08:58:33 +10:00
/*
* Periodically log elements of the cluster state
*
* This can be used to confirm a split brain has occurred
*/
static void maybe_log_cluster_state ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval current_time ,
void * private_data )
{
struct ctdb_recoverd * rec = talloc_get_type_abort (
private_data , struct ctdb_recoverd ) ;
struct ctdb_context * ctdb = rec - > ctdb ;
struct tevent_timer * tt ;
static struct timeval start_incomplete = {
. tv_sec = 0 ,
} ;
bool is_complete ;
bool was_complete ;
unsigned int i ;
double seconds ;
unsigned int minutes ;
unsigned int num_connected ;
2021-12-08 19:37:39 +11:00
if ( ! this_node_is_leader ( rec ) ) {
2019-07-16 08:58:33 +10:00
goto done ;
}
if ( rec - > nodemap = = NULL ) {
goto done ;
}
is_complete = true ;
num_connected = 0 ;
for ( i = 0 ; i < rec - > nodemap - > num ; i + + ) {
struct ctdb_node_and_flags * n = & rec - > nodemap - > nodes [ i ] ;
2021-12-08 20:25:46 +11:00
if ( n - > pnn = = rec - > pnn ) {
2019-07-16 08:58:33 +10:00
continue ;
}
if ( ( n - > flags & NODE_FLAGS_DELETED ) ! = 0 ) {
continue ;
}
if ( ( n - > flags & NODE_FLAGS_DISCONNECTED ) ! = 0 ) {
is_complete = false ;
continue ;
}
num_connected + + ;
}
was_complete = timeval_is_zero ( & start_incomplete ) ;
if ( is_complete ) {
if ( ! was_complete ) {
2021-12-08 11:07:25 +11:00
D_WARNING ( " Cluster complete with leader=%u \n " ,
2020-07-14 15:22:33 +10:00
rec - > leader ) ;
2019-07-16 08:58:33 +10:00
start_incomplete = timeval_zero ( ) ;
}
goto done ;
}
/* Cluster is newly incomplete... */
if ( was_complete ) {
start_incomplete = current_time ;
minutes = 0 ;
goto log ;
}
/*
* Cluster has been incomplete since previous check , so figure
* out how long ( in minutes ) and decide whether to log anything
*/
seconds = timeval_elapsed2 ( & start_incomplete , & current_time ) ;
minutes = ( unsigned int ) seconds / 60 ;
if ( minutes > = 60 ) {
/* Over an hour, log every hour */
if ( minutes % 60 ! = 0 ) {
goto done ;
}
} else if ( minutes > = 10 ) {
/* Over 10 minutes, log every 10 minutes */
if ( minutes % 10 ! = 0 ) {
goto done ;
}
}
log :
2021-12-08 11:07:25 +11:00
D_WARNING ( " Cluster incomplete with leader=%u, elapsed=%u minutes, "
2019-07-16 08:58:33 +10:00
" connected=%u \n " ,
2020-07-14 15:22:33 +10:00
rec - > leader ,
2019-07-16 08:58:33 +10:00
minutes ,
num_connected ) ;
done :
tt = tevent_add_timer ( ctdb - > ev ,
rec ,
timeval_current_ofs ( 60 , 0 ) ,
maybe_log_cluster_state ,
rec ) ;
if ( tt = = NULL ) {
DBG_WARNING ( " Failed to set up cluster state timer \n " ) ;
}
}
2016-06-02 09:26:40 +10:00
2021-09-30 21:16:44 +10:00
static void recd_sighup_hook ( void * private_data )
{
struct ctdb_recoverd * rec = talloc_get_type_abort (
private_data , struct ctdb_recoverd ) ;
if ( rec - > helper_pid > 0 ) {
kill ( rec - > helper_pid , SIGHUP ) ;
}
}
2010-06-22 22:50:23 +09:30
/*
the main monitoring loop
*/
static void monitor_cluster ( struct ctdb_context * ctdb )
{
2016-06-02 09:26:40 +10:00
struct tevent_signal * se ;
2010-06-22 22:50:23 +09:30
struct ctdb_recoverd * rec ;
2021-09-30 21:03:15 +10:00
bool status ;
2010-06-22 22:50:23 +09:30
DEBUG ( DEBUG_NOTICE , ( " monitor_cluster starting \n " ) ) ;
rec = talloc_zero ( ctdb , struct ctdb_recoverd ) ;
CTDB_NO_MEMORY_FATAL ( ctdb , rec ) ;
rec - > ctdb = ctdb ;
2020-07-14 15:22:33 +10:00
rec - > leader = CTDB_UNKNOWN_PNN ;
2021-12-09 10:33:17 +11:00
rec - > pnn = ctdb_get_pnn ( ctdb ) ;
2021-12-10 11:29:06 +11:00
rec - > cluster_lock_handle = NULL ;
2021-09-30 21:15:56 +10:00
rec - > helper_pid = - 1 ;
2007-06-06 10:25:46 +10:00
2015-02-08 20:52:12 +11:00
rec - > takeover_run = ctdb_op_init ( rec , " takeover runs " ) ;
CTDB_NO_MEMORY_FATAL ( ctdb , rec - > takeover_run ) ;
2013-09-03 11:20:01 +10:00
2015-02-06 14:47:33 +11:00
rec - > recovery = ctdb_op_init ( rec , " recoveries " ) ;
CTDB_NO_MEMORY_FATAL ( ctdb , rec - > recovery ) ;
2010-06-22 22:50:23 +09:30
rec - > priority_time = timeval_current ( ) ;
2016-06-01 12:10:46 +10:00
rec - > frozen_on_inactive = false ;
2008-06-26 13:08:37 +10:00
2021-09-30 21:03:15 +10:00
status = logging_setup_sighup_handler ( rec - > ctdb - > ev ,
rec ,
2021-09-30 21:16:44 +10:00
recd_sighup_hook ,
rec ) ;
2021-09-30 21:03:15 +10:00
if ( ! status ) {
D_ERR ( " Failed to install SIGHUP handler \n " ) ;
exit ( 1 ) ;
}
2016-06-02 09:26:40 +10:00
se = tevent_add_signal ( ctdb - > ev , ctdb , SIGTERM , 0 ,
recd_sig_term_handler , rec ) ;
if ( se = = NULL ) {
DEBUG ( DEBUG_ERR , ( " Failed to install SIGTERM handler \n " ) ) ;
exit ( 1 ) ;
}
2021-12-10 11:43:10 +11:00
if ( ! cluster_lock_enabled ( rec ) ) {
2019-07-16 08:58:33 +10:00
struct tevent_timer * tt ;
tt = tevent_add_timer ( ctdb - > ev ,
rec ,
timeval_current_ofs ( 60 , 0 ) ,
maybe_log_cluster_state ,
rec ) ;
if ( tt = = NULL ) {
DBG_WARNING ( " Failed to set up cluster state timer \n " ) ;
}
}
2010-06-22 22:50:23 +09:30
/* register a message port for sending memory dumps */
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_MEM_DUMP , mem_dump_handler , rec ) ;
2007-05-04 09:45:53 +10:00
2016-03-17 17:26:30 +11:00
/* when a node is assigned banning credits */
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_BANNING ,
banning_handler , rec ) ;
2010-06-22 22:50:23 +09:30
/* register a message port for recovery elections */
2015-10-29 17:51:52 +11:00
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_ELECTION , election_handler , rec ) ;
2010-06-22 22:50:23 +09:30
2018-01-17 19:04:34 +11:00
ctdb_client_set_message_handler ( ctdb ,
CTDB_SRVID_SET_NODE_FLAGS ,
srvid_not_implemented ,
rec ) ;
2010-06-22 22:50:23 +09:30
/* when we are asked to puch out a flag change */
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_PUSH_NODE_FLAGS , push_flags_handler , rec ) ;
/* register a message port for reloadnodes */
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_RELOAD_NODES , reload_nodes_handler , rec ) ;
/* register a message port for performing a takeover run */
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_TAKEOVER_RUN , ip_reallocate_handler , rec ) ;
/* register a message port for disabling the ip check for a short while */
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_DISABLE_IP_CHECK , disable_ip_check_handler , rec ) ;
2012-02-28 06:56:04 +11:00
/* register a message port for forcing a rebalance of a node next
reallocation */
ctdb_client_set_message_handler ( ctdb , CTDB_SRVID_REBALANCE_NODE , recd_node_rebalance_handler , rec ) ;
2013-08-27 15:04:40 +10:00
/* Register a message port for disabling takeover runs */
ctdb_client_set_message_handler ( ctdb ,
CTDB_SRVID_DISABLE_TAKEOVER_RUNS ,
disable_takeover_runs_handler , rec ) ;
2015-02-06 15:06:44 +11:00
/* Register a message port for disabling recoveries */
ctdb_client_set_message_handler ( ctdb ,
CTDB_SRVID_DISABLE_RECOVERIES ,
disable_recoveries_handler , rec ) ;
2020-03-16 16:07:26 +11:00
ctdb_client_set_message_handler ( ctdb ,
CTDB_SRVID_LEADER ,
leader_handler ,
rec ) ;
2010-06-22 22:50:23 +09:30
for ( ; ; ) {
TALLOC_CTX * mem_ctx = talloc_new ( ctdb ) ;
2010-06-22 22:50:35 +09:30
struct timeval start ;
double elapsed ;
2010-06-22 22:50:23 +09:30
if ( ! mem_ctx ) {
DEBUG ( DEBUG_CRIT , ( __location__
" Failed to create temp context \n " ) ) ;
exit ( - 1 ) ;
}
2010-06-22 22:50:35 +09:30
start = timeval_current ( ) ;
2010-06-22 22:50:23 +09:30
main_loop ( ctdb , rec , mem_ctx ) ;
talloc_free ( mem_ctx ) ;
/* we only check for recovery once every second */
2010-06-22 22:50:35 +09:30
elapsed = timeval_elapsed ( & start ) ;
if ( elapsed < ctdb - > tunable . recover_interval ) {
ctdb_wait_timeout ( ctdb , ctdb - > tunable . recover_interval
- elapsed ) ;
}
2010-06-22 22:50:23 +09:30
}
2007-05-04 08:30:18 +10:00
}
2007-06-06 10:25:46 +10:00
/*
2007-06-07 15:18:55 +10:00
event handler for when the main ctdbd dies
*/
2015-10-26 16:50:09 +11:00
static void ctdb_recoverd_parent ( struct tevent_context * ev ,
struct tevent_fd * fde ,
2007-05-15 15:13:36 +10:00
uint16_t flags , void * private_data )
{
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ALERT , ( " recovery daemon parent died - exiting \n " ) ) ;
2007-05-15 15:13:36 +10:00
_exit ( 1 ) ;
}
2008-05-06 11:19:17 +10:00
/*
called regularly to verify that the recovery daemon is still running
*/
2015-10-26 16:50:09 +11:00
static void ctdb_check_recd ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval yt , void * p )
2008-05-06 11:19:17 +10:00
{
struct ctdb_context * ctdb = talloc_get_type ( p , struct ctdb_context ) ;
2012-05-03 11:42:41 +10:00
if ( ctdb_kill ( ctdb , ctdb - > recoverd_pid , 0 ) ! = 0 ) {
2011-03-01 12:09:42 +11:00
DEBUG ( DEBUG_ERR , ( " Recovery daemon (pid:%d) is no longer running. Trying to restart recovery daemon. \n " , ( int ) ctdb - > recoverd_pid ) ) ;
2008-05-06 11:19:17 +10:00
2015-10-26 16:50:09 +11:00
tevent_add_timer ( ctdb - > ev , ctdb , timeval_zero ( ) ,
ctdb_restart_recd , ctdb ) ;
2008-05-06 11:19:17 +10:00
2011-03-01 12:09:42 +11:00
return ;
2008-05-06 11:19:17 +10:00
}
2015-10-26 16:50:09 +11:00
tevent_add_timer ( ctdb - > ev , ctdb - > recd_ctx ,
timeval_current_ofs ( 30 , 0 ) ,
ctdb_check_recd , ctdb ) ;
2008-05-06 11:19:17 +10:00
}
2015-10-26 16:50:09 +11:00
static void recd_sig_child_handler ( struct tevent_context * ev ,
struct tevent_signal * se , int signum ,
int count , void * dont_care ,
void * private_data )
2008-07-09 14:02:54 +10:00
{
// struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
int status ;
pid_t pid = - 1 ;
while ( pid ! = 0 ) {
pid = waitpid ( - 1 , & status , WNOHANG ) ;
if ( pid = = - 1 ) {
2009-06-19 15:55:13 +10:00
if ( errno ! = ECHILD ) {
DEBUG ( DEBUG_ERR , ( __location__ " waitpid() returned error. errno:%s(%d) \n " , strerror ( errno ) , errno ) ) ;
}
2008-07-09 14:02:54 +10:00
return ;
}
if ( pid > 0 ) {
DEBUG ( DEBUG_DEBUG , ( " RECD SIGCHLD from %d \n " , ( int ) pid ) ) ;
}
}
}
2007-06-07 15:18:55 +10:00
/*
startup the recovery daemon as a child of the main ctdb daemon
*/
2007-05-15 15:13:36 +10:00
int ctdb_start_recoverd ( struct ctdb_context * ctdb )
2007-05-04 08:30:18 +10:00
{
2007-05-15 15:13:36 +10:00
int fd [ 2 ] ;
2015-10-26 16:50:09 +11:00
struct tevent_signal * se ;
2010-08-18 09:16:31 +09:30
struct tevent_fd * fde ;
2016-11-29 16:49:41 +11:00
int ret ;
2007-05-04 08:30:18 +10:00
2007-05-15 15:13:36 +10:00
if ( pipe ( fd ) ! = 0 ) {
return - 1 ;
2007-05-04 08:30:18 +10:00
}
2014-08-08 12:51:03 +10:00
ctdb - > recoverd_pid = ctdb_fork ( ctdb ) ;
2007-10-22 12:34:08 +10:00
if ( ctdb - > recoverd_pid = = - 1 ) {
2007-05-15 15:13:36 +10:00
return - 1 ;
2007-05-04 08:30:18 +10:00
}
2012-12-04 15:05:44 +11:00
2007-10-22 12:34:08 +10:00
if ( ctdb - > recoverd_pid ! = 0 ) {
2012-12-04 15:05:44 +11:00
talloc_free ( ctdb - > recd_ctx ) ;
ctdb - > recd_ctx = talloc_new ( ctdb ) ;
CTDB_NO_MEMORY ( ctdb , ctdb - > recd_ctx ) ;
2007-05-15 15:13:36 +10:00
close ( fd [ 0 ] ) ;
2015-10-26 16:50:09 +11:00
tevent_add_timer ( ctdb - > ev , ctdb - > recd_ctx ,
timeval_current_ofs ( 30 , 0 ) ,
ctdb_check_recd , ctdb ) ;
2007-05-15 15:13:36 +10:00
return 0 ;
2007-05-04 08:30:18 +10:00
}
2007-05-15 15:13:36 +10:00
close ( fd [ 1 ] ) ;
srandom ( getpid ( ) ^ time ( NULL ) ) ;
2007-05-04 08:30:18 +10:00
2016-11-29 16:49:41 +11:00
ret = logging_init ( ctdb , NULL , NULL , " ctdb-recoverd " ) ;
if ( ret ! = 0 ) {
return - 1 ;
}
2018-06-19 16:50:41 +10:00
prctl_set_comment ( " ctdb_recoverd " ) ;
2016-11-25 14:44:10 +11:00
if ( switch_from_server_to_client ( ctdb ) ! = 0 ) {
2009-03-23 12:37:30 +11:00
DEBUG ( DEBUG_CRIT , ( __location__ " ERROR: failed to switch recovery daemon into client mode. shutting down. \n " ) ) ;
2007-05-04 08:30:18 +10:00
exit ( 1 ) ;
}
2010-02-04 06:37:41 +11:00
DEBUG ( DEBUG_DEBUG , ( __location__ " Created PIPE FD:%d to recovery daemon \n " , fd [ 0 ] ) ) ;
2009-10-15 11:24:54 +11:00
2015-10-26 16:50:09 +11:00
fde = tevent_add_fd ( ctdb - > ev , ctdb , fd [ 0 ] , TEVENT_FD_READ ,
ctdb_recoverd_parent , & fd [ 0 ] ) ;
2010-08-18 09:16:31 +09:30
tevent_fd_set_auto_close ( fde ) ;
2009-03-23 12:37:30 +11:00
2008-07-09 14:02:54 +10:00
/* set up a handler to pick up sigchld */
2015-10-26 16:50:09 +11:00
se = tevent_add_signal ( ctdb - > ev , ctdb , SIGCHLD , 0 ,
recd_sig_child_handler , ctdb ) ;
2008-07-09 14:02:54 +10:00
if ( se = = NULL ) {
DEBUG ( DEBUG_CRIT , ( " Failed to set up signal handler for SIGCHLD in recovery daemon \n " ) ) ;
exit ( 1 ) ;
}
2007-05-15 15:13:36 +10:00
monitor_cluster ( ctdb ) ;
2007-05-07 06:51:58 +10:00
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_ALERT , ( " ERROR: ctdb_recoverd finished!? \n " ) ) ;
2007-05-15 15:13:36 +10:00
return - 1 ;
2007-05-04 08:30:18 +10:00
}
2007-10-22 12:34:08 +10:00
/*
shutdown the recovery daemon
*/
void ctdb_stop_recoverd ( struct ctdb_context * ctdb )
{
if ( ctdb - > recoverd_pid = = 0 ) {
return ;
}
2008-02-04 20:07:15 +11:00
DEBUG ( DEBUG_NOTICE , ( " Shutting down recovery daemon \n " ) ) ;
2012-05-03 11:42:41 +10:00
ctdb_kill ( ctdb , ctdb - > recoverd_pid , SIGTERM ) ;
2012-12-04 15:05:44 +11:00
TALLOC_FREE ( ctdb - > recd_ctx ) ;
TALLOC_FREE ( ctdb - > recd_ping_count ) ;
2007-10-22 12:34:08 +10:00
}
2011-03-01 12:09:42 +11:00
2015-10-26 16:50:09 +11:00
static void ctdb_restart_recd ( struct tevent_context * ev ,
struct tevent_timer * te ,
struct timeval t , void * private_data )
2011-03-01 12:09:42 +11:00
{
struct ctdb_context * ctdb = talloc_get_type ( private_data , struct ctdb_context ) ;
DEBUG ( DEBUG_ERR , ( " Restarting recovery daemon \n " ) ) ;
ctdb_stop_recoverd ( ctdb ) ;
ctdb_start_recoverd ( ctdb ) ;
}