2007-09-21 06:24:02 +04:00
/*
persistent store logic
Copyright ( C ) Andrew Tridgell 2007
Copyright ( C ) Ronnie Sahlberg 2007
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
# include "includes.h"
# include "system/filesys.h"
# include "system/wait.h"
2014-08-15 09:46:33 +04:00
# include "lib/tdb_wrap/tdb_wrap.h"
2013-06-06 23:58:02 +04:00
# include "tdb.h"
2007-09-21 06:24:02 +04:00
# include "../include/ctdb_private.h"
/*
  bookkeeping for one in-flight persistent store (trans3 commit)
*/
struct ctdb_persistent_state {
	/* daemon context the operation runs in */
	struct ctdb_context *ctdb;
	/* database being committed to; used by trans3_commit */
	struct ctdb_db_context *ctdb_db;
	/* client that issued the commit; used by trans3_commit */
	struct ctdb_client *client;
	/* the request control that is eventually replied to */
	struct ctdb_req_control *c;
	/* error message taken from a failed node reply, if any */
	const char *errormsg;
	/* update controls sent and not yet acknowledged successfully */
	uint32_t num_pending;
	/* status taken from a failed node reply, if any */
	int32_t status;
	/* number of node replies that reported a failure */
	uint32_t num_failed;
	/* number of update controls sent out */
	uint32_t num_sent;
};
2008-08-08 03:58:49 +04:00
/*
1 ) all nodes fail , and all nodes reply
2 ) some nodes fail , all nodes reply
3 ) some nodes timeout
4 ) all nodes succeed
*/
2007-09-21 06:24:02 +04:00
/*
  called when a node has acknowledged a ctdb_control_update_record call

  Replies that arrive while the recovery mode is not NORMAL are ignored.
  A failed reply is recorded in the state and flips the recovery mode to
  ACTIVE without answering the client. A successful reply decrements the
  pending counter; once it reaches zero the client is answered with
  status 0 and the state is freed.
*/
static void ctdb_persistent_callback(struct ctdb_context *ctdb,
				     int32_t status, TDB_DATA data,
				     const char *errormsg,
				     void *private_data)
{
	struct ctdb_persistent_state *pstate;

	pstate = talloc_get_type(private_data, struct ctdb_persistent_state);

	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
		DEBUG(DEBUG_INFO, (" ctdb_persistent_callback: ignoring reply "
				   " during recovery \n "));
		return;
	}

	if (status != 0) {
		const char *reason;

		reason = errormsg ? errormsg : " no error message given ";
		DEBUG(DEBUG_ERR, (" ctdb_persistent_callback failed with status %d (%s) \n ",
				  status, reason));

		pstate->status = status;
		pstate->errormsg = errormsg;
		pstate->num_failed++;

		/*
		 * If a node failed to complete the update_record control,
		 * then either a recovery is already running or something
		 * bad is going on. So trigger a recovery and let the
		 * recovery finish the transaction, sending back the reply
		 * for the trans3_commit control to the client.
		 */
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
		return;
	}

	/* one more node is done; answer the client after the last one */
	if (--pstate->num_pending == 0) {
		ctdb_request_control_reply(pstate->ctdb, pstate->c, NULL, 0,
					   pstate->errormsg);
		talloc_free(pstate);
	}
}
2007-09-21 09:19:33 +04:00
/*
called if persistent store times out
*/
static void ctdb_persistent_store_timeout ( struct event_context * ev , struct timed_event * te ,
struct timeval t , void * private_data )
{
struct ctdb_persistent_state * state = talloc_get_type ( private_data , struct ctdb_persistent_state ) ;
2011-02-23 00:24:50 +03:00
if ( state - > ctdb - > recovery_mode ! = CTDB_RECOVERY_NORMAL ) {
DEBUG ( DEBUG_INFO , ( " ctdb_persistent_store_timeout: ignoring "
" timeout during recovery \n " ) ) ;
return ;
}
2013-09-12 10:43:43 +04:00
ctdb_request_control_reply ( state - > ctdb , state - > c , NULL , 1 ,
2008-08-08 03:58:49 +04:00
" timeout in ctdb_persistent_state " ) ;
2007-09-21 09:19:33 +04:00
talloc_free ( state ) ;
}
2007-09-21 06:24:02 +04:00
2011-02-23 19:38:40 +03:00
/**
* Finish pending trans3 commit controls , i . e . send
* reply to the client . This is called by the end - recovery
* control to fix the situation when a recovery interrupts
2013-02-22 14:36:00 +04:00
* the usual progress of a transaction .
2011-02-23 19:38:40 +03:00
*/
void ctdb_persistent_finish_trans3_commits ( struct ctdb_context * ctdb )
{
struct ctdb_db_context * ctdb_db ;
if ( ctdb - > recovery_mode ! = CTDB_RECOVERY_NORMAL ) {
2013-02-22 15:42:10 +04:00
DEBUG ( DEBUG_INFO , ( " ctdb_persistent_finish_trans3_commits: "
" skipping execution when recovery is "
" active \n " ) ) ;
2011-02-23 19:38:40 +03:00
return ;
}
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
struct ctdb_persistent_state * state ;
if ( ctdb_db - > persistent_state = = NULL ) {
continue ;
}
state = ctdb_db - > persistent_state ;
2013-09-12 10:43:43 +04:00
ctdb_request_control_reply ( ctdb , state - > c , NULL , 2 ,
2011-02-23 19:38:40 +03:00
" trans3 commit ended by recovery " ) ;
/* The destructor sets ctdb_db->persistent_state to NULL. */
talloc_free ( state ) ;
}
}
2011-02-23 02:01:13 +03:00
static int ctdb_persistent_state_destructor ( struct ctdb_persistent_state * state )
{
2011-02-23 19:35:27 +03:00
if ( state - > client ! = NULL ) {
state - > client - > db_id = 0 ;
}
2011-02-23 02:01:13 +03:00
if ( state - > ctdb_db ! = NULL ) {
state - > ctdb_db - > persistent_state = NULL ;
}
return 0 ;
}
2007-09-21 06:24:02 +04:00
2009-12-03 19:59:49 +03:00
/*
 * Store a set of persistent records.
 * This is used to roll out a transaction to all nodes.
 *
 * The marshalled records in recdata are sent as
 * CTDB_CONTROL_UPDATE_RECORD to every node of the vnn map that is not
 * flagged inactive. When at least one control was sent, *async_reply is
 * set to true and the reply is produced later by the callback, the
 * timeout handler or the end of a recovery.
 *
 * Returns -1 when the request is rejected or a control cannot be sent,
 * 0 otherwise.
 */
int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
				   struct ctdb_req_control *c,
				   TDB_DATA recdata, bool *async_reply)
{
	struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
	struct ctdb_persistent_state *state;
	struct ctdb_db_context *ctdb_db;
	struct ctdb_client *client;
	int idx;

	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
		DEBUG(DEBUG_INFO, (" rejecting ctdb_control_trans3_commit when recovery active \n "));
		return -1;
	}

	client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
	if (client == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " can not match persistent_store "
				  " to a client. Returning error \n "));
		return -1;
	}

	/* a non-zero db_id marks a commit already running for this client */
	if (client->db_id != 0) {
		DEBUG(DEBUG_ERR, (__location__ " ERROR: trans3_commit: "
				  " client-db_id[0x%08x] != 0 "
				  " (client_id[0x%08x]): trans3_commit active? \n ",
				  client->db_id, client->client_id));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, m->db_id);
	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " ctdb_control_trans3_commit: "
				  " Unknown database db_id[0x%08x] \n ", m->db_id));
		return -1;
	}

	/* only one commit per database may be in flight */
	if (ctdb_db->persistent_state != NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Error: "
				  " ctdb_control_trans3_commit "
				  " called while a transaction commit is "
				  " active. db_id[0x%08x] \n ", m->db_id));
		return -1;
	}

	ctdb_db->persistent_state = talloc_zero(ctdb_db,
						struct ctdb_persistent_state);
	CTDB_NO_MEMORY(ctdb, ctdb_db->persistent_state);

	client->db_id = m->db_id;

	state = ctdb_db->persistent_state;
	state->ctdb = ctdb;
	state->ctdb_db = ctdb_db;
	state->c = c;
	state->client = client;

	/* from here on, freeing the state resets client and db markers */
	talloc_set_destructor(state, ctdb_persistent_state_destructor);

	for (idx = 0; idx < ctdb->vnn_map->size; idx++) {
		struct ctdb_node *target = ctdb->nodes[ctdb->vnn_map->map[idx]];
		int rc;

		/* only send to active nodes */
		if (target->flags & NODE_FLAGS_INACTIVE) {
			continue;
		}

		rc = ctdb_daemon_send_control(ctdb, target->pnn, 0,
					      CTDB_CONTROL_UPDATE_RECORD,
					      c->client_id, 0, recdata,
					      ctdb_persistent_callback,
					      state);
		if (rc == -1) {
			DEBUG(DEBUG_ERR, (" Unable to send "
					  " CTDB_CONTROL_UPDATE_RECORD "
					  " to pnn %u \n ", target->pnn));
			talloc_free(state);
			return -1;
		}

		state->num_pending++;
		state->num_sent++;
	}

	/* nothing was sent, so there is nothing to wait for */
	if (state->num_pending == 0) {
		talloc_free(state);
		return 0;
	}

	/* we need to wait for the replies */
	*async_reply = true;

	/* need to keep the control structure around */
	talloc_steal(state, c);

	/* but we won't wait forever */
	event_add_timed(ctdb->ev, state,
			timeval_current_ofs(ctdb->tunable.control_timeout, 0),
			ctdb_persistent_store_timeout, state);

	return 0;
}
2008-07-17 07:50:55 +04:00
/*
  backwards compatibility:

  start a persistent store operation. passing both the key, header and
  data to the daemon. If the client disconnects before it has issued
  a persistent_update call to the daemon we trigger a full recovery
  to ensure the databases are brought back in sync.

  for now we ignore the recdata that the client has passed to us;
  this control only bumps the client's count of persistent updates.

  returns 0 on success, -1 if the requesting client cannot be found.
*/
int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
					     struct ctdb_req_control *c,
					     TDB_DATA recdata)
{
	struct ctdb_client *requester;

	requester = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
	if (requester != NULL) {
		requester->num_persistent_updates++;
		return 0;
	}

	DEBUG(DEBUG_ERR, (__location__ " can not match start_persistent_update to a client. Returning error \n "));
	return -1;
}
2008-07-30 13:57:00 +04:00
/*
backwards compatibility :
called to tell ctdbd that it is no longer doing a persistent update
*/
2008-07-17 07:50:55 +04:00
int32_t ctdb_control_cancel_persistent_update ( struct ctdb_context * ctdb ,
2008-07-30 13:57:00 +04:00
struct ctdb_req_control * c ,
TDB_DATA recdata )
2008-07-17 07:50:55 +04:00
{
struct ctdb_client * client = ctdb_reqid_find ( ctdb , c - > client_id , struct ctdb_client ) ;
if ( client = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " can not match cancel_persistent_update to a client. Returning error \n " ) ) ;
return - 1 ;
}
2008-07-17 12:47:20 +04:00
if ( client - > num_persistent_updates > 0 ) {
client - > num_persistent_updates - - ;
}
2008-07-17 07:50:55 +04:00
return 0 ;
}
2008-07-30 13:57:00 +04:00
2009-12-11 17:31:02 +03:00
static int32_t ctdb_get_db_seqnum ( struct ctdb_context * ctdb ,
uint32_t db_id ,
uint64_t * seqnum )
{
int32_t ret ;
struct ctdb_db_context * ctdb_db ;
const char * keyname = CTDB_DB_SEQNUM_KEY ;
TDB_DATA key ;
TDB_DATA data ;
TALLOC_CTX * mem_ctx = talloc_new ( ctdb ) ;
2011-11-28 03:41:17 +04:00
struct ctdb_ltdb_header header ;
2009-12-11 17:31:02 +03:00
ctdb_db = find_ctdb_db ( ctdb , db_id ) ;
if ( ! ctdb_db ) {
DEBUG ( DEBUG_ERR , ( __location__ " Unknown db 0x%08x \n " , db_id ) ) ;
ret = - 1 ;
goto done ;
}
key . dptr = ( uint8_t * ) discard_const ( keyname ) ;
key . dsize = strlen ( keyname ) + 1 ;
2011-11-28 03:41:17 +04:00
ret = ( int32_t ) ctdb_ltdb_fetch ( ctdb_db , key , & header , mem_ctx , & data ) ;
2009-12-11 17:31:02 +03:00
if ( ret ! = 0 ) {
goto done ;
}
if ( data . dsize ! = sizeof ( uint64_t ) ) {
* seqnum = 0 ;
goto done ;
}
* seqnum = * ( uint64_t * ) data . dptr ;
2008-07-30 13:57:00 +04:00
2009-12-11 17:31:02 +03:00
done :
talloc_free ( mem_ctx ) ;
return ret ;
}
/**
 * Get the sequence number of a persistent database.
 *
 * indata must start with the 32-bit id of the database. On success
 * outdata receives a uint64_t, allocated with outdata as talloc
 * parent, holding the complete 64-bit sequence number.
 *
 * Returns 0 on success. Returns -1 if indata is too short or the
 * reply cannot be allocated, and otherwise the non-zero result of the
 * sequence number lookup. outdata is not modified on failure.
 */
int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
				   TDB_DATA indata,
				   TDB_DATA *outdata)
{
	uint32_t db_id;
	uint64_t seqnum;
	uint64_t *reply;
	int32_t ret;

	/* never read a db_id the caller did not actually supply */
	if (indata.dptr == NULL || indata.dsize < sizeof(uint32_t)) {
		DEBUG(DEBUG_ERR, (__location__ " Invalid data size in "
				  "ctdb_control_get_db_seqnum\n"));
		return -1;
	}

	db_id = *(uint32_t *)indata.dptr;

	ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
	if (ret != 0) {
		return ret;
	}

	reply = talloc_zero(outdata, uint64_t);
	if (reply == NULL) {
		return -1;
	}

	/*
	 * Store through a uint64_t pointer: assigning through the
	 * uint8_t dptr would keep only the lowest byte of the value.
	 */
	*reply = seqnum;

	outdata->dptr = (uint8_t *)reply;
	outdata->dsize = sizeof(uint64_t);

	return 0;
}