2007-09-21 06:24:02 +04:00
/*
persistent store logic
Copyright ( C ) Andrew Tridgell 2007
Copyright ( C ) Ronnie Sahlberg 2007
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , see < http : //www.gnu.org/licenses/>.
*/
# include "includes.h"
2010-08-18 03:46:31 +04:00
# include "lib/tevent/tevent.h"
2007-09-21 06:24:02 +04:00
# include "system/filesys.h"
# include "system/wait.h"
# include "db_wrap.h"
# include "lib/tdb/include/tdb.h"
# include "../include/ctdb_private.h"
/*
  bookkeeping for one persistent commit that has been fanned out to
  other nodes as CTDB_CONTROL_UPDATE_RECORD controls
 */
struct ctdb_persistent_state {
	/* daemon context the commit runs in */
	struct ctdb_context *ctdb;
	/* the client control that still has to be replied to */
	struct ctdb_req_control *c;
	/* error text taken from the most recent failed node reply */
	const char *errormsg;
	/* incremented per control sent, decremented per successful reply */
	uint32_t num_pending;
	/* status taken from the most recent failed node reply */
	int32_t status;
	/* number of node replies that carried a non-zero status */
	uint32_t num_failed;
	/* total number of controls sent out */
	uint32_t num_sent;
};
2008-08-08 03:58:49 +04:00
/*
1 ) all nodes fail , and all nodes reply
2 ) some nodes fail , all nodes reply
3 ) some nodes timeout
4 ) all nodes succeed
*/
2007-09-21 06:24:02 +04:00
/*
  called when a node has acknowledged a ctdb_control_update_record call

  Replies that arrive while recovery is not in normal mode are ignored.
  A failed reply is recorded in the state and puts the daemon into
  recovery; no reply is sent to the client from here in that case.
  Once the last outstanding reply has come back successfully the
  client control is answered and the state is freed.
 */
static void ctdb_persistent_callback(struct ctdb_context *ctdb,
				     int32_t status, TDB_DATA data,
				     const char *errormsg,
				     void *private_data)
{
	struct ctdb_persistent_state *state;
	enum ctdb_trans2_commit_error etype;

	state = talloc_get_type(private_data, struct ctdb_persistent_state);

	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
		DEBUG(DEBUG_INFO, (" ctdb_persistent_callback: ignoring reply "
				   " during recovery \n "));
		return;
	}

	if (status != 0) {
		DEBUG(DEBUG_ERR, (" ctdb_persistent_callback failed with status %d (%s) \n ",
				  status,
				  errormsg ? errormsg : " no error message given "));

		/* remember the failure details */
		state->status = status;
		state->errormsg = errormsg;
		state->num_failed++;

		/*
		 * If a node failed to complete the update_record control,
		 * then either a recovery is already running or something
		 * bad is going on. So trigger a recovery and let the
		 * recovery finish the transaction, sending back the reply
		 * for the trans3_commit control to the client.
		 */
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
		return;
	}

	/* one more node is done; keep waiting while others are outstanding */
	if (--state->num_pending != 0) {
		return;
	}

	/* all replies are in - classify the overall outcome */
	if (state->num_failed == state->num_sent) {
		etype = CTDB_TRANS2_COMMIT_ALLFAIL;
	} else if (state->num_failed != 0) {
		etype = CTDB_TRANS2_COMMIT_SOMEFAIL;
	} else {
		etype = CTDB_TRANS2_COMMIT_SUCCESS;
	}

	ctdb_request_control_reply(state->ctdb, state->c, NULL, etype,
				   state->errormsg);
	talloc_free(state);
}
2007-09-21 09:19:33 +04:00
/*
called if persistent store times out
*/
static void ctdb_persistent_store_timeout ( struct event_context * ev , struct timed_event * te ,
struct timeval t , void * private_data )
{
struct ctdb_persistent_state * state = talloc_get_type ( private_data , struct ctdb_persistent_state ) ;
2011-02-23 00:24:50 +03:00
if ( state - > ctdb - > recovery_mode ! = CTDB_RECOVERY_NORMAL ) {
DEBUG ( DEBUG_INFO , ( " ctdb_persistent_store_timeout: ignoring "
" timeout during recovery \n " ) ) ;
return ;
}
2008-08-08 03:58:49 +04:00
ctdb_request_control_reply ( state - > ctdb , state - > c , NULL , CTDB_TRANS2_COMMIT_TIMEOUT ,
" timeout in ctdb_persistent_state " ) ;
2007-09-21 09:19:33 +04:00
talloc_free ( state ) ;
}
2007-09-21 06:24:02 +04:00
/*
2008-07-30 13:57:00 +04:00
store a set of persistent records - called from a ctdb client when it has updated
some records in a persistent database . The client will have the record
2007-09-21 06:24:02 +04:00
locked for the duration of this call . The client is the dmaster when
this call is made
*/
2008-07-30 13:57:00 +04:00
int32_t ctdb_control_trans2_commit ( struct ctdb_context * ctdb ,
struct ctdb_req_control * c ,
TDB_DATA recdata , bool * async_reply )
2007-09-21 06:24:02 +04:00
{
2008-07-17 07:50:55 +04:00
struct ctdb_client * client = ctdb_reqid_find ( ctdb , c - > client_id , struct ctdb_client ) ;
2007-09-21 06:24:02 +04:00
struct ctdb_persistent_state * state ;
int i ;
2009-07-21 13:30:38 +04:00
struct ctdb_marshall_buffer * m = ( struct ctdb_marshall_buffer * ) recdata . dptr ;
struct ctdb_db_context * ctdb_db ;
ctdb_db = find_ctdb_db ( ctdb , m - > db_id ) ;
if ( ctdb_db = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb_control_trans2_commit: "
2009-10-29 15:44:39 +03:00
" Unknown database db_id[0x%08x] \n " , m - > db_id ) ) ;
2009-07-21 13:30:38 +04:00
return - 1 ;
}
2007-09-21 06:24:02 +04:00
2008-07-17 07:50:55 +04:00
if ( client = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " can not match persistent_store to a client. Returning error \n " ) ) ;
return - 1 ;
}
2008-07-30 13:57:00 +04:00
2009-12-07 15:28:11 +03:00
if ( ctdb_db - > unhealthy_reason ) {
DEBUG ( DEBUG_ERR , ( " db(%s) unhealty in ctdb_control_trans2_commit: %s \n " ,
ctdb_db - > db_name , ctdb_db - > unhealthy_reason ) ) ;
return - 1 ;
}
2008-07-30 13:57:00 +04:00
/* handling num_persistent_updates is a bit strange -
there are 3 cases
1 ) very old clients , which never called CTDB_CONTROL_START_PERSISTENT_UPDATE
They don ' t expect num_persistent_updates to be used at all
2 ) less old clients , which uses CTDB_CONTROL_START_PERSISTENT_UPDATE , and expected
this commit to then decrement it
3 ) new clients which use TRANS2 commit functions , and
expect this function to increment the counter , and
then have it decremented in ctdb_control_trans2_error
or ctdb_control_trans2_finished
*/
2008-08-08 07:11:28 +04:00
switch ( c - > opcode ) {
case CTDB_CONTROL_PERSISTENT_STORE :
2009-07-20 18:33:53 +04:00
if ( ctdb_db - > transaction_active ) {
2009-10-29 15:48:36 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " trans2_commit: a "
" transaction is active on database "
" db_id[0x%08x] - refusing persistent "
" store for client id[0x%08x] \n " ,
ctdb_db - > db_id , client - > client_id ) ) ;
2009-07-20 18:33:53 +04:00
return - 1 ;
}
2008-07-30 13:57:00 +04:00
if ( client - > num_persistent_updates > 0 ) {
client - > num_persistent_updates - - ;
2008-08-08 07:11:28 +04:00
}
break ;
case CTDB_CONTROL_TRANS2_COMMIT :
2009-07-21 13:30:38 +04:00
if ( ctdb_db - > transaction_active ) {
2009-10-29 15:24:19 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " trans2_commit: there is "
" already a transaction commit "
2009-10-29 15:44:39 +03:00
" active on db_id[0x%08x] - forbidding "
" client_id[0x%08x] to commit \n " ,
2009-10-29 15:27:47 +03:00
ctdb_db - > db_id , client - > client_id ) ) ;
2009-07-21 13:30:38 +04:00
return - 1 ;
}
if ( client - > db_id ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " ERROR: trans2_commit: "
2009-10-29 15:44:39 +03:00
" client-db_id[0x%08x] != 0 "
" (client_id[0x%08x]) \n " ,
2009-10-29 15:27:47 +03:00
client - > db_id , client - > client_id ) ) ;
2009-07-21 13:30:38 +04:00
return - 1 ;
}
2008-07-30 13:57:00 +04:00
client - > num_persistent_updates + + ;
2009-07-21 13:30:38 +04:00
ctdb_db - > transaction_active = true ;
client - > db_id = m - > db_id ;
2009-10-29 15:53:44 +03:00
DEBUG ( DEBUG_DEBUG , ( __location__ " client id[0x%08x] started to "
" commit transaction on db id[0x%08x] \n " ,
client - > client_id , client - > db_id ) ) ;
2008-08-08 07:11:28 +04:00
break ;
case CTDB_CONTROL_TRANS2_COMMIT_RETRY :
/* already updated from the first commit */
2009-07-21 13:30:38 +04:00
if ( client - > db_id ! = m - > db_id ) {
DEBUG ( DEBUG_ERR , ( __location__ " ERROR: trans2_commit "
2009-10-29 15:44:39 +03:00
" retry: client-db_id[0x%08x] != "
" db_id[0x%08x] (client_id[0x%08x]) \n " ,
client - > db_id ,
2009-10-29 15:27:47 +03:00
m - > db_id , client - > client_id ) ) ;
2009-07-21 13:30:38 +04:00
return - 1 ;
}
2009-10-29 15:53:44 +03:00
DEBUG ( DEBUG_DEBUG , ( __location__ " client id[0x%08x] started "
" transaction commit retry on "
" db_id[0x%08x] \n " ,
client - > client_id , client - > db_id ) ) ;
2008-08-08 07:11:28 +04:00
break ;
2008-07-17 12:47:20 +04:00
}
2008-07-17 07:50:55 +04:00
2009-12-04 02:06:34 +03:00
if ( ctdb - > recovery_mode ! = CTDB_RECOVERY_NORMAL ) {
DEBUG ( DEBUG_INFO , ( " rejecting ctdb_control_trans2_commit when recovery active \n " ) ) ;
return - 1 ;
}
2008-05-22 10:29:46 +04:00
state = talloc_zero ( ctdb , struct ctdb_persistent_state ) ;
2007-09-21 06:24:02 +04:00
CTDB_NO_MEMORY ( ctdb , state ) ;
state - > ctdb = ctdb ;
2007-09-24 03:57:14 +04:00
state - > c = c ;
2007-09-21 06:24:02 +04:00
2007-09-21 09:44:13 +04:00
for ( i = 0 ; i < ctdb - > vnn_map - > size ; i + + ) {
struct ctdb_node * node = ctdb - > nodes [ ctdb - > vnn_map - > map [ i ] ] ;
2007-09-21 06:24:02 +04:00
int ret ;
/* only send to active nodes */
if ( node - > flags & NODE_FLAGS_INACTIVE ) {
continue ;
}
/* don't send to ourselves */
if ( node - > pnn = = ctdb - > pnn ) {
continue ;
}
ret = ctdb_daemon_send_control ( ctdb , node - > pnn , 0 , CTDB_CONTROL_UPDATE_RECORD ,
c - > client_id , 0 , recdata ,
ctdb_persistent_callback , state ) ;
if ( ret = = - 1 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Unable to send CTDB_CONTROL_UPDATE_RECORD to pnn %u \n " , node - > pnn ) ) ;
2007-09-21 06:24:02 +04:00
talloc_free ( state ) ;
return - 1 ;
}
state - > num_pending + + ;
2008-08-08 03:58:49 +04:00
state - > num_sent + + ;
2007-09-21 06:24:02 +04:00
}
if ( state - > num_pending = = 0 ) {
talloc_free ( state ) ;
return 0 ;
}
/* we need to wait for the replies */
* async_reply = true ;
2007-09-21 09:19:33 +04:00
2007-09-24 03:57:14 +04:00
/* need to keep the control structure around */
talloc_steal ( state , c ) ;
/* but we won't wait forever */
2007-09-21 09:19:33 +04:00
event_add_timed ( ctdb - > ev , state ,
timeval_current_ofs ( ctdb - > tunable . control_timeout , 0 ) ,
ctdb_persistent_store_timeout , state ) ;
2007-09-21 06:24:02 +04:00
return 0 ;
}
2009-12-03 19:59:49 +03:00
/*
 * Store a set of persistent records.
 * This is used to roll out a transaction to all nodes.
 *
 * recdata is interpreted as a ctdb_marshall_buffer and forwarded as
 * CTDB_CONTROL_UPDATE_RECORD to every active node of the vnn map
 * (there is no check here that excludes the local node).
 *
 * Returns -1 when recovery is active, the database or client is
 * unknown, or a control cannot be sent. Returns 0 with *async_reply
 * set to true when replies are outstanding, and 0 with *async_reply
 * untouched when nothing was sent.
 */
int32_t ctdb_control_trans3_commit(struct ctdb_context *ctdb,
				   struct ctdb_req_control *c,
				   TDB_DATA recdata, bool *async_reply)
{
	struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
	struct ctdb_persistent_state *state;
	struct ctdb_db_context *ctdb_db;
	struct ctdb_client *client;
	int idx;

	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
		DEBUG(DEBUG_INFO, (" rejecting ctdb_control_trans3_commit when recovery active \n "));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, m->db_id);
	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " ctdb_control_trans3_commit: "
				  " Unknown database db_id[0x%08x] \n ", m->db_id));
		return -1;
	}

	client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
	if (client == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " can not match persistent_store "
				  " to a client. Returning error \n "));
		return -1;
	}

	state = talloc_zero(ctdb, struct ctdb_persistent_state);
	CTDB_NO_MEMORY(ctdb, state);

	state->ctdb = ctdb;
	state->c    = c;

	for (idx = 0; idx < ctdb->vnn_map->size; idx++) {
		struct ctdb_node *target = ctdb->nodes[ctdb->vnn_map->map[idx]];
		int rc;

		/* only send to active nodes */
		if (target->flags & NODE_FLAGS_INACTIVE) {
			continue;
		}

		rc = ctdb_daemon_send_control(ctdb, target->pnn, 0,
					      CTDB_CONTROL_UPDATE_RECORD,
					      c->client_id, 0, recdata,
					      ctdb_persistent_callback,
					      state);
		if (rc == -1) {
			DEBUG(DEBUG_ERR, (" Unable to send "
					  " CTDB_CONTROL_UPDATE_RECORD "
					  " to pnn %u \n ", target->pnn));
			talloc_free(state);
			return -1;
		}

		state->num_pending++;
		state->num_sent++;
	}

	/* nothing was sent, so there is nothing to wait for */
	if (state->num_pending == 0) {
		talloc_free(state);
		return 0;
	}

	/* we need to wait for the replies */
	*async_reply = true;

	/* need to keep the control structure around */
	talloc_steal(state, c);

	/* but we won't wait forever */
	event_add_timed(ctdb->ev, state,
			timeval_current_ofs(ctdb->tunable.control_timeout, 0),
			ctdb_persistent_store_timeout, state);

	return 0;
}
2008-05-28 07:04:25 +04:00
/*
  state for one CTDB_CONTROL_UPDATE_RECORD request that is being
  written to the local database by a child process
 */
struct ctdb_persistent_write_state {
	/* database the records are written to */
	struct ctdb_db_context *ctdb_db;
	/* marshalled records to store */
	struct ctdb_marshall_buffer *m;
	/* the control that is replied to once the write has finished */
	struct ctdb_req_control *c;
};
/*
2008-05-28 07:04:25 +04:00
called from a child process to write the data
2007-09-21 06:24:02 +04:00
*/
2008-05-28 07:04:25 +04:00
static int ctdb_persistent_store ( struct ctdb_persistent_write_state * state )
2007-09-21 06:24:02 +04:00
{
2008-07-30 13:57:00 +04:00
int ret , i ;
struct ctdb_rec_data * rec = NULL ;
struct ctdb_marshall_buffer * m = state - > m ;
ret = tdb_transaction_start ( state - > ctdb_db - > ltdb - > tdb ) ;
if ( ret = = - 1 ) {
DEBUG ( DEBUG_ERR , ( " Failed to start transaction for db_id 0x%08x in ctdb_persistent_store \n " ,
state - > ctdb_db - > db_id ) ) ;
2007-09-21 06:24:02 +04:00
return - 1 ;
}
2008-07-30 13:57:00 +04:00
for ( i = 0 ; i < m - > count ; i + + ) {
struct ctdb_ltdb_header oldheader ;
struct ctdb_ltdb_header header ;
2008-08-07 18:44:33 +04:00
TDB_DATA key , data , olddata ;
TALLOC_CTX * tmp_ctx = talloc_new ( state ) ;
2008-07-30 13:57:00 +04:00
rec = ctdb_marshall_loop_next ( m , rec , NULL , & header , & key , & data ) ;
if ( rec = = NULL ) {
DEBUG ( DEBUG_ERR , ( " Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store \n " ,
i , state - > ctdb_db - > db_id ) ) ;
2008-08-07 18:44:33 +04:00
talloc_free ( tmp_ctx ) ;
2008-07-30 13:57:00 +04:00
goto failed ;
}
/* fetch the old header and ensure the rsn is less than the new rsn */
2008-08-07 18:44:33 +04:00
ret = ctdb_ltdb_fetch ( state - > ctdb_db , key , & oldheader , tmp_ctx , & olddata ) ;
2008-07-30 13:57:00 +04:00
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store \n " ,
state - > ctdb_db - > db_id ) ) ;
2008-08-07 18:44:33 +04:00
talloc_free ( tmp_ctx ) ;
2008-07-30 13:57:00 +04:00
goto failed ;
}
2008-08-07 18:44:33 +04:00
if ( oldheader . rsn > = header . rsn & &
( olddata . dsize ! = data . dsize | |
memcmp ( olddata . dptr , data . dptr , data . dsize ) ! = 0 ) ) {
2008-07-30 13:57:00 +04:00
DEBUG ( DEBUG_CRIT , ( " existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store \n " ,
state - > ctdb_db - > db_id ,
( unsigned long long ) oldheader . rsn , ( unsigned long long ) header . rsn ) ) ;
2008-08-07 18:44:33 +04:00
talloc_free ( tmp_ctx ) ;
2008-07-30 13:57:00 +04:00
goto failed ;
}
2008-08-07 18:44:33 +04:00
talloc_free ( tmp_ctx ) ;
2008-07-30 13:57:00 +04:00
ret = ctdb_ltdb_store ( state - > ctdb_db , key , & header , data ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( " Failed to store record for db_id 0x%08x in ctdb_persistent_store \n " ,
state - > ctdb_db - > db_id ) ) ;
2009-10-20 18:57:23 +04:00
goto failed ;
2008-07-30 13:57:00 +04:00
}
2007-09-21 06:24:02 +04:00
}
2008-07-30 13:57:00 +04:00
ret = tdb_transaction_commit ( state - > ctdb_db - > ltdb - > tdb ) ;
if ( ret = = - 1 ) {
DEBUG ( DEBUG_ERR , ( " Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store \n " ,
state - > ctdb_db - > db_id ) ) ;
2007-09-21 06:24:02 +04:00
return - 1 ;
}
return 0 ;
2008-07-30 13:57:00 +04:00
failed :
tdb_transaction_cancel ( state - > ctdb_db - > ltdb - > tdb ) ;
return - 1 ;
2007-09-21 06:24:02 +04:00
}
/*
2008-05-28 07:04:25 +04:00
called when we the child has completed the persistent write
on our behalf
2007-09-21 06:24:02 +04:00
*/
2008-05-28 07:04:25 +04:00
static void ctdb_persistent_write_callback ( int status , void * private_data )
2007-09-21 06:24:02 +04:00
{
2008-05-28 07:04:25 +04:00
struct ctdb_persistent_write_state * state = talloc_get_type ( private_data ,
struct ctdb_persistent_write_state ) ;
2007-09-21 06:24:02 +04:00
2008-05-28 07:04:25 +04:00
ctdb_request_control_reply ( state - > ctdb_db - > ctdb , state - > c , NULL , status , NULL ) ;
2008-05-22 10:29:46 +04:00
talloc_free ( state ) ;
2007-09-21 06:24:02 +04:00
}
/*
called if our lockwait child times out
*/
static void ctdb_persistent_lock_timeout ( struct event_context * ev , struct timed_event * te ,
struct timeval t , void * private_data )
{
2008-05-28 07:04:25 +04:00
struct ctdb_persistent_write_state * state = talloc_get_type ( private_data ,
struct ctdb_persistent_write_state ) ;
2007-09-21 06:24:02 +04:00
ctdb_request_control_reply ( state - > ctdb_db - > ctdb , state - > c , NULL , - 1 , " timeout in ctdb_persistent_lock " ) ;
talloc_free ( state ) ;
}
2008-05-28 07:04:25 +04:00
/*
  handle for one child process that performs a persistent write
 */
struct childwrite_handle {
	/* daemon context, used for the statistics counters */
	struct ctdb_context *ctdb;
	/* database the child writes to */
	struct ctdb_db_context *ctdb_db;
	/* fd event watching the read end of the pipe */
	struct fd_event *fde;
	/* pipe: the parent reads fd[0], the child writes fd[1] */
	int fd[2];
	/* pid of the writing child */
	pid_t child;
	/* argument handed back to callback */
	void *private_data;
	/* invoked with the status byte once the child has reported back */
	void (*callback)(int, void *);
	/* when the handle was set up, used for the latency statistics */
	struct timeval start_time;
};
static int childwrite_destructor ( struct childwrite_handle * h )
{
2010-09-29 04:38:41 +04:00
CTDB_DECREMENT_STAT ( h - > ctdb , pending_childwrite_calls ) ;
2008-05-28 07:04:25 +04:00
kill ( h - > child , SIGKILL ) ;
return 0 ;
}
/* called when the child process has finished writing the record to the
database
*/
static void childwrite_handler ( struct event_context * ev , struct fd_event * fde ,
uint16_t flags , void * private_data )
{
struct childwrite_handle * h = talloc_get_type ( private_data ,
struct childwrite_handle ) ;
void * p = h - > private_data ;
void ( * callback ) ( int , void * ) = h - > callback ;
pid_t child = h - > child ;
TALLOC_CTX * tmp_ctx = talloc_new ( ev ) ;
int ret ;
char c ;
2010-10-11 08:11:18 +04:00
CTDB_UPDATE_LATENCY ( h - > ctdb , h - > ctdb_db , " persistent " , childwrite_latency , h - > start_time ) ;
2010-09-29 04:38:41 +04:00
CTDB_DECREMENT_STAT ( h - > ctdb , pending_childwrite_calls ) ;
2008-05-28 07:04:25 +04:00
/* the handle needs to go away when the context is gone - when
the handle goes away this implicitly closes the pipe , which
kills the child */
talloc_steal ( tmp_ctx , h ) ;
talloc_set_destructor ( h , NULL ) ;
ret = read ( h - > fd [ 0 ] , & c , 1 ) ;
if ( ret < 1 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Read returned %d. Childwrite failed \n " , ret ) ) ;
c = 1 ;
}
callback ( c , p ) ;
kill ( child , SIGKILL ) ;
talloc_free ( tmp_ctx ) ;
}
/* this creates a child process which will take out a tdb transaction
and write the record to the database .
*/
struct childwrite_handle * ctdb_childwrite ( struct ctdb_db_context * ctdb_db ,
void ( * callback ) ( int , void * private_data ) ,
2008-05-28 07:40:12 +04:00
struct ctdb_persistent_write_state * state )
2008-05-28 07:04:25 +04:00
{
struct childwrite_handle * result ;
int ret ;
pid_t parent = getpid ( ) ;
2010-09-29 04:38:41 +04:00
CTDB_INCREMENT_STAT ( ctdb_db - > ctdb , childwrite_calls ) ;
CTDB_INCREMENT_STAT ( ctdb_db - > ctdb , pending_childwrite_calls ) ;
2008-05-28 07:04:25 +04:00
2008-05-28 07:40:12 +04:00
if ( ! ( result = talloc_zero ( state , struct childwrite_handle ) ) ) {
2010-09-29 04:38:41 +04:00
CTDB_DECREMENT_STAT ( ctdb_db - > ctdb , pending_childwrite_calls ) ;
2008-05-28 07:04:25 +04:00
return NULL ;
}
ret = pipe ( result - > fd ) ;
if ( ret ! = 0 ) {
talloc_free ( result ) ;
2010-09-29 04:38:41 +04:00
CTDB_DECREMENT_STAT ( ctdb_db - > ctdb , pending_childwrite_calls ) ;
2008-05-28 07:04:25 +04:00
return NULL ;
}
2011-01-10 05:57:49 +03:00
result - > child = ctdb_fork ( ctdb_db - > ctdb ) ;
2008-05-28 07:04:25 +04:00
if ( result - > child = = ( pid_t ) - 1 ) {
close ( result - > fd [ 0 ] ) ;
close ( result - > fd [ 1 ] ) ;
talloc_free ( result ) ;
2010-09-29 04:38:41 +04:00
CTDB_DECREMENT_STAT ( ctdb_db - > ctdb , pending_childwrite_calls ) ;
2008-05-28 07:04:25 +04:00
return NULL ;
}
result - > callback = callback ;
2008-05-28 07:40:12 +04:00
result - > private_data = state ;
2008-05-28 07:04:25 +04:00
result - > ctdb = ctdb_db - > ctdb ;
result - > ctdb_db = ctdb_db ;
if ( result - > child = = 0 ) {
char c = 0 ;
close ( result - > fd [ 0 ] ) ;
2010-07-19 13:59:09 +04:00
debug_extra = talloc_asprintf ( NULL , " childwrite-%s: " , ctdb_db - > db_name ) ;
2008-05-28 07:04:25 +04:00
ret = ctdb_persistent_store ( state ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to write persistent data \n " ) ) ;
c = 1 ;
}
write ( result - > fd [ 1 ] , & c , 1 ) ;
/* make sure we die when our parent dies */
while ( kill ( parent , 0 ) = = 0 | | errno ! = ESRCH ) {
sleep ( 5 ) ;
}
_exit ( 0 ) ;
}
close ( result - > fd [ 1 ] ) ;
2009-10-15 04:24:54 +04:00
set_close_on_exec ( result - > fd [ 0 ] ) ;
2008-05-28 07:04:25 +04:00
talloc_set_destructor ( result , childwrite_destructor ) ;
2010-02-03 22:37:41 +03:00
DEBUG ( DEBUG_DEBUG , ( __location__ " Created PIPE FD:%d for ctdb_childwrite \n " , result - > fd [ 0 ] ) ) ;
2009-10-15 04:24:54 +04:00
2008-05-28 07:04:25 +04:00
result - > fde = event_add_fd ( ctdb_db - > ctdb - > ev , result , result - > fd [ 0 ] ,
2010-08-18 03:46:31 +04:00
EVENT_FD_READ , childwrite_handler ,
2008-05-28 07:04:25 +04:00
( void * ) result ) ;
if ( result - > fde = = NULL ) {
talloc_free ( result ) ;
2010-09-29 04:38:41 +04:00
CTDB_DECREMENT_STAT ( ctdb_db - > ctdb , pending_childwrite_calls ) ;
2008-05-28 07:04:25 +04:00
return NULL ;
}
2010-08-18 03:46:31 +04:00
tevent_fd_set_auto_close ( result - > fde ) ;
2008-05-28 07:04:25 +04:00
result - > start_time = timeval_current ( ) ;
return result ;
}
2007-09-21 06:24:02 +04:00
/*
  update a record on this node if the new record has a higher rsn than
  the current record

  recdata is interpreted as a ctdb_marshall_buffer. The write itself is
  done by a child process (see ctdb_childwrite); the reply to the
  control is sent from ctdb_persistent_write_callback or, on timeout,
  from ctdb_persistent_lock_timeout.

  Returns -1 when recovery is active, the database is unknown or
  unhealthy, or the child could not be set up. Otherwise returns 0 with
  *async_reply set to true.
 */
int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
				   struct ctdb_req_control *c, TDB_DATA recdata,
				   bool *async_reply)
{
	struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
	struct ctdb_persistent_write_state *state;
	struct ctdb_db_context *ctdb_db;

	if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
		DEBUG(DEBUG_INFO, (" rejecting ctdb_control_update_record when recovery active \n "));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, m->db_id);
	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR, (" Unknown database 0x%08x in ctdb_control_update_record \n ", m->db_id));
		return -1;
	}

	if (ctdb_db->unhealthy_reason) {
		DEBUG(DEBUG_ERR, (" db(%s) unhealty in ctdb_control_update_record: %s \n ",
				  ctdb_db->db_name, ctdb_db->unhealthy_reason));
		return -1;
	}

	state = talloc(ctdb, struct ctdb_persistent_write_state);
	CTDB_NO_MEMORY(ctdb, state);

	state->ctdb_db = ctdb_db;
	state->c       = c;
	state->m       = m;

	/* create a child process to take out a transaction and
	   write the data.
	*/
	if (ctdb_childwrite(ctdb_db, ctdb_persistent_write_callback, state) == NULL) {
		DEBUG(DEBUG_ERR, (" Failed to setup childwrite handler in ctdb_control_update_record \n "));
		talloc_free(state);
		return -1;
	}

	/* we need to wait for the replies */
	*async_reply = true;

	/* need to keep the control structure around */
	talloc_steal(state, c);

	/* but we won't wait forever */
	event_add_timed(ctdb->ev, state,
			timeval_current_ofs(ctdb->tunable.control_timeout, 0),
			ctdb_persistent_lock_timeout, state);

	return 0;
}
2008-07-17 07:50:55 +04:00
2008-07-30 13:57:00 +04:00
/*
called when a client has finished a local commit in a transaction to
a persistent database
*/
int32_t ctdb_control_trans2_finished ( struct ctdb_context * ctdb ,
struct ctdb_req_control * c )
{
struct ctdb_client * client = ctdb_reqid_find ( ctdb , c - > client_id , struct ctdb_client ) ;
2009-07-21 13:30:38 +04:00
struct ctdb_db_context * ctdb_db ;
ctdb_db = find_ctdb_db ( ctdb , client - > db_id ) ;
if ( ctdb_db = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb_control_trans2_finish "
" Unknown database 0x%08x \n " , client - > db_id ) ) ;
return - 1 ;
}
if ( ! ctdb_db - > transaction_active ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb_control_trans2_finish: "
" Database 0x%08x has no transaction commit "
" started \n " , client - > db_id ) ) ;
return - 1 ;
}
ctdb_db - > transaction_active = false ;
client - > db_id = 0 ;
2008-07-30 13:57:00 +04:00
if ( client - > num_persistent_updates = = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " ERROR: num_persistent_updates == 0 \n " ) ) ;
2008-08-07 07:34:18 +04:00
DEBUG ( DEBUG_ERR , ( __location__ " Forcing recovery \n " ) ) ;
client - > ctdb - > recovery_mode = CTDB_RECOVERY_ACTIVE ;
2008-07-30 13:57:00 +04:00
return - 1 ;
}
client - > num_persistent_updates - - ;
2009-10-29 15:53:44 +03:00
DEBUG ( DEBUG_DEBUG , ( __location__ " client id[0x%08x] finished "
" transaction commit db_id[0x%08x] \n " ,
client - > client_id , ctdb_db - > db_id ) ) ;
2008-07-30 13:57:00 +04:00
return 0 ;
}
/*
called when a client gets an error committing its database
during a transaction commit
*/
int32_t ctdb_control_trans2_error ( struct ctdb_context * ctdb ,
struct ctdb_req_control * c )
{
struct ctdb_client * client = ctdb_reqid_find ( ctdb , c - > client_id , struct ctdb_client ) ;
2009-07-21 13:30:38 +04:00
struct ctdb_db_context * ctdb_db ;
ctdb_db = find_ctdb_db ( ctdb , client - > db_id ) ;
if ( ctdb_db = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb_control_trans2_error: "
" Unknown database 0x%08x \n " , client - > db_id ) ) ;
return - 1 ;
}
if ( ! ctdb_db - > transaction_active ) {
DEBUG ( DEBUG_ERR , ( __location__ " ctdb_control_trans2_error: "
" Database 0x%08x has no transaction commit "
" started \n " , client - > db_id ) ) ;
return - 1 ;
}
ctdb_db - > transaction_active = false ;
client - > db_id = 0 ;
2008-07-30 13:57:00 +04:00
if ( client - > num_persistent_updates = = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " ERROR: num_persistent_updates == 0 \n " ) ) ;
2008-08-07 07:34:18 +04:00
} else {
client - > num_persistent_updates - - ;
2008-07-30 13:57:00 +04:00
}
2009-10-29 15:54:55 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " An error occurred during transaction on "
" db_id[0x%08x] - forcing recovery \n " ,
ctdb_db - > db_id ) ) ;
2008-07-30 13:57:00 +04:00
client - > ctdb - > recovery_mode = CTDB_RECOVERY_ACTIVE ;
return 0 ;
}
2009-10-29 02:49:00 +03:00
/**
* Tell whether a transaction is active on this node on the give DB .
*/
int32_t ctdb_control_trans2_active ( struct ctdb_context * ctdb ,
2009-10-29 19:08:37 +03:00
struct ctdb_req_control * c ,
2009-10-29 02:49:00 +03:00
uint32_t db_id )
{
struct ctdb_db_context * ctdb_db ;
2009-10-29 19:08:37 +03:00
struct ctdb_client * client = ctdb_reqid_find ( ctdb , c - > client_id , struct ctdb_client ) ;
2009-10-29 02:49:00 +03:00
ctdb_db = find_ctdb_db ( ctdb , db_id ) ;
if ( ! ctdb_db ) {
DEBUG ( DEBUG_ERR , ( __location__ " Unknown db 0x%08x \n " , db_id ) ) ;
return - 1 ;
}
2009-10-29 19:08:37 +03:00
if ( client - > db_id = = db_id ) {
return 0 ;
}
2009-10-29 02:49:00 +03:00
if ( ctdb_db - > transaction_active ) {
return 1 ;
} else {
return 0 ;
}
}
2008-07-17 07:50:55 +04:00
/*
  backwards compatibility:

  start a persistent store operation. passing both the key, header and
  data to the daemon. If the client disconnects before it has issued
  a persistent_update call to the daemon we trigger a full recovery
  to ensure the databases are brought back in sync.
  for now we ignore the recdata that the client has passed to us.

  Increments the client's num_persistent_updates counter. Returns 0 on
  success and -1 if the control cannot be matched to a client.
 */
int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
					     struct ctdb_req_control *c,
					     TDB_DATA recdata)
{
	struct ctdb_client *client;

	client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
	if (client != NULL) {
		client->num_persistent_updates++;
		return 0;
	}

	DEBUG(DEBUG_ERR, (__location__ " can not match start_persistent_update to a client. Returning error \n "));
	return -1;
}
2008-07-30 13:57:00 +04:00
/*
backwards compatibility :
called to tell ctdbd that it is no longer doing a persistent update
*/
2008-07-17 07:50:55 +04:00
int32_t ctdb_control_cancel_persistent_update ( struct ctdb_context * ctdb ,
2008-07-30 13:57:00 +04:00
struct ctdb_req_control * c ,
TDB_DATA recdata )
2008-07-17 07:50:55 +04:00
{
struct ctdb_client * client = ctdb_reqid_find ( ctdb , c - > client_id , struct ctdb_client ) ;
if ( client = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " can not match cancel_persistent_update to a client. Returning error \n " ) ) ;
return - 1 ;
}
2008-07-17 12:47:20 +04:00
if ( client - > num_persistent_updates > 0 ) {
client - > num_persistent_updates - - ;
}
2008-07-17 07:50:55 +04:00
return 0 ;
}
2008-07-30 13:57:00 +04:00
/*
  backwards compatibility:

  single record variant of ctdb_control_trans2_commit for older clients

  recdata is interpreted as one ctdb_rec_data. It is wrapped into a
  marshall buffer allocated on c and handed to
  ctdb_control_trans2_commit, whose result is returned. Note that
  rec->reqid is passed to ctdb_marshall_add both as the database id
  and as the request id.

  Returns -1 if recdata is too short to hold the record header or its
  size does not match the key and data lengths stored in the record.
 */
int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
				      struct ctdb_req_control *c,
				      TDB_DATA recdata, bool *async_reply)
{
	const size_t header_len = offsetof(struct ctdb_rec_data, data);
	struct ctdb_marshall_buffer *m;
	struct ctdb_rec_data *rec = (struct ctdb_rec_data *)recdata.dptr;
	TDB_DATA key, data;

	/* the length fields can only be trusted once the buffer is
	   known to be large enough to contain them */
	if (rec == NULL || recdata.dsize < header_len) {
		DEBUG(DEBUG_ERR, (__location__ " Bad data size in recdata \n "));
		return -1;
	}

	if (recdata.dsize != header_len + rec->keylen + rec->datalen) {
		DEBUG(DEBUG_ERR, (__location__ " Bad data size in recdata \n "));
		return -1;
	}

	/* the key is stored first, immediately followed by the data */
	key.dptr = &rec->data[0];
	key.dsize = rec->keylen;
	data.dptr = &rec->data[rec->keylen];
	data.dsize = rec->datalen;

	m = ctdb_marshall_add(c, NULL, rec->reqid, rec->reqid, key, NULL, data);
	CTDB_NO_MEMORY(ctdb, m);

	return ctdb_control_trans2_commit(ctdb, c, ctdb_marshall_finish(m), async_reply);
}
2009-12-11 17:31:02 +03:00
/*
  read the sequence number record (key CTDB_DB_SEQNUM_KEY, including
  its terminating NUL) of the database db_id from the local tdb

  On success *seqnum holds the stored value, or 0 if the record does
  not have the size of a uint64_t. Returns 0 on success, -1 if the
  database is unknown, or the result of ctdb_ltdb_fetch if the fetch
  fails; *seqnum is left untouched on failure.
 */
static int32_t ctdb_get_db_seqnum(struct ctdb_context *ctdb,
				  uint32_t db_id,
				  uint64_t *seqnum)
{
	const char *keyname = CTDB_DB_SEQNUM_KEY;
	TALLOC_CTX *mem_ctx = talloc_new(ctdb);
	struct ctdb_db_context *ctdb_db;
	TDB_DATA key;
	TDB_DATA data;
	int32_t ret;

	ctdb_db = find_ctdb_db(ctdb, db_id);
	if (!ctdb_db) {
		DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%08x \n ", db_id));
		ret = -1;
		goto done;
	}

	key.dptr = (uint8_t *)discard_const(keyname);
	key.dsize = strlen(keyname) + 1;

	ret = (int32_t)ctdb_ltdb_fetch(ctdb_db, key, NULL, mem_ctx, &data);
	if (ret == 0) {
		if (data.dsize == sizeof(uint64_t)) {
			*seqnum = *(uint64_t *)data.dptr;
		} else {
			/* a record of unexpected size counts as seqnum 0 */
			*seqnum = 0;
		}
	}

done:
	talloc_free(mem_ctx);
	return ret;
}
/**
 * Get the sequence number of a persistent database.
 *
 * indata carries the uint32_t database id. On success outdata is set to
 * a newly allocated uint64_t (a talloc child of outdata) holding the
 * complete sequence number. outdata is left untouched on failure.
 *
 * Returns 0 on success, -1 if the allocation fails, or the error
 * returned by ctdb_get_db_seqnum.
 *
 * NOTE(review): indata.dsize is not checked here; presumably the
 * control dispatcher validates it - confirm against the caller.
 */
int32_t ctdb_control_get_db_seqnum(struct ctdb_context *ctdb,
				   TDB_DATA indata,
				   TDB_DATA *outdata)
{
	uint32_t db_id;
	int32_t ret;
	uint64_t seqnum;
	uint8_t *buf;

	/* copy rather than cast so buffer alignment does not matter */
	memcpy(&db_id, indata.dptr, sizeof(db_id));

	ret = ctdb_get_db_seqnum(ctdb, db_id, &seqnum);
	if (ret != 0) {
		goto done;
	}

	buf = (uint8_t *)talloc_zero(outdata, uint64_t);
	if (buf == NULL) {
		ret = -1;
		goto done;
	}

	/* store all eight bytes; assigning through the uint8_t pointer
	   would keep only the lowest byte of the sequence number */
	memcpy(buf, &seqnum, sizeof(seqnum));

	outdata->dptr = buf;
	outdata->dsize = sizeof(uint64_t);

done:
	return ret;
}