/*
   ctdb freeze handling

   Copyright (C) Andrew Tridgell  2007

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
# include "includes.h"
# include "lib/events/events.h"
# include "lib/tdb/include/tdb.h"
# include "system/network.h"
# include "system/filesys.h"
# include "system/wait.h"
# include "../include/ctdb_private.h"
# include "lib/util/dlinklist.h"
# include "db_wrap.h"
/*
lock all databases
*/
static int ctdb_lock_all_databases ( struct ctdb_context * ctdb )
{
struct ctdb_db_context * ctdb_db ;
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
2009-10-10 07:26:09 +04:00
DEBUG ( DEBUG_INFO , ( " locking database 0x%08x priority:%u %s \n " , ctdb_db - > db_id , ctdb_db - > priority , ctdb_db - > db_name ) ) ;
2007-05-12 09:15:27 +04:00
if ( tdb_lockall ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
return - 1 ;
}
}
return 0 ;
}
/*
  one entry in the list of control requests that are waiting for a
  freeze lock child to get the database locks
 */
struct ctdb_freeze_waiter {
	/* doubly linked list pointers (used with the DLIST_* macros) */
	struct ctdb_freeze_waiter *next;
	struct ctdb_freeze_waiter *prev;

	struct ctdb_context *ctdb;

	/* the control request that is answered when the waiter is destroyed */
	struct ctdb_req_control *c;

	/* status passed to ctdb_request_control_reply() for that request */
	int32_t status;
};
/*
  a handle to a freeze lock child process
 */
struct ctdb_freeze_handle {
	struct ctdb_context *ctdb;

	/* pid returned by fork() for the lock child */
	pid_t child;

	/* parent end of the socketpair shared with the child */
	int fd;

	/* control requests waiting for the child to report its status */
	struct ctdb_freeze_waiter *waiters;

	/* set while transactions started by transaction_start are open */
	bool transaction_started;

	/* id supplied to transaction_start; later requests must match it */
	uint32_t transaction_id;
};
/*
destroy a freeze handle
*/
static int ctdb_freeze_handle_destructor ( struct ctdb_freeze_handle * h )
{
2008-07-07 02:50:12 +04:00
struct ctdb_context * ctdb = h - > ctdb ;
struct ctdb_db_context * ctdb_db ;
/* cancel any pending transactions */
if ( ctdb - > freeze_handle & & ctdb - > freeze_handle - > transaction_started ) {
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
tdb_add_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
if ( tdb_transaction_cancel ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to cancel transaction for db '%s' \n " ,
ctdb_db - > db_name ) ) ;
}
tdb_remove_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
}
ctdb - > freeze_handle - > transaction_started = false ;
}
ctdb - > freeze_mode = CTDB_FREEZE_NONE ;
ctdb - > freeze_handle = NULL ;
2007-05-12 09:15:27 +04:00
kill ( h - > child , SIGKILL ) ;
return 0 ;
}
/*
called when the child writes its status to us
*/
static void ctdb_freeze_lock_handler ( struct event_context * ev , struct fd_event * fde ,
uint16_t flags , void * private_data )
{
struct ctdb_freeze_handle * h = talloc_get_type ( private_data , struct ctdb_freeze_handle ) ;
int32_t status ;
struct ctdb_freeze_waiter * w ;
2007-05-12 09:59:49 +04:00
if ( h - > ctdb - > freeze_mode = = CTDB_FREEZE_FROZEN ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_INFO , ( " freeze child died - unfreezing \n " ) ) ;
2008-07-04 10:05:04 +04:00
if ( h - > ctdb - > freeze_handle = = h ) {
h - > ctdb - > freeze_handle = NULL ;
}
2007-05-12 09:59:49 +04:00
talloc_free ( h ) ;
return ;
}
2007-05-12 09:15:27 +04:00
if ( read ( h - > fd , & status , sizeof ( status ) ) ! = sizeof ( status ) ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " read error from freeze lock child \n " ) ) ;
2007-05-12 09:15:27 +04:00
status = - 1 ;
}
if ( status = = - 1 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Failed to get locks in ctdb_freeze_child \n " ) ) ;
2007-05-12 09:15:27 +04:00
/* we didn't get the locks - destroy the handle */
talloc_free ( h ) ;
return ;
}
h - > ctdb - > freeze_mode = CTDB_FREEZE_FROZEN ;
/* notify the waiters */
while ( ( w = h - > ctdb - > freeze_handle - > waiters ) ) {
w - > status = status ;
DLIST_REMOVE ( h - > ctdb - > freeze_handle - > waiters , w ) ;
talloc_free ( w ) ;
}
}
/*
create a child which gets locks on all the open databases , then calls the callback telling the parent
that it is done
*/
static struct ctdb_freeze_handle * ctdb_freeze_lock ( struct ctdb_context * ctdb )
{
struct ctdb_freeze_handle * h ;
int fd [ 2 ] ;
struct fd_event * fde ;
h = talloc_zero ( ctdb , struct ctdb_freeze_handle ) ;
2008-07-04 11:04:26 +04:00
CTDB_NO_MEMORY_NULL ( ctdb , h ) ;
2007-05-12 09:15:27 +04:00
h - > ctdb = ctdb ;
/* use socketpair() instead of pipe() so we have bi-directional fds */
if ( socketpair ( AF_UNIX , SOCK_STREAM , 0 , fd ) ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Failed to create pipe for ctdb_freeze_lock \n " ) ) ;
2007-05-12 09:15:27 +04:00
talloc_free ( h ) ;
return NULL ;
}
h - > child = fork ( ) ;
if ( h - > child = = - 1 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Failed to fork child for ctdb_freeze_lock \n " ) ) ;
2007-05-12 09:15:27 +04:00
talloc_free ( h ) ;
return NULL ;
}
if ( h - > child = = 0 ) {
int ret ;
2008-06-26 05:52:26 +04:00
int count = 0 ;
2007-05-12 09:15:27 +04:00
/* in the child */
close ( fd [ 0 ] ) ;
ret = ctdb_lock_all_databases ( ctdb ) ;
if ( ret ! = 0 ) {
_exit ( 0 ) ;
}
2008-06-26 05:02:08 +04:00
2008-06-26 08:14:37 +04:00
alarm ( 30 ) ;
while ( count + + < 30 ) {
2008-06-26 05:52:26 +04:00
ret = write ( fd [ 1 ] , & ret , sizeof ( ret ) ) ;
2008-06-26 08:00:36 +04:00
if ( ret = = sizeof ( ret ) ) {
break ;
2008-06-26 05:52:26 +04:00
}
2008-06-26 08:00:36 +04:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to write to socket from freeze child. ret:%d errno:%u \n " , ret , errno ) ) ;
2008-06-26 03:54:27 +04:00
sleep ( 1 ) ;
2008-06-26 05:52:26 +04:00
}
if ( count > = 30 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to write to socket from freeze child. Aborting freeze child \n " ) ) ;
_exit ( 0 ) ;
2008-06-26 03:54:27 +04:00
}
2007-05-12 09:15:27 +04:00
/* the read here means we will die if the parent exits */
read ( fd [ 1 ] , & ret , sizeof ( ret ) ) ;
_exit ( 0 ) ;
}
talloc_set_destructor ( h , ctdb_freeze_handle_destructor ) ;
close ( fd [ 1 ] ) ;
h - > fd = fd [ 0 ] ;
fde = event_add_fd ( ctdb - > ev , h , h - > fd , EVENT_FD_READ | EVENT_FD_AUTOCLOSE ,
ctdb_freeze_lock_handler , h ) ;
if ( fde = = NULL ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Failed to setup fd event for ctdb_freeze_lock \n " ) ) ;
2007-05-12 09:15:27 +04:00
close ( fd [ 0 ] ) ;
talloc_free ( h ) ;
return NULL ;
}
return h ;
}
/*
destroy a waiter for a freeze mode change
*/
static int ctdb_freeze_waiter_destructor ( struct ctdb_freeze_waiter * w )
{
DLIST_REMOVE ( w - > ctdb - > freeze_handle - > waiters , w ) ;
2007-05-12 15:25:26 +04:00
ctdb_request_control_reply ( w - > ctdb , w - > c , NULL , w - > status , NULL ) ;
2007-05-12 09:15:27 +04:00
return 0 ;
}
/*
2007-06-10 02:46:33 +04:00
start the freeze process
2007-05-12 09:15:27 +04:00
*/
2007-06-10 02:46:33 +04:00
void ctdb_start_freeze ( struct ctdb_context * ctdb )
2007-05-12 09:15:27 +04:00
{
if ( ctdb - > freeze_mode = = CTDB_FREEZE_FROZEN ) {
/* we're already frozen */
2007-06-10 02:46:33 +04:00
return ;
2007-05-12 09:15:27 +04:00
}
/* if there isn't a freeze lock child then create one */
if ( ! ctdb - > freeze_handle ) {
ctdb - > freeze_handle = ctdb_freeze_lock ( ctdb ) ;
2007-06-10 02:46:33 +04:00
CTDB_NO_MEMORY_VOID ( ctdb , ctdb - > freeze_handle ) ;
2007-05-12 09:15:27 +04:00
ctdb - > freeze_mode = CTDB_FREEZE_PENDING ;
}
2007-06-10 02:46:33 +04:00
}
/*
freeze the databases
*/
int32_t ctdb_control_freeze ( struct ctdb_context * ctdb , struct ctdb_req_control * c , bool * async_reply )
{
struct ctdb_freeze_waiter * w ;
2009-10-12 02:22:17 +04:00
uint32_t priority ;
priority = ( uint32_t ) c - > srvid ;
2007-06-10 02:46:33 +04:00
if ( ctdb - > freeze_mode = = CTDB_FREEZE_FROZEN ) {
/* we're already frozen */
return 0 ;
}
ctdb_start_freeze ( ctdb ) ;
2007-05-12 09:15:27 +04:00
/* add ourselves to list of waiters */
w = talloc ( ctdb - > freeze_handle , struct ctdb_freeze_waiter ) ;
CTDB_NO_MEMORY ( ctdb , w ) ;
w - > ctdb = ctdb ;
w - > c = talloc_steal ( w , c ) ;
w - > status = - 1 ;
talloc_set_destructor ( w , ctdb_freeze_waiter_destructor ) ;
DLIST_ADD ( ctdb - > freeze_handle - > waiters , w ) ;
/* we won't reply till later */
* async_reply = True ;
return 0 ;
}
2007-05-23 06:23:07 +04:00
/*
block until we are frozen , used during daemon startup
*/
bool ctdb_blocking_freeze ( struct ctdb_context * ctdb )
{
2007-06-10 02:46:33 +04:00
ctdb_start_freeze ( ctdb ) ;
2007-05-23 06:23:07 +04:00
/* block until frozen */
while ( ctdb - > freeze_mode = = CTDB_FREEZE_PENDING ) {
event_loop_once ( ctdb - > ev ) ;
}
return ctdb - > freeze_mode = = CTDB_FREEZE_FROZEN ;
}
2007-05-12 09:15:27 +04:00
/*
thaw the databases
*/
2009-10-12 02:22:17 +04:00
int32_t ctdb_control_thaw ( struct ctdb_context * ctdb , struct ctdb_req_control * c )
2007-05-12 09:15:27 +04:00
{
2009-10-12 02:22:17 +04:00
uint32_t priority ;
priority = ( uint32_t ) c - > srvid ;
2008-01-06 04:38:01 +03:00
/* cancel any pending transactions */
if ( ctdb - > freeze_handle & & ctdb - > freeze_handle - > transaction_started ) {
struct ctdb_db_context * ctdb_db ;
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
tdb_add_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
if ( tdb_transaction_cancel ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to cancel transaction for db '%s' \n " ,
2008-01-06 04:38:01 +03:00
ctdb_db - > db_name ) ) ;
}
tdb_remove_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
}
}
2008-01-05 01:36:21 +03:00
#if 0
/* this hack can be used to get a copy of the databases at the end of a recovery */
system ( " mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null " ) ;
# endif
2008-01-06 04:38:01 +03:00
#if 0
/* and this one for local testing */
system ( " mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null " ) ;
# endif
2007-05-12 09:15:27 +04:00
talloc_free ( ctdb - > freeze_handle ) ;
ctdb - > freeze_handle = NULL ;
2007-05-18 18:56:49 +04:00
ctdb_call_resend_all ( ctdb ) ;
2007-05-12 09:15:27 +04:00
return 0 ;
}
2008-01-06 04:38:01 +03:00
/*
start a transaction on all databases - used for recovery
*/
2008-01-06 05:24:55 +03:00
int32_t ctdb_control_transaction_start ( struct ctdb_context * ctdb , uint32_t id )
2008-01-06 04:38:01 +03:00
{
struct ctdb_db_context * ctdb_db ;
if ( ctdb - > freeze_mode ! = CTDB_FREEZE_FROZEN ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed transaction_start while not frozen \n " ) ) ;
2008-01-06 04:38:01 +03:00
return - 1 ;
}
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
int ret ;
tdb_add_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
if ( ctdb - > freeze_handle - > transaction_started ) {
if ( tdb_transaction_cancel ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to cancel transaction for db '%s' \n " ,
2008-01-06 04:38:01 +03:00
ctdb_db - > db_name ) ) ;
/* not a fatal error */
}
}
ret = tdb_transaction_start ( ctdb_db - > ltdb - > tdb ) ;
tdb_remove_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
if ( ret ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to start transaction for db '%s' \n " ,
2008-01-06 04:38:01 +03:00
ctdb_db - > db_name ) ) ;
return - 1 ;
}
}
ctdb - > freeze_handle - > transaction_started = true ;
2008-01-06 05:24:55 +03:00
ctdb - > freeze_handle - > transaction_id = id ;
2008-01-06 04:38:01 +03:00
return 0 ;
}
/*
commit transactions on all databases
*/
2008-01-06 05:24:55 +03:00
int32_t ctdb_control_transaction_commit ( struct ctdb_context * ctdb , uint32_t id )
2008-01-06 04:38:01 +03:00
{
struct ctdb_db_context * ctdb_db ;
if ( ctdb - > freeze_mode ! = CTDB_FREEZE_FROZEN ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed transaction_start while not frozen \n " ) ) ;
2008-01-06 04:38:01 +03:00
return - 1 ;
}
if ( ! ctdb - > freeze_handle - > transaction_started ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " transaction not started \n " ) ) ;
2008-01-06 04:38:01 +03:00
return - 1 ;
}
2008-01-06 05:24:55 +03:00
if ( id ! = ctdb - > freeze_handle - > transaction_id ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( __location__ " incorrect transaction id 0x%x in commit \n " , id ) ) ;
2008-01-06 05:24:55 +03:00
return - 1 ;
}
2008-01-06 04:38:01 +03:00
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
tdb_add_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
if ( tdb_transaction_commit ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
2008-07-07 02:51:05 +04:00
DEBUG ( DEBUG_ERR , ( __location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false. \n " ,
2008-01-06 04:38:01 +03:00
ctdb_db - > db_name ) ) ;
2008-07-07 02:51:05 +04:00
/* cancel any pending transactions */
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
tdb_add_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
if ( tdb_transaction_cancel ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( __location__ " Failed to cancel transaction for db '%s' \n " ,
ctdb_db - > db_name ) ) ;
}
tdb_remove_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
}
ctdb - > freeze_handle - > transaction_started = false ;
2008-01-06 04:38:01 +03:00
return - 1 ;
}
tdb_remove_flags ( ctdb_db - > ltdb - > tdb , TDB_NOLOCK ) ;
}
ctdb - > freeze_handle - > transaction_started = false ;
2008-01-06 05:24:55 +03:00
ctdb - > freeze_handle - > transaction_id = 0 ;
2008-01-06 04:38:01 +03:00
return 0 ;
}
/*
  wipe a database - only possible when in a frozen transaction

  indata carries a struct ctdb_control_wipe_database naming the database
  (db_id) and the transaction (transaction_id). Returns 0 after the
  database was wiped. Returns -1 if indata is too small, we are not
  frozen, no transaction is started, the transaction id does not match,
  the database is unknown or the wipe itself fails.
 */
int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_control_wipe_database w;
	struct ctdb_db_context *ctdb_db;

	/* never read the request structure out of a missing or short buffer */
	if (indata.dptr == NULL || indata.dsize < sizeof(w)) {
		DEBUG(DEBUG_ERR,(__location__ " Invalid data size %u in wipe_database \n ",
			 (unsigned)indata.dsize));
		return -1;
	}
	w = *(struct ctdb_control_wipe_database *)indata.dptr;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(DEBUG_ERR,(__location__ " Failed wipe_database while not frozen \n "));
		return -1;
	}

	if (!ctdb->freeze_handle->transaction_started) {
		DEBUG(DEBUG_ERR,(__location__ " transaction not started \n "));
		return -1;
	}

	if (w.transaction_id != ctdb->freeze_handle->transaction_id) {
		DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in wipe_database \n ", w.transaction_id));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, w.db_id);
	if (!ctdb_db) {
		DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x \n ", w.db_id));
		return -1;
	}

	if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s' \n ",
			 ctdb_db->db_name));
		return -1;
	}

	return 0;
}